
add training & update openai version

kevin 1 year ago
parent
commit
4b3763b7e2
3 changed files with 172 additions and 34 deletions
  1. convert.py (+35, -0)
  2. main.py (+92, -33)
  3. requirements.txt (+45, -1)

+ 35 - 0
convert.py

@@ -0,0 +1,35 @@
+import pandas as pd
+import json
+import jsonlines
+from io import BytesIO, StringIO
+def alpaca_to_chatgpt(jsf):
+    res = []
+    SYSTEM_MESSAGE = "Please respond professionally and in a friendly manner, using the same language as the original request."
+    js = None
+    if isinstance(jsf, str):
+        js = json.loads(jsf)
+    else:
+        js = json.load(jsf)
+    print(js)
+    for j in js:
+        r = {
+            "messages":
+                [
+                    {"role": "system", "content": SYSTEM_MESSAGE},
+                    {"role": "user", "content": j['instruction']},
+                    {"role": "assistant", "content": j['output']}
+                ]
+        }
+        res.append(r)
+    fp = BytesIO()
+    writer = jsonlines.Writer(fp)
+    for j in res:
+        writer.write(j)
+    return fp
+
+def csv_to_jsonl(csvf):
+    df = pd.read_csv(BytesIO(csvf))
+    df.fillna("", inplace=True)
+    json_string = df.to_json(orient="records")
+    return alpaca_to_chatgpt(json_string)
+
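
For reference, alpaca_to_chatgpt accepts either a JSON string or a file-like object containing an Alpaca-style array of instruction/output records, wraps each record in the chat fine-tuning message format, and returns an in-memory JSONL buffer; csv_to_jsonl does the same for a CSV with matching columns. A minimal usage sketch (the sample record is illustrative, not from the repository):

    from convert import alpaca_to_chatgpt

    # Hypothetical Alpaca-style input: a JSON array of instruction/output records.
    sample = '[{"instruction": "Say hello", "output": "Hello! How can I help you?"}]'

    buf = alpaca_to_chatgpt(sample)  # BytesIO holding one chat-format JSON object per line
    buf.seek(0)                      # the buffer comes back positioned at the end, so rewind before reading
    print(buf.read().decode("utf-8"))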

+ 92 - 33
main.py

@@ -1,17 +1,19 @@
+import io
 import os
+import json
 
-import openai
+from openai import OpenAI
 from flask import Flask, request, jsonify, send_from_directory, url_for
 
-import json
+from convert import alpaca_to_chatgpt, csv_to_jsonl
 
 app = Flask(__name__)
 ssl = None
 # ssl =('/etc/ssl/sample.crt', '/etc/ssl/sample.pem')
 
-openai_key = os.environ.get("OPENAI_KEY", "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")
+app.openai_key = os.environ.get("OPENAI_KEY", "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")
+app.openai_client = OpenAI(api_key=app.openai_key)
 
-openai.api_key = openai_key
 app.chat_messages = [
     {"role": "system",
      "content": "Please respond professionally and in a friendly manner, using the same language as the original request."}
@@ -27,47 +29,51 @@ app.suggest_messages = [
 app.recommend_messages = [
     {"role": "system",
      "content": "Give normalized total weight of each category in json based on headlines"
-    }
+     }
 ]
 app.summary_messages = [
     {"role": "system",
      "content": "Please summarize an article."
-    }
+     }
 ]
 UPLOAD_FOLDER = 'files'
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 
+
 @app.route('/files/<name>')
 def download_file(name):
     return send_from_directory(app.config["UPLOAD_FOLDER"], name)
 
+
 @app.route('/', methods=['GET', 'POST'])
 def test():
     return jsonify({"status": "0"})
 
+
 def recommend(headlines, category):
     chat_messages = app.recommend_messages.copy()
     try:
         json_payload = {
-                        "role": "user",
-                        "content": f"""{headlines}
+            "role": "user",
+            "content": f"""{headlines}
                                     Berikan nilai berat masing-masing kategori, jumlahkan dan normalisasikan:
                                     {category}
                                     Berikan dalam bentuk json
                                     """
-                        }
+        }
         chat_messages.append(json_payload)
         print(chat_messages)
-        json_response = openai.ChatCompletion.create(model="gpt-3.5-turbo-1106",
-                                                     messages=chat_messages,
-                                                     response_format={ "type": "json_object" }
-                                                     )
-        print(json_response.choices[0]["message"]["content"])
-        return json.loads(json_response.choices[0]["message"]["content"])
+        json_response = app.openai_client.chat.completions.create(model="gpt-3.5-turbo-1106",
+                                                                  messages=chat_messages,
+                                                                  response_format={"type": "json_object"}
+                                                                  )
+        print(json_response.choices[0].message.content)
+        return json.loads(json_response.choices[0].message.content)
     except Exception as error_print:
         app.logger.error(error_print)
         result = {}, 405
 
+
 def vision(message, image_url=None, image_b64=None):
     chat_messages = app.chat_messages.copy()
     url = ""
@@ -76,7 +82,7 @@ def vision(message, image_url=None, image_b64=None):
     elif image_b64:
         url = f"data:image/jpeg;base64,{image_b64}"
     try:
-        json_payload =  {
+        json_payload = {
             "role": "user",
             "content": [
                 {"type": "text", "text": message},
@@ -90,12 +96,12 @@ def vision(message, image_url=None, image_b64=None):
         }
         chat_messages.append(json_payload)
         print(chat_messages)
-        json_response = openai.ChatCompletion.create(
+        json_response = app.openai_client.chat.completions.create(
             model="gpt-4-vision-preview",
             messages=chat_messages,
             max_tokens=500
         )
-        return json_response.choices[0]["message"]
+        return {"role": "assistant", "content": json_response.choices[0].message.content}
     except Exception as error_print:
         app.logger.error(error_print)
         result = {}, 405
@@ -104,6 +110,7 @@ def vision(message, image_url=None, image_b64=None):
 @app.route('/gpt', methods=['POST'])
 def gpt():
     chat_messages = app.chat_messages.copy()
+    chat_model = "gpt-3.5-turbo"
     use_video = False
     suggest = False
     summarize = False
@@ -124,16 +131,20 @@ def gpt():
                 num_choices = 5 if json_payload['num_choices'] > 5 else json_payload['num_choices']
             if 'use_video' in json_payload:
                 use_video = json_payload['use_video'] == "1"
+            if 'chat_model' in json_payload:
+                chat_model = json_payload['chat_model']
             if 'translate' in json_payload:
                 chat_messages = app.translate_messages.copy()
-                json_payload['payload'][-1]['content'] = json_payload['payload'][-1]['content'] + f" (Translate to {json_payload['translate']})"
+                json_payload['payload'][-1]['content'] = json_payload['payload'][-1][
+                                                             'content'] + f" (Translate to {json_payload['translate']})"
             elif 'suggest' in json_payload:
                 suggest = json_payload['suggest'] == "1"
                 if suggest:
                     chat_messages = app.suggest_messages.copy()
                 else:
                     chat_messages = app.chat_messages.copy()
-                json_payload['payload'][-1]['content'] = json_payload['payload'][-1]['content'] + f" What can I say to him/her?"
+                json_payload['payload'][-1]['content'] = json_payload['payload'][-1][
+                                                             'content'] + f" What can I say to him/her?"
             elif 'summarize' in json_payload:
                 summarize = json_payload['summarize'] == "1"
                 if summarize:
@@ -142,7 +153,8 @@ def gpt():
                     max_resp_token = 4096
                 else:
                     chat_messages = app.chat_messages.copy()
-                json_payload['payload'][-1]['content'] = f"Please summarize this article:\n" + json_payload['payload'][-1]['content']
+                json_payload['payload'][-1]['content'] = f"Please summarize this article:\n" + \
+                                                         json_payload['payload'][-1]['content']
             else:
                 chat_messages = app.chat_messages.copy()
             json_payload = json_payload['payload']
@@ -158,9 +170,9 @@ def gpt():
                 "role": "user",
                 "content": islamic_message
             })
-            islamic_response = openai.ChatCompletion.create(model="gpt-3.5-turbo",  # GPT-3.5 Turbo engine
-                                                     messages=islam_messages,
-                                                     max_tokens=2, temperature=0.5)
+            islamic_response = app.openai_client.chat.completions.create(model="gpt-3.5-turbo",  # GPT-3.5 Turbo engine
+                                                                         messages=islam_messages,
+                                                                         max_tokens=2, temperature=0.5)
             if 'Ya' in islamic_response.choices[0].message.content:
                 greeting_message = f"Buatkan respons chatbot berupa greeting dari chat perusahaan bernama {company_name} pada jam {timestamp}, tidak perlu mention waktu, dan jawab dengan 'Assalamu'alaikum...' terlebih dahulu"
             else:
@@ -178,11 +190,11 @@ def gpt():
         elif 'image_url' in json_payload:
             image = json_payload['image_url']
             message = json_payload["message"] if 'message' in json_payload else "Ini gambar apa?"
-            return vision(message,image_url=image)
+            return vision(message, image_url=image)
         elif 'image_b64' in json_payload:
             image = json_payload['image_b64']
             message = json_payload["message"] if 'message' in json_payload else "Ini gambar apa?"
-            return vision(message,image_b64=image_url)
+            return vision(message, image_b64=image)
         else:
             chat_messages = app.chat_messages.copy()
             json_payload = [json_payload]
@@ -201,28 +213,75 @@ def gpt():
     result = {}
     try:
         n = num_choices
-        json_response = openai.ChatCompletion.create(model="gpt-3.5-turbo",  # GPT-3.5 Turbo engine
-                                                     messages=chat_messages,
-                                                     max_tokens=max_resp_token, temperature=0.7, n = n)
+        json_response = app.openai_client.chat.completions.create(model=chat_model,  # defaults to gpt-3.5-turbo unless chat_model is set in the request
+                                                                  messages=chat_messages,
+                                                                  max_tokens=max_resp_token, temperature=0.7, n=n)
         app.logger.info(json_response.choices[0].message)
         if has_named_params:
             if suggest:
                 choices = json_response.choices
                 messages = [i.message for i in choices]
-                result = {"url": "", "message": messages}
+                json_formatted = []
+                for message in messages:
+                    json_formatted.append({"role": "assistant", "content": message.content})
+                result = {"url": "", "message": json_formatted}
             elif use_video:
                 # TODO: to be implemented
-                result = {"url":  url_for('download_file', name="test.mp4", _external=True), "message": json_response.choices[0].message}
+                result = {"url": url_for('download_file', name="test.mp4", _external=True),
+                          "message": {"role": "assistant", "content": json_response.choices[0].message.content}}
             else:
-                result = {"url": "", "message": json_response.choices[0].message}
+                result = {"url": "", "message": {"role": "assistant", "content": json_response.choices[0].message.content}}
         else:
-            result = json_response.choices[0].message
+            result = {"role": "assistant", "content": json_response.choices[0].message.content}
     except Exception as error_print:
         app.logger.error(error_print)
         result = {}, 405
     return result
 
 
+@app.route('/train', methods=['POST'])
+def train():
+    if 'job_id' in request.form:
+        return train_with_id(job_id=request.form['job_id'])
+    elif 'train_file' in request.files:
+        train_file = request.files['train_file']
+        openai_file = None
+        if train_file.filename.split('.')[1] == 'jsonl':
+            openai_file = train_file.stream.read()
+        elif train_file.filename.split('.')[1] == 'csv':
+            openai_file = csv_to_jsonl(train_file.stream.read())
+        elif train_file.filename.split('.')[1] == 'json':
+            openai_file = alpaca_to_chatgpt(train_file)
+        if 'mock' not in request.form:
+            f = app.openai_client.files.create(
+                file=openai_file,
+                purpose="fine-tune"
+            )
+            job = app.openai_client.fine_tuning.jobs.create(
+                training_file=f.id,
+                model="gpt-3.5-turbo",
+                hyperparameters={
+                    "n_epochs": 5
+                }
+            )
+            return {"status": job.status, "job_id": job.id}
+        else:
+            return {"status": "ok"}
+    else:
+        return {"status": "error", "message": "Training file not found"}
+
+def train_with_id(job_id):
+    try:
+        job = app.openai_client.fine_tuning.jobs.retrieve(job_id)
+        if job.fine_tuned_model is None:
+            return {"status": job.status}
+        else:
+            return {"status": job.status, "model_name": job.fine_tuned_model}
+    except Exception as error_print:
+        print(error_print)
+        return {"status": "Could not find job from id"}
+
+
 # Press the green button in the gutter to run the script.
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=8348, debug=True, ssl_context=ssl)
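
Taken together, the new /train route starts a fine-tuning job from an uploaded training file (jsonl, csv, or Alpaca-style json) and, when called with a job_id instead, reports the job's status and the fine-tuned model name once it is ready. A rough client-side sketch against a locally running instance (the base URL follows the app.run call above; the file name is illustrative):

    import requests

    BASE = "http://localhost:8348"

    # Kick off a fine-tuning job by uploading a JSONL training file.
    with open("train.jsonl", "rb") as f:
        job = requests.post(f"{BASE}/train", files={"train_file": f}).json()
    print(job)  # e.g. {"status": "...", "job_id": "ftjob-..."}

    # Poll the same endpoint with the returned job_id until a model name appears.
    status = requests.post(f"{BASE}/train", data={"job_id": job["job_id"]}).json()
    print(status)  # {"status": "..."} or {"status": "...", "model_name": "ft:..."}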

+ 45 - 1
requirements.txt

@@ -1,4 +1,48 @@
+aiohttp==3.8.4
+aiosignal==1.3.1
+annotated-types==0.6.0
+anyio==4.3.0
+async-timeout==4.0.2
+attrs==23.1.0
+blinker==1.6.2
+certifi==2023.5.7
+chardet==3.0.4
+charset-normalizer==3.1.0
+click==8.1.3
+distro==1.9.0
+exceptiongroup==1.2.0
 Flask==2.3.2
+frozenlist==1.3.3
+googletrans==3.0.0
 gunicorn==20.1.0
+h11==0.14.0
+h2==3.2.0
+hpack==3.0.0
+hstspreload==2023.1.1
+httpcore==1.0.5
+httpx==0.27.0
+hyperframe==5.2.0
+idna==2.10
+itsdangerous==2.1.2
+Jinja2==3.1.2
+jsonlines==4.0.0
+MarkupSafe==2.1.3
 mod-wsgi==4.9.4
-openai==0.27.8
+multidict==6.0.4
+numpy==1.26.4
+openai==1.16.0
+pandas==2.2.1
+pydantic==2.6.4
+pydantic_core==2.16.3
+python-dateutil==2.9.0.post0
+pytz==2024.1
+requests==2.31.0
+rfc3986==1.5.0
+six==1.16.0
+sniffio==1.3.0
+tqdm==4.65.0
+typing_extensions==4.10.0
+tzdata==2024.1
+urllib3==2.0.3
+Werkzeug==2.3.5
+yarl==1.9.2
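
The requirements bump from openai==0.27.8 to openai==1.16.0 is what drives the code changes in main.py: the 1.x SDK replaces the module-level openai.ChatCompletion.create call with a client object and returns typed response objects. A minimal sketch of the new call pattern (model and prompt are placeholders):

    from openai import OpenAI

    client = OpenAI(api_key="...")  # or rely on the OPENAI_API_KEY environment variable

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello"}],
    )
    # 1.x responses use attribute access, not dict-style indexing.
    print(response.choices[0].message.content)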