12345678910111213141516171819202122232425262728293031323334353637383940 |
- import pandas as pd
- import json
- import jsonlines
- from io import BytesIO, StringIO
def alpaca_to_chatgpt(jsf, sys_m=None):
    """Convert Alpaca-style records into chat-format JSON Lines.

    Args:
        jsf: Either a JSON string or a readable file-like object containing
            a JSON array of objects. Every object must provide the keys
            ``instruction`` and ``output``.
        sys_m: System message placed first in every conversation. When
            ``None``, a built-in default message is used.

    Returns:
        An ``io.BytesIO`` holding one JSON object per line. Each object has
        a ``messages`` list with a system, a user and an assistant entry.
        The buffer is rewound to position 0 so it can be read or uploaded
        directly; ``getvalue()`` is unaffected by the position.

    Raises:
        KeyError: If a record lacks ``instruction`` or ``output``.
    """
    if sys_m is not None:
        system_message = sys_m
    else:
        system_message = (
            "Please respond professionally and in a friendly manner, "
            "using the same language as the original request."
        )

    # Strings are parsed directly; anything else is treated as a file object.
    records = json.loads(jsf) if isinstance(jsf, str) else json.load(jsf)

    conversations = [
        {
            "messages": [
                {"role": "system", "content": system_message},
                # The user prompt is lower-cased before being stored.
                {"role": "user", "content": record["instruction"].lower()},
                {"role": "assistant", "content": record["output"]},
            ]
        }
        for record in records
    ]

    buffer = BytesIO()
    jsonlines.Writer(buffer).write_all(conversations)
    # Writing leaves the cursor at end-of-stream; rewind so that a plain
    # read() by the caller returns the data instead of an empty result.
    buffer.seek(0)
    return buffer
def csv_to_jsonl(csvf, sys_m=None):
    """Turn raw CSV bytes into chat-format JSON Lines.

    Args:
        csvf: CSV content as a bytes-like object (it is wrapped in a
            ``BytesIO`` before parsing).
        sys_m: Optional system message, forwarded unchanged to
            ``alpaca_to_chatgpt``.

    Returns:
        Whatever ``alpaca_to_chatgpt`` returns for the parsed rows.
    """
    # Missing cells become empty strings rather than NaN/null in the JSON.
    frame = pd.read_csv(BytesIO(csvf)).fillna("")
    records_json = frame.to_json(orient="records")
    return alpaca_to_chatgpt(records_json, sys_m)
|