convert.py 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. import pandas as pd
  2. import json
  3. import jsonlines
  4. from io import BytesIO, StringIO
  5. def alpaca_to_chatgpt(jsf, sys_m=None):
  6. res = []
  7. if sys_m is not None:
  8. SYSTEM_MESSAGE = sys_m
  9. else:
  10. SYSTEM_MESSAGE = "Please respond professionally and in a friendly manner, using the same language as the original request."
  11. js = None
  12. if isinstance(jsf, str):
  13. js = json.loads(jsf)
  14. else:
  15. js = json.load(jsf)
  16. print(js)
  17. for j in js:
  18. r = {
  19. "messages":
  20. [
  21. {"role": "system", "content": SYSTEM_MESSAGE},
  22. {"role": "user", "content": j['instruction'].lower()},
  23. {"role": "assistant", "content": j['output']}
  24. ]
  25. }
  26. res.append(r)
  27. fp = BytesIO()
  28. writer = jsonlines.Writer(fp)
  29. for j in res:
  30. writer.write(j)
  31. return fp
  32. def csv_to_jsonl(csvf, sys_m=None):
  33. df = pd.read_csv(BytesIO(csvf))
  34. df.fillna("", inplace=True)
  35. json_string = df.to_json(orient="records")
  36. return alpaca_to_chatgpt(json_string, sys_m)