convert.py 998 B

1234567891011121314151617181920212223242526272829303132333435
  1. import pandas as pd
  2. import json
  3. import jsonlines
  4. from io import BytesIO, StringIO
  5. def alpaca_to_chatgpt(jsf):
  6. res = []
  7. SYSTEM_MESSAGE = "Please respond professionally and in a friendly manner, using the same language as the original request."
  8. js = None
  9. if isinstance(jsf, str):
  10. js = json.loads(jsf)
  11. else:
  12. js = json.load(jsf)
  13. print(js)
  14. for j in js:
  15. r = {
  16. "messages":
  17. [
  18. {"role": "system", "content": SYSTEM_MESSAGE},
  19. {"role": "user", "content": j['instruction'].lower()},
  20. {"role": "assistant", "content": j['output']}
  21. ]
  22. }
  23. res.append(r)
  24. fp = BytesIO()
  25. writer = jsonlines.Writer(fp)
  26. for j in res:
  27. writer.write(j)
  28. return fp
  29. def csv_to_jsonl(csvf):
  30. df = pd.read_csv(BytesIO(csvf))
  31. df.fillna("", inplace=True)
  32. json_string = df.to_json(orient="records")
  33. return alpaca_to_chatgpt(json_string)