1234567891011121314151617181920212223242526272829303132333435 |
- import pandas as pd
- import json
- import jsonlines
- from io import BytesIO, StringIO
def alpaca_to_chatgpt(jsf):
    """Convert Alpaca-style records into ChatGPT chat-format JSON Lines.

    Each input record becomes one ``{"messages": [...]}`` object holding a
    fixed system prompt, the lower-cased ``instruction`` as the user turn and
    the ``output`` as the assistant turn.

    Args:
        jsf: JSON text (``str``, ``bytes`` or ``bytearray``) or a file-like
            object with a ``read()`` method. The JSON must be an array of
            objects that each have ``instruction`` and ``output`` keys.

    Returns:
        A ``BytesIO`` holding one JSON object per line, rewound to the start
        so callers can ``read()`` it or hand it to an uploader directly.

    Raises:
        json.JSONDecodeError: If the input is not valid JSON.
        KeyError: If a record lacks ``instruction`` or ``output``.
    """
    SYSTEM_MESSAGE = "Please respond professionally and in a friendly manner, using the same language as the original request."

    # In-memory text/bytes go through json.loads; anything else is treated as
    # an open file object.
    if isinstance(jsf, (str, bytes, bytearray)):
        records = json.loads(jsf)
    else:
        records = json.load(jsf)

    conversations = [
        {
            "messages": [
                {"role": "system", "content": SYSTEM_MESSAGE},
                {"role": "user", "content": record["instruction"].lower()},
                {"role": "assistant", "content": record["output"]},
            ]
        }
        for record in records
    ]

    fp = BytesIO()
    # Closing the writer does not close ``fp``: jsonlines only closes files
    # it opened itself, so the buffer stays usable after the ``with`` block.
    with jsonlines.Writer(fp) as writer:
        writer.write_all(conversations)

    # The writes leave the cursor at end-of-stream; rewind so a plain
    # ``fp.read()`` by the caller returns the data instead of ``b""``.
    fp.seek(0)
    return fp
def csv_to_jsonl(csvf):
    """Convert raw CSV bytes into ChatGPT chat-format JSON Lines.

    The CSV is loaded with pandas, serialised to a JSON array of row objects
    and passed to :func:`alpaca_to_chatgpt`, which reads the ``instruction``
    and ``output`` fields of every row.

    Args:
        csvf: The bytes of a CSV file (e.g. the content of an uploaded file).

    Returns:
        The ``BytesIO`` produced by :func:`alpaca_to_chatgpt`.
    """
    # Read every column as text. With pandas' default type inference a cell
    # such as "42" becomes a number, which breaks the ``.lower()`` call on the
    # instruction and puts non-string content into the chat messages.
    df = pd.read_csv(BytesIO(csvf), dtype=str)

    # Missing cells are still parsed as NaN; blank them so the JSON carries
    # empty strings rather than nulls.
    df = df.fillna("")

    json_string = df.to_json(orient="records")
    return alpaca_to_chatgpt(json_string)
|