import json
from io import BytesIO, StringIO
from typing import IO, Union

import jsonlines
import pandas as pd


def alpaca_to_chatgpt(jsf: Union[str, bytes, bytearray, IO]) -> BytesIO:
    """Convert Alpaca-style records into chat-format JSON Lines.

    Each input record must be a mapping with ``instruction`` and ``output``
    keys. Every record becomes one JSONL line holding a ``messages`` list
    with a fixed system prompt, the lower-cased instruction as the user
    turn, and the output as the assistant turn.

    Args:
        jsf: Either a JSON document (``str``, ``bytes`` or ``bytearray``)
            or a readable file-like object containing one. The document
            must decode to an iterable of records.

    Returns:
        An in-memory binary buffer containing the JSONL data, rewound to
        the start so it can be read or uploaded directly.

    Raises:
        json.JSONDecodeError: If the input is not valid JSON.
        KeyError: If a record lacks ``instruction`` or ``output``.
        AttributeError: If a record's ``instruction`` is not a string
            (it has no ``.lower()``).
    """
    SYSTEM_MESSAGE = "Please respond professionally and in a friendly manner, using the same language as the original request."

    # json.loads accepts bytes/bytearray as well as str; anything else is
    # treated as a file-like object exposing .read().
    if isinstance(jsf, (str, bytes, bytearray)):
        records = json.loads(jsf)
    else:
        records = json.load(jsf)

    conversations = [
        {
            "messages": [
                {"role": "system", "content": SYSTEM_MESSAGE},
                {"role": "user", "content": record["instruction"].lower()},
                {"role": "assistant", "content": record["output"]},
            ]
        }
        for record in records
    ]

    fp = BytesIO()
    # Closing a Writer does not close a file object it did not open itself,
    # so the buffer remains usable after the with-block.
    with jsonlines.Writer(fp) as writer:
        writer.write_all(conversations)

    # Rewind: otherwise the stream position sits at end-of-data and a plain
    # fp.read() by the caller would return nothing.
    fp.seek(0)
    return fp


def csv_to_jsonl(csvf: bytes) -> BytesIO:
    """Convert raw CSV bytes into chat-format JSON Lines.

    The CSV is parsed with pandas, missing cells are replaced with empty
    strings, and the rows are passed to :func:`alpaca_to_chatgpt` as a JSON
    array of records. The CSV therefore needs ``instruction`` and
    ``output`` columns.

    Args:
        csvf: The CSV file content as bytes.

    Returns:
        The buffer produced by :func:`alpaca_to_chatgpt`.
    """
    df = pd.read_csv(BytesIO(csvf))
    # Empty cells parse as NaN; replace them so the JSON holds "" not null.
    df = df.fillna("")
    return alpaca_to_chatgpt(df.to_json(orient="records"))