forked from ckdu/ai-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathopenAiDatasetDM.py
More file actions
53 lines (42 loc) · 1.61 KB
/
openAiDatasetDM.py
File metadata and controls
53 lines (42 loc) · 1.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import json
# ID of the author whose messages become the "prompt" side of each pair.
promptUserId = "157252176350150656"
# Base name (without extension) of the JSONL file written to the output folder.
outputName = "example_dm"

# One {"prompt": ..., "completion": ...} dict per exchange, across all inputs.
openAiLines = []

# Only these message types are treated as conversation text; any other type
# is skipped entirely.
chatMessageTypes = ("Default", "Reply")
# Marker appended to every emitted prompt.
# NOTE(review): presumably the end-of-prompt separator expected by the
# fine-tuning format this dataset targets -- confirm before changing.
promptSeparator = "\n\n###\n\n"


def buildPairs(messages, promptId):
    """Turn a chronological list of chat messages into prompt/completion pairs.

    Consecutive messages from the same author are merged with a newline.
    A pair is emitted every time the conversation hands back from another
    author to ``promptId``, and once more at the end of the list if the last
    speaker was not ``promptId`` (so the final exchange is not lost).

    Args:
        messages: Iterable of dicts, each with ``"type"``, ``"content"`` and
            ``"author"`` -> ``"id"`` keys (presumably a chat-export format;
            verify against the exporter that produced the input files).
        promptId: Author id whose messages form the prompts.

    Returns:
        A list of ``{"prompt": str, "completion": str}`` dicts. Every prompt
        ends with ``promptSeparator``.

    Raises:
        KeyError: If a conversational message lacks one of the expected keys.
    """
    pairs = []
    prompt = ""
    completion = ""
    previousId = ""  # "" means the prompt author has not spoken yet

    for message in messages:
        if message["type"] not in chatMessageTypes:
            continue

        # Non-ASCII characters (emoji, accents, ...) are dropped on purpose,
        # matching the script's original text normalisation.
        content = message["content"].encode("ascii", "ignore").decode()
        userId = message["author"]["id"]
        isPromptUser = userId == promptId

        if previousId == "":
            # Anything said before the prompt author's first message has no
            # prompt to attach to, so it is ignored.
            if not isPromptUser:
                continue
            prompt = content
        elif userId == previousId:
            # Same author keeps talking: extend whichever side they are on.
            if isPromptUser:
                prompt += "\n" + content
            else:
                completion += "\n" + content
        elif isPromptUser:
            # Turn returns to the prompt author: the previous exchange is
            # complete, so emit it and start a new prompt.
            pairs.append(
                {"prompt": prompt + promptSeparator, "completion": completion}
            )
            prompt = content
        else:
            # A different author starts answering: begin a fresh completion.
            completion = content
        previousId = userId

    # Flush the trailing exchange. It is only complete when someone other
    # than the prompt author spoke last; a dangling prompt has no completion.
    if previousId not in ("", promptId):
        pairs.append(
            {"prompt": prompt + promptSeparator, "completion": completion}
        )
    return pairs


def main(inputDir="input", outputDir="output"):
    """Convert every JSON file in ``inputDir`` into one JSONL dataset.

    Each input file must be a JSON object with a ``"messages"`` list. The
    resulting pairs are appended to the module-level ``openAiLines`` and then
    written, one JSON object per line, to ``<outputDir>/<outputName>.jsonl``.

    Args:
        inputDir: Folder containing the exported conversations.
        outputDir: Folder that receives the JSONL file; created if missing.

    Raises:
        FileNotFoundError: If ``inputDir`` does not exist.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    # Sorted so that the output order does not depend on the arbitrary order
    # in which the OS lists directory entries.
    for jsonFileName in sorted(os.listdir(inputDir)):
        # Use the loop variable itself (re-reading the first directory entry
        # would load the same file on every iteration) and build the path
        # explicitly instead of changing the working directory.
        jsonPath = os.path.join(inputDir, jsonFileName)
        with open(jsonPath, mode="r", encoding="utf-8") as f:
            data = json.load(f)
        print("Loaded " + jsonFileName)
        openAiLines.extend(buildPairs(data["messages"], promptUserId))

    os.makedirs(outputDir, exist_ok=True)
    outputPath = os.path.join(outputDir, outputName + ".jsonl")
    with open(outputPath, "w", encoding="utf-8") as outputFile:
        for entry in openAiLines:
            json.dump(entry, outputFile)
            outputFile.write("\n")


if __name__ == "__main__":
    main()