synthetise_dialogue_react.py
# Load the messages from fewshots_mpc/mpc_messages.jsonl, use them to generate
# dialogue-react examples, and save the generated conversations to a JSONL file.
from llm_engines import ChatgptLLM
from jinja2 import Template
import json
import random
import re
# render the prompt template with the given context and query the LLM
def generate_examples(llm, template, pre_examples, messages):
    # messages should be a list, each element a single message
    context = {
        "messages": messages,
        "pre_examples": pre_examples,
    }
    prompt = template.render(context)
    response = llm.generate_response(prompt)
    return response
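
# A minimal sketch of what fewshots_mpc/mpc_react.j2 might contain, assuming it
# simply interpolates the two context keys rendered above (the actual template
# is not shown in this view; this is illustrative only):
#
#   Here are some annotated dialogue-react examples:
#   {% for pre_example in pre_examples %}
#   {{ pre_example }}
#   {% endfor %}
#
#   Annotate the following conversation in the same style:
#   {% for message in messages %}
#   {{ message }}
#   {% endfor %}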

def main():
    # load the llm model
    llm = ChatgptLLM()
    llm.model = "gpt-4-turbo-preview"

    # load the jinja template
    with open("fewshots_mpc/mpc_react.j2", "r") as f:
        template = Template(f.read())

    # load the messages
    messages_path = "fewshots_mpc/mpc_messages.jsonl"
    with open(messages_path, "r") as f:
        messages = [json.loads(line) for line in f]
    print(f"Total messages: {len(messages)}")

    # load pre_examples: fewshots_mpc/pre_examples.txt contains example blocks,
    # each introduced by a delimiter line of the form "### pre_examples <n>"
    pre_examples_path = "fewshots_mpc/pre_examples.txt"
    with open(pre_examples_path, "r") as f:
        pre_examples_text = "\n".join(line.strip() for line in f)
    # split on the delimiter lines, strip, and drop empty fragments
    dividing_pattern = re.compile(r"### pre_examples \d+")
    pre_examples = [p.strip() for p in dividing_pattern.split(pre_examples_text)]
    pre_examples = [p for p in pre_examples if p]
    print(f"Total pre_examples: {len(pre_examples)}")

    # resume id numbering from the last record in the output file, if present
    output_path = "fewshots_mpc/generated_dialogue_reacts.jsonl"
    try:
        with open(output_path, "r") as f:
            lines = f.readlines()
        last_id = json.loads(lines[-1])["id"]
        starting_file_index = last_id + 1
    except (FileNotFoundError, IndexError, KeyError, json.JSONDecodeError):
        starting_file_index = 0

    n_iterations = 100
    print(f"Generating {n_iterations} times")
    with open(output_path, "a") as out_file:
        for i in range(starting_file_index, starting_file_index + n_iterations):
            # sample the conversation length from a normal distribution,
            # clamped to the range [7, 30]
            n_messages = int(random.normalvariate(15, 2))
            n_messages = max(7, min(n_messages, 30))
            print("Generation number:", i)
            print(f"Generating a conversation with {n_messages} messages")

            # pick a random contiguous window of messages as the context
            starting_index = random.randint(0, len(messages) - n_messages)
            messages_for_gen = messages[starting_index:starting_index + n_messages]
            print(f"Starting index: {starting_index}")

            # pick one pre_example to condition the generation on
            pre_examples_for_gen = random.sample(pre_examples, 1)
            response = generate_examples(llm, template, pre_examples_for_gen, messages_for_gen)
            print(f"Generated response of length: {len(response)}")

            conv_dict = {
                "id": i,
                "pre_examples": pre_examples_for_gen,
                "messages": messages_for_gen,
                "generated_response": response,
            }
            out_file.write(json.dumps(conv_dict) + "\n")
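
            # Each appended line is a self-contained JSON record of the form
            # (values illustrative):
            #   {"id": 0, "pre_examples": ["..."], "messages": [...], "generated_response": "..."}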

if __name__ == "__main__":
    main()
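
# Usage: python synthetise_dialogue_react.py
# Re-running the script appends to generated_dialogue_reacts.jsonl and resumes
# id numbering from the last record, so repeated runs accumulate examples.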