-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathprompts_subprocess.py
126 lines (111 loc) · 5.69 KB
/
prompts_subprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import subprocess
import sys
import ast
import json
import argparse
sys.path.append("/home/ayushagrawal/FSTAR/AI4F-star")
from openai_query import chat
import os
# fixed_prompts =
def run_cmd_elaborate(jsonl_file,data_type):
try:
elaborated = subprocess.check_output(["lake","exe","bulkelab","--query_data",jsonl_file,data_type,"-d","0"], stderr=subprocess.STDOUT, universal_newlines=True)
except subprocess.CalledProcessError as e:
elaborated = e.output
with open(jsonl_file.replace(".jsonl",".txt"),"w") as f:
f.write(elaborated)
def run_cmd_prompts(nl_stmt,num):
try:
prompts = subprocess.check_output(["lake","exe","nearest_embeddings_full",nl_stmt,str(num)], stderr=subprocess.STDOUT, universal_newlines=True)
except subprocess.CalledProcessError as e:
prompts = e.output
print("done",nl_stmt)
return prompts
def prompt_create(nl_stmt,prompts):
#print(prompts)
prompts = json.loads(prompts)
prompt = ""
for entry in prompts:
cur_prompt = f"""/-- {entry["docString"].strip()} -/
theorem {entry["theorem"].strip()} :=
"""
prompt = prompt + cur_prompt
add_nl_stmt = f"""/-- {nl_stmt.strip()} -/
theorem"""
prompt = prompt + add_nl_stmt
return prompt
def run_model(model,temperature,max_tokens,stop,num_gen,load_path):
# parser = argparse.ArgumentParser()
# parser.add_argument("--model","-m",type=str,required=True,help="path to model")
# parser.add_argument("--temperature","-t",type=float,default=0.8,help="temperature")
# parser.add_argument("--max_tokens","-max_t",type=int,default=500,help="max_tokens")
# parser.add_argument("--stop","-stop",type=str,default=":=",help="stop token")
# parser.add_argument("--load_path","-l",type=str,required=True,help="path to load json file")
# parser.add_argument("--save_path","-s",type=str,required=True,help="path to save directory")
# parser.add_argument("--num","-n",type=int,required=True,help="number of outputs to generate")
# parser.add_argument("--data_type","-d",type=str,required=True,help="data type")
# args = parser.parse_args()
stmt_json = json.load(open(load_path,"r"))
res = []
for i,entry in enumerate(stmt_json):
stmt = entry["stmt"]
prompt = prompt_create(stmt,entry["prompts"])
entry["prompt_cons"] = prompt
messages = [{"role" : "system", "content" : "You are a coding assistant who translates from natural language to Lean Theorem Prover code following examples. Follow EXACTLY the examples given"},{"role" : "user", "content" : prompt}]
entry["choices"] = chat(model,messages,temperature,num_gen,max_tokens,stop)
res.append(entry)
print("done",i)
with open(load_path.replace(".json","_results.json"),"w") as f:
json.dump(res,f,indent=2,ensure_ascii=False)
def save_prompts_jsons(load_path,save_path,num_prompts,prompt_type):
# parser = argparse.ArgumentParser()
# parser.add_argument("--load_path","-l",type=str,required=True,help="path to log directory")
# parser.add_argument("--save_path","-s",type=str,required=True,help="path to save directory")
# parser.add_argument("--num","-n",type=int,required=True,help="number of prompts to generate")
# args = parser.parse_args()
stmts = open(load_path,"r").readlines()
res = []
for stmt in stmts:
stmt = stmt.strip()
if stmt == "":
continue
prompts = run_cmd_prompts(stmt,num_prompts)
res.append({"stmt":stmt,"prompts":prompts})
os.makedirs(os.path.dirname(save_path), exist_ok=True)
with open(save_path,"w") as f:
json.dump(res,f,indent=2)
def json_to_jsonl(load_path):
#parser = argparse.ArgumentParser()
#parser.add_argument("--load_path","-l",type=str,required=True,help="path to json file")
# Load the JSON data as a list of dictionaries
#args = parser.parse_args()
with open(load_path, 'r') as f:
data = json.load(f)
# Dump each dictionary to a file with a newline character
with open(load_path.replace("json","jsonl"), 'w') as f:
for item in data:
json.dump({"docString" : item["stmt"], "choices" : item["choices"]}, f, ensure_ascii=False)
f.write('\n')
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--load_path","-l",type=str,required=True,help="path to log directory")
parser.add_argument("--save_path","-s",type=str,required=True,help="path to save directory")
parser.add_argument("--num_prompts","-np",type=int,required=True,help="number of prompts to generate")
parser.add_argument("--model","-m",type=str,required=True,help="path to model")
parser.add_argument("--temperature","-t",type=float,default=0.8,help="temperature")
parser.add_argument("--max_tokens","-max_t",type=int,default=500,help="max_tokens")
parser.add_argument("--stop","-stop",type=str,default=":=",help="stop token")
parser.add_argument("--num_gen","-ng",type=int,required=True,help="number of outputs to generate")
parser.add_argument("--data_type","-d",type=str,required=True,help="data type")
parser.add_argument("--prompt_type","-pt",type=str,required=True,help="prompt type")
args = parser.parse_args()
save_prompts_jsons(args.load_path,args.save_path,args.num_prompts)
# #model,temperature,max_tokens,stop,prompt,num_gen,load_path
run_model(args.model,args.temperature,args.max_tokens,args.stop,args.num_gen,args.save_path)
json_to_jsonl(args.save_path.replace(".json","_results.json"))
#run_cmd_elaborate(args.save_path.replace(".json","_results.jsonl"),args.data_type)
if __name__ == "__main__":
#save_prompts_jsons()
#run_model()
#json_to_jsonl()
main()