Skip to content

Commit 1a39336

Browse files
committed
rm dev_lst
1 parent 69a5cf4 commit 1a39336

File tree

1 file changed

+11
-36
lines changed

1 file changed

+11
-36
lines changed

eval/scripts/test_generated_code.py

+11-36
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,13 @@
88
from scicode.parse.parse import read_from_jsonl
99

1010

11-
prob_num = 80
12-
step_num = 338
11+
prob_num = 65
12+
step_num = 288
1313

1414
logs_dir = 'eval/logs'
15-
dev_lst = [1, 3, 4, 6, 7, 10, 19, 29, 38, 44, 47, 49, 51, 70, 78]
16-
step_num_dev = 50
15+
code_dir = 'eval_results/generated_code'
16+
test_result_dir = 'test_result'
17+
1718

1819
if not os.path.exists(logs_dir):
1920
os.makedirs(logs_dir)
@@ -31,13 +32,13 @@
3132
def test_code(model_name):
3233
start_time = time.time()
3334

34-
code_dir = f'eval_results/generated_code/{model_name}'
35+
code_dir_ = f'{code_dir}/{model_name}'
3536
tmp_dir = f'tmp_{start_time}'
3637

3738
if not os.path.exists(tmp_dir):
3839
os.makedirs(tmp_dir)
3940

40-
for root, _, files in os.walk(code_dir):
41+
for root, _, files in os.walk(code_dir_):
4142
for file in files:
4243
file_name = Path(file).stem
4344
file_id = file_name.split(".")[0]
@@ -120,24 +121,16 @@ def run_script(script_path):
120121
correct_prob_num = sum(1 for i in range(prob_num) if
121122
correct_prob[i] == tot_prob[i]
122123
and tot_prob[i] != 0)
123-
correct_prob_num_dev = sum(1 for i in range(prob_num) if
124-
correct_prob[i] == tot_prob[i] and
125-
tot_prob[i] != 0 and
126-
(i + 1) not in dev_lst)
127124

128-
correct_step_dev = [i for i in correct_step if int(i.split('.')[0]) not in dev_lst]
129-
130-
print(f'correct problems(include dev set): {correct_prob_num}/{prob_num}')
131-
print(f'correct steps(include dev set): {len(correct_step)}/{step_num}')
125+
print(f'correct problems: {correct_prob_num}/{prob_num}')
126+
print(f'correct steps: {len(correct_step)}/{step_num}')
132127

133128
if not os.path.exists(test_result_dir):
134129
os.makedirs(test_result_dir)
135130

136131
with open(f'{test_result_dir}/{model_name}.txt', 'w') as f:
137132
f.write(f'correct problems(include dev set): {correct_prob_num}/{prob_num}\n')
138133
f.write(f'correct steps(include dev set): {len(correct_step)}/{step_num}\n\n')
139-
f.write(f'correct problems(exclude dev set): {correct_prob_num_dev}/{prob_num - len(dev_lst)}\n')
140-
f.write(f'correct steps(exclude dev set): {len(correct_step_dev)}/{step_num - step_num_dev}\n\n')
141134
f.write(f'duration: {test_time} seconds\n')
142135
f.write('\ncorrect problems: ')
143136
f.write(f'\n\n{[i + 1 for i in range(prob_num) if correct_prob[i] == tot_prob[i] and tot_prob[i] != 0]}\n')
@@ -148,25 +141,7 @@ def run_script(script_path):
148141
Path(tmp_dir).rmdir()
149142

150143

151-
def delete_all_files_in_folder(folder_path):
152-
try:
153-
for filename in os.listdir(folder_path):
154-
file_path = os.path.join(folder_path, filename)
155-
if os.path.isfile(file_path):
156-
os.unlink(file_path)
157-
elif os.path.isdir(file_path):
158-
delete_all_files_in_folder(file_path)
159-
os.rmdir(file_path)
160-
print(f"All files in {folder_path} have been deleted.")
161-
except Exception as e:
162-
print(f"Error: {e}")
163144

164145

165-
test_result_dir = 'test_result'
166-
models = ['gpt-4o', 'claude-3-5-sonnet-20240620', 'gpt-4-turbo-2024-04-09',
167-
'claude-3-sonnet-20240229', 'claude-3-opus-20240229', 'gemini-1.5-pro']
168-
llm_code = True
169-
bgs = [0, 1, 3]
170-
for m in models:
171-
for bg in bgs:
172-
test_code(m)
146+
model = 'gpt-4o'
147+
test_code(model)

0 commit comments

Comments
 (0)