Skip to content

Commit a8f1ae5

Browse files
committed
add CodeQwen1.5-7B
1 parent 7322255 commit a8f1ae5

4 files changed

+37
-30
lines changed

results/evalperf/COMBINED-RESULTS.json

+35-29
Original file line numberDiff line numberDiff line change
@@ -3,144 +3,150 @@
33
"dps": 79.85512300275154,
44
"dps_norm": 76.4398130083651,
55
"pass@1": 77.63559322033899,
6-
"win_rate": 0.49004524886877826
6+
"win_rate": 0.4936570428696413
7+
},
8+
"Qwen--CodeQwen1.5-7B-Chat": {
9+
"dps": 80.70604298474056,
10+
"dps_norm": 77.70445772388321,
11+
"pass@1": 74.11016949152543,
12+
"win_rate": 0.3984565393988627
713
},
814
"mistralai--Codestral-22B-v0.1": {
915
"dps": 82.68924330620962,
1016
"dps_norm": 79.07819827252048,
1117
"pass@1": 72.83898305084746,
12-
"win_rate": 0.48872506504770163
18+
"win_rate": 0.4920368818105616
1319
},
1420
"Qwen--Qwen2.5-14B-Instruct": {
1521
"dps": 82.48566826452165,
1622
"dps_norm": 77.66575182191508,
1723
"pass@1": 82.48305084745763,
18-
"win_rate": 0.5291380008093889
24+
"win_rate": 0.5283979631805719
1925
},
2026
"deepseek-ai--deepseek-coder-33b-instruct": {
2127
"dps": 82.65680460618937,
2228
"dps_norm": 77.67106700323255,
2329
"pass@1": 77.50847457627118,
24-
"win_rate": 0.46629901960784315
30+
"win_rate": 0.46857707509881424
2531
},
2632
"01-ai--Yi-Coder-9B-Chat": {
2733
"dps": 83.60305818801254,
2834
"dps_norm": 78.76801867108128,
2935
"pass@1": 78.80508474576271,
30-
"win_rate": 0.4875316990701606
36+
"win_rate": 0.4916189697465249
3137
},
3238
"google--gemma-2-27b-it": {
3339
"dps": 84.20223252621882,
3440
"dps_norm": 78.67511770954597,
3541
"pass@1": 80.23728813559322,
36-
"win_rate": 0.5551975945017182
37-
},
38-
"Qwen--Qwen2.5-Coder-7B-Instruct": {
39-
"dps": 79.81023172966313,
40-
"dps_norm": 76.74937875060594,
41-
"pass@1": 76.42372881355932,
42-
"win_rate": 0.3791905151267375
42+
"win_rate": 0.5566625155666252
4343
},
4444
"Qwen--Qwen2.5-32B-Instruct": {
4545
"dps": 83.54712283112897,
4646
"dps_norm": 78.3588319852899,
4747
"pass@1": 87.5677966101695,
48-
"win_rate": 0.5474555735056543
48+
"win_rate": 0.5515826494724502
4949
},
5050
"mistralai--Mistral-Nemo-Instruct-2407": {
5151
"dps": 81.98436677712466,
5252
"dps_norm": 77.61969110961331,
5353
"pass@1": 57.389830508474574,
54-
"win_rate": 0.4432494279176201
54+
"win_rate": 0.44836716681376876
5555
},
5656
"Qwen--Qwen2.5-72B-Instruct": {
5757
"dps": 84.69258296490358,
5858
"dps_norm": 79.00610315513151,
5959
"pass@1": 88.27966101694915,
60-
"win_rate": 0.5433430111986728
60+
"win_rate": 0.5473515248796148
6161
},
6262
"gpt-4o-mini-2024-07-18": {
6363
"dps": 84.19186096830988,
6464
"dps_norm": 79.21827803090933,
6565
"pass@1": 85.51694915254237,
66-
"win_rate": 0.5626810095159288
66+
"win_rate": 0.5631009615384616
6767
},
6868
"Qwen--Qwen2.5-7B-Instruct": {
6969
"dps": 84.68492179229716,
7070
"dps_norm": 79.33664874489173,
7171
"pass@1": 80.02542372881356,
72-
"win_rate": 0.5323045267489712
72+
"win_rate": 0.5342220453641067
7373
},
7474
"gemini-1.5-pro-002": {
7575
"dps": 76.26393608564656,
7676
"dps_norm": 75.16850711244093,
7777
"pass@1": 83.71186440677967,
78-
"win_rate": 0.44697294976384716
78+
"win_rate": 0.4499168744804655
7979
},
8080
"deepseek-ai--deepseek-coder-6.7b-instruct": {
8181
"dps": 83.55516548026816,
8282
"dps_norm": 78.20569308671841,
8383
"pass@1": 73.57627118644068,
84-
"win_rate": 0.47451820128479655
84+
"win_rate": 0.4788907284768212
8585
},
8686
"nvidia--Llama-3.1-Nemotron-70B-Instruct-HF": {
8787
"dps": 78.40270067725943,
8888
"dps_norm": 76.22281062678165,
8989
"pass@1": 61.83050847457627,
90-
"win_rate": 0.4121043627031651
90+
"win_rate": 0.4110099337748344
9191
},
9292
"ise-uiuc--Magicoder-S-DS-6.7B": {
9393
"dps": 83.62957240263601,
9494
"dps_norm": 78.58003556526222,
9595
"pass@1": 69.83898305084746,
96-
"win_rate": 0.45699152542372884
96+
"win_rate": 0.45927138763814984
9797
},
9898
"google--gemma-2-9b-it": {
9999
"dps": 82.35863751376931,
100100
"dps_norm": 78.14625766928611,
101101
"pass@1": 68.07627118644068,
102-
"win_rate": 0.5287128712871287
102+
"win_rate": 0.5286123032904149
103103
},
104104
"mistralai--Mistral-Large-Instruct-2407": {
105105
"dps": 85.58694758404829,
106106
"dps_norm": 80.65167529745199,
107107
"pass@1": 82.4322033898305,
108-
"win_rate": 0.578808752025932
108+
"win_rate": 0.5823852491173009
109109
},
110110
"deepseek-ai--DeepSeek-Coder-V2-Lite-Instruct": {
111111
"dps": 82.81873138697289,
112112
"dps_norm": 78.09379634032757,
113113
"pass@1": 79.04237288135593,
114-
"win_rate": 0.5301230377598642
114+
"win_rate": 0.5299548625359048
115+
},
116+
"meta-llama--Meta-Llama-3-8B-Instruct": {
117+
"dps": 77.04079291937829,
118+
"dps_norm": 75.1570617360002,
119+
"pass@1": 43.69491525423729,
120+
"win_rate": 0.4024024024024024
115121
},
116122
"deepseek-chat": {
117123
"dps": 86.84652683144942,
118124
"dps_norm": 79.08774817854689,
119125
"pass@1": 88.51694915254237,
120-
"win_rate": 0.5940152801358234
126+
"win_rate": 0.5948665297741274
121127
},
122128
"gpt-4o-2024-08-06": {
123129
"dps": 87.8465793074948,
124130
"dps_norm": 82.23308124778713,
125131
"pass@1": 86.65254237288136,
126-
"win_rate": 0.6191666666666666
132+
"win_rate": 0.6226795803066989
127133
},
128134
"meta-llama--Llama-3.1-70B-Instruct": {
129135
"dps": 80.40042339048047,
130136
"dps_norm": 77.29818188061222,
131137
"pass@1": 77.26271186440678,
132-
"win_rate": 0.452760736196319
138+
"win_rate": 0.45465346534653467
133139
},
134140
"mistralai--Mistral-Small-Instruct-2409": {
135141
"dps": 82.25625421641564,
136142
"dps_norm": 78.98044554401069,
137143
"pass@1": 69.51694915254237,
138-
"win_rate": 0.47498931167165453
144+
"win_rate": 0.4797436957420422
139145
},
140146
"meta-llama--Llama-3.1-8B-Instruct": {
141147
"dps": 80.93273308299496,
142148
"dps_norm": 76.57037397653662,
143149
"pass@1": 64.34745762711864,
144-
"win_rate": 0.39338071968014215
150+
"win_rate": 0.3991434689507495
145151
}
146152
}

results/evalperf/Qwen--CodeQwen1.5-7B-Chat_vllm_temp_1.0_evalperf_results.brief.json

+1
Large diffs are not rendered by default.

results/evalperf/Qwen--Qwen2.5-Coder-7B-Instruct_vllm_temp_1.0_evalperf_results.brief.json

-1
This file was deleted.

results/evalperf/meta-llama--Meta-Llama-3-8B-Instruct_vllm_temp_1.0_evalperf_results.brief.json

+1
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)