|
3 | 3 | "dps": 79.85512300275154,
|
4 | 4 | "dps_norm": 76.4398130083651,
|
5 | 5 | "pass@1": 77.63559322033899,
|
6 |
| - "win_rate": 0.49004524886877826 |
| 6 | + "win_rate": 0.4936570428696413 |
| 7 | + }, |
| 8 | + "Qwen--CodeQwen1.5-7B-Chat": { |
| 9 | + "dps": 80.70604298474056, |
| 10 | + "dps_norm": 77.70445772388321, |
| 11 | + "pass@1": 74.11016949152543, |
| 12 | + "win_rate": 0.3984565393988627 |
7 | 13 | },
|
8 | 14 | "mistralai--Codestral-22B-v0.1": {
|
9 | 15 | "dps": 82.68924330620962,
|
10 | 16 | "dps_norm": 79.07819827252048,
|
11 | 17 | "pass@1": 72.83898305084746,
|
12 |
| - "win_rate": 0.48872506504770163 |
| 18 | + "win_rate": 0.4920368818105616 |
13 | 19 | },
|
14 | 20 | "Qwen--Qwen2.5-14B-Instruct": {
|
15 | 21 | "dps": 82.48566826452165,
|
16 | 22 | "dps_norm": 77.66575182191508,
|
17 | 23 | "pass@1": 82.48305084745763,
|
18 |
| - "win_rate": 0.5291380008093889 |
| 24 | + "win_rate": 0.5283979631805719 |
19 | 25 | },
|
20 | 26 | "deepseek-ai--deepseek-coder-33b-instruct": {
|
21 | 27 | "dps": 82.65680460618937,
|
22 | 28 | "dps_norm": 77.67106700323255,
|
23 | 29 | "pass@1": 77.50847457627118,
|
24 |
| - "win_rate": 0.46629901960784315 |
| 30 | + "win_rate": 0.46857707509881424 |
25 | 31 | },
|
26 | 32 | "01-ai--Yi-Coder-9B-Chat": {
|
27 | 33 | "dps": 83.60305818801254,
|
28 | 34 | "dps_norm": 78.76801867108128,
|
29 | 35 | "pass@1": 78.80508474576271,
|
30 |
| - "win_rate": 0.4875316990701606 |
| 36 | + "win_rate": 0.4916189697465249 |
31 | 37 | },
|
32 | 38 | "google--gemma-2-27b-it": {
|
33 | 39 | "dps": 84.20223252621882,
|
34 | 40 | "dps_norm": 78.67511770954597,
|
35 | 41 | "pass@1": 80.23728813559322,
|
36 |
| - "win_rate": 0.5551975945017182 |
37 |
| - }, |
38 |
| - "Qwen--Qwen2.5-Coder-7B-Instruct": { |
39 |
| - "dps": 79.81023172966313, |
40 |
| - "dps_norm": 76.74937875060594, |
41 |
| - "pass@1": 76.42372881355932, |
42 |
| - "win_rate": 0.3791905151267375 |
| 42 | + "win_rate": 0.5566625155666252 |
43 | 43 | },
|
44 | 44 | "Qwen--Qwen2.5-32B-Instruct": {
|
45 | 45 | "dps": 83.54712283112897,
|
46 | 46 | "dps_norm": 78.3588319852899,
|
47 | 47 | "pass@1": 87.5677966101695,
|
48 |
| - "win_rate": 0.5474555735056543 |
| 48 | + "win_rate": 0.5515826494724502 |
49 | 49 | },
|
50 | 50 | "mistralai--Mistral-Nemo-Instruct-2407": {
|
51 | 51 | "dps": 81.98436677712466,
|
52 | 52 | "dps_norm": 77.61969110961331,
|
53 | 53 | "pass@1": 57.389830508474574,
|
54 |
| - "win_rate": 0.4432494279176201 |
| 54 | + "win_rate": 0.44836716681376876 |
55 | 55 | },
|
56 | 56 | "Qwen--Qwen2.5-72B-Instruct": {
|
57 | 57 | "dps": 84.69258296490358,
|
58 | 58 | "dps_norm": 79.00610315513151,
|
59 | 59 | "pass@1": 88.27966101694915,
|
60 |
| - "win_rate": 0.5433430111986728 |
| 60 | + "win_rate": 0.5473515248796148 |
61 | 61 | },
|
62 | 62 | "gpt-4o-mini-2024-07-18": {
|
63 | 63 | "dps": 84.19186096830988,
|
64 | 64 | "dps_norm": 79.21827803090933,
|
65 | 65 | "pass@1": 85.51694915254237,
|
66 |
| - "win_rate": 0.5626810095159288 |
| 66 | + "win_rate": 0.5631009615384616 |
67 | 67 | },
|
68 | 68 | "Qwen--Qwen2.5-7B-Instruct": {
|
69 | 69 | "dps": 84.68492179229716,
|
70 | 70 | "dps_norm": 79.33664874489173,
|
71 | 71 | "pass@1": 80.02542372881356,
|
72 |
| - "win_rate": 0.5323045267489712 |
| 72 | + "win_rate": 0.5342220453641067 |
73 | 73 | },
|
74 | 74 | "gemini-1.5-pro-002": {
|
75 | 75 | "dps": 76.26393608564656,
|
76 | 76 | "dps_norm": 75.16850711244093,
|
77 | 77 | "pass@1": 83.71186440677967,
|
78 |
| - "win_rate": 0.44697294976384716 |
| 78 | + "win_rate": 0.4499168744804655 |
79 | 79 | },
|
80 | 80 | "deepseek-ai--deepseek-coder-6.7b-instruct": {
|
81 | 81 | "dps": 83.55516548026816,
|
82 | 82 | "dps_norm": 78.20569308671841,
|
83 | 83 | "pass@1": 73.57627118644068,
|
84 |
| - "win_rate": 0.47451820128479655 |
| 84 | + "win_rate": 0.4788907284768212 |
85 | 85 | },
|
86 | 86 | "nvidia--Llama-3.1-Nemotron-70B-Instruct-HF": {
|
87 | 87 | "dps": 78.40270067725943,
|
88 | 88 | "dps_norm": 76.22281062678165,
|
89 | 89 | "pass@1": 61.83050847457627,
|
90 |
| - "win_rate": 0.4121043627031651 |
| 90 | + "win_rate": 0.4110099337748344 |
91 | 91 | },
|
92 | 92 | "ise-uiuc--Magicoder-S-DS-6.7B": {
|
93 | 93 | "dps": 83.62957240263601,
|
94 | 94 | "dps_norm": 78.58003556526222,
|
95 | 95 | "pass@1": 69.83898305084746,
|
96 |
| - "win_rate": 0.45699152542372884 |
| 96 | + "win_rate": 0.45927138763814984 |
97 | 97 | },
|
98 | 98 | "google--gemma-2-9b-it": {
|
99 | 99 | "dps": 82.35863751376931,
|
100 | 100 | "dps_norm": 78.14625766928611,
|
101 | 101 | "pass@1": 68.07627118644068,
|
102 |
| - "win_rate": 0.5287128712871287 |
| 102 | + "win_rate": 0.5286123032904149 |
103 | 103 | },
|
104 | 104 | "mistralai--Mistral-Large-Instruct-2407": {
|
105 | 105 | "dps": 85.58694758404829,
|
106 | 106 | "dps_norm": 80.65167529745199,
|
107 | 107 | "pass@1": 82.4322033898305,
|
108 |
| - "win_rate": 0.578808752025932 |
| 108 | + "win_rate": 0.5823852491173009 |
109 | 109 | },
|
110 | 110 | "deepseek-ai--DeepSeek-Coder-V2-Lite-Instruct": {
|
111 | 111 | "dps": 82.81873138697289,
|
112 | 112 | "dps_norm": 78.09379634032757,
|
113 | 113 | "pass@1": 79.04237288135593,
|
114 |
| - "win_rate": 0.5301230377598642 |
| 114 | + "win_rate": 0.5299548625359048 |
| 115 | + }, |
| 116 | + "meta-llama--Meta-Llama-3-8B-Instruct": { |
| 117 | + "dps": 77.04079291937829, |
| 118 | + "dps_norm": 75.1570617360002, |
| 119 | + "pass@1": 43.69491525423729, |
| 120 | + "win_rate": 0.4024024024024024 |
115 | 121 | },
|
116 | 122 | "deepseek-chat": {
|
117 | 123 | "dps": 86.84652683144942,
|
118 | 124 | "dps_norm": 79.08774817854689,
|
119 | 125 | "pass@1": 88.51694915254237,
|
120 |
| - "win_rate": 0.5940152801358234 |
| 126 | + "win_rate": 0.5948665297741274 |
121 | 127 | },
|
122 | 128 | "gpt-4o-2024-08-06": {
|
123 | 129 | "dps": 87.8465793074948,
|
124 | 130 | "dps_norm": 82.23308124778713,
|
125 | 131 | "pass@1": 86.65254237288136,
|
126 |
| - "win_rate": 0.6191666666666666 |
| 132 | + "win_rate": 0.6226795803066989 |
127 | 133 | },
|
128 | 134 | "meta-llama--Llama-3.1-70B-Instruct": {
|
129 | 135 | "dps": 80.40042339048047,
|
130 | 136 | "dps_norm": 77.29818188061222,
|
131 | 137 | "pass@1": 77.26271186440678,
|
132 |
| - "win_rate": 0.452760736196319 |
| 138 | + "win_rate": 0.45465346534653467 |
133 | 139 | },
|
134 | 140 | "mistralai--Mistral-Small-Instruct-2409": {
|
135 | 141 | "dps": 82.25625421641564,
|
136 | 142 | "dps_norm": 78.98044554401069,
|
137 | 143 | "pass@1": 69.51694915254237,
|
138 |
| - "win_rate": 0.47498931167165453 |
| 144 | + "win_rate": 0.4797436957420422 |
139 | 145 | },
|
140 | 146 | "meta-llama--Llama-3.1-8B-Instruct": {
|
141 | 147 | "dps": 80.93273308299496,
|
142 | 148 | "dps_norm": 76.57037397653662,
|
143 | 149 | "pass@1": 64.34745762711864,
|
144 |
| - "win_rate": 0.39338071968014215 |
| 150 | + "win_rate": 0.3991434689507495 |
145 | 151 | }
|
146 | 152 | }
|
0 commit comments