Skip to content

Commit 9228603

Browse files
authored
Merge pull request #265 from deepmodeling/fix/cap-error-metric
Fix: error metric calculation
2 parents 4812e81 + 18630d9 commit 9228603

File tree

8 files changed

+89
-84
lines changed

8 files changed

+89
-84
lines changed

lambench/metrics/results/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ To assess model performance across these domains, we use zero-shot inference wit
4444

4545
1. The error metric is normalized against the error metric of a baseline model (dummy model) as follows:
4646

47-
$$\hat{M}^m_{k,p,i} = \frac{M^m_{k,p,i}}{M^{\mathrm{dummy}}_{k,p,i}}$$
47+
$$\hat{M}^m_{k,p,i} = \min\left(\frac{M^m_{k,p,i}}{M^{\mathrm{dummy}}_{k,p,i}},\quad 1\right)$$
4848

49-
where $M^m_{k,p,i}$ is the original error metric, $m$ indicates the model, $k$ denotes the domain index, $p$ signifies the prediction index, and $i$ represents the test set index.
49+
where $M^m_{k,p,i}$ is the original error metric, $m$ indicates the model, $k$ denotes the domain index, $p$ signifies the prediction index, and $i$ represents the test set index. If a model is less accurate than the dummy model (i.e., the ratio exceeds 1), the normalized error metric $\hat{M}^m_{k,p,i}$ is capped at 1.
5050
For instance, in force field tasks, the domains include Small Molecules, Inorganic Materials, Biomolecules, Reactions, and Catalysis, such that $k \in \{\text{Small Molecules, Inorganic Materials, Biomolecules, Reactions, Catalysis}\}$. The prediction types are categorized as energy ($E$), force ($F$), or virial ($V$), with $p \in \{E, F, V\}$.
5151
For the specific domain of Reactions, the test sets are indexed as $i \in \{\text{Guan2022Benchmark, Gasteiger2020Fast}\}$. The dummy baseline model predicts energy based solely on the chemical formula, disregarding any structural details, thereby providing a reference point for evaluating the improvement offered by more sophisticated models.
5252

lambench/metrics/results/barplot.json

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
},
1717
"Inorganic Materials": {
1818
"DPA-2.4-7M": 0.26,
19-
"DPA-3.0-7M": 0.27,
19+
"DPA-3.0-7M": 0.25,
2020
"DPA-3.0-3M": 0.26,
21-
"MACE-MP-0": 0.38,
21+
"MACE-MP-0": 0.35,
2222
"MACE-MPA-0": 0.26,
23-
"Orb-v2": 0.29,
23+
"Orb-v2": 0.28,
2424
"Orb-v3": 0.23,
25-
"SevenNet-0": 0.34,
25+
"SevenNet-0": 0.33,
2626
"SevenNet-l3i5": 0.3,
2727
"SevenNet-MF-ompa": 0.24,
2828
"MatterSim-v1-5M": 0.27,
@@ -61,17 +61,17 @@
6161
},
6262
"Catalysis": {
6363
"DPA-2.4-7M": 0.39,
64-
"DPA-3.0-7M": 0.41,
65-
"DPA-3.0-3M": 0.76,
66-
"MACE-MP-0": 0.56,
67-
"MACE-MPA-0": 0.59,
68-
"Orb-v2": 0.73,
69-
"Orb-v3": 0.5,
70-
"SevenNet-0": 0.63,
71-
"SevenNet-l3i5": 0.64,
72-
"SevenNet-MF-ompa": 0.73,
73-
"MatterSim-v1-5M": 0.85,
74-
"GRACE-2L-OAM": 0.6,
75-
"PET-MAD": 0.53
64+
"DPA-3.0-7M": 0.33,
65+
"DPA-3.0-3M": 0.49,
66+
"MACE-MP-0": 0.46,
67+
"MACE-MPA-0": 0.43,
68+
"Orb-v2": 0.37,
69+
"Orb-v3": 0.31,
70+
"SevenNet-0": 0.51,
71+
"SevenNet-l3i5": 0.52,
72+
"SevenNet-MF-ompa": 0.4,
73+
"MatterSim-v1-5M": 0.43,
74+
"GRACE-2L-OAM": 0.38,
75+
"PET-MAD": 0.49
7676
}
7777
}

lambench/metrics/results/final_rankings.json

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,84 +1,84 @@
11
[
22
{
33
"Model": "DPA-3.0-7M",
4-
"Generalizability-FF Error \u2193": 0.245,
4+
"Generalizability-FF Error \u2193": 0.225,
55
"Generalizability-DS Error \u2193": 0.161,
66
"Applicability-Instability \u2193": 0.291,
77
"Applicability-Efficiency \u2191": 0.151
88
},
9+
{
10+
"Model": "Orb-v3",
11+
"Generalizability-FF Error \u2193": 0.24,
12+
"Generalizability-DS Error \u2193": 0.24,
13+
"Applicability-Instability \u2193": 0.0,
14+
"Applicability-Efficiency \u2191": 0.4
15+
},
916
{
1017
"Model": "DPA-2.4-7M",
1118
"Generalizability-FF Error \u2193": 0.265,
1219
"Generalizability-DS Error \u2193": 0.208,
1320
"Applicability-Instability \u2193": 0.039,
1421
"Applicability-Efficiency \u2191": 0.614
1522
},
16-
{
17-
"Model": "Orb-v3",
18-
"Generalizability-FF Error \u2193": 0.28,
19-
"Generalizability-DS Error \u2193": 0.24,
20-
"Applicability-Instability \u2193": 0.0,
21-
"Applicability-Efficiency \u2191": 0.4
22-
},
2323
{
2424
"Model": "DPA-3.0-3M",
25-
"Generalizability-FF Error \u2193": 0.338,
25+
"Generalizability-FF Error \u2193": 0.284,
2626
"Generalizability-DS Error \u2193": 0.257,
2727
"Applicability-Instability \u2193": 0.48,
2828
"Applicability-Efficiency \u2191": 0.296
2929
},
30+
{
31+
"Model": "Orb-v2",
32+
"Generalizability-FF Error \u2193": 0.284,
33+
"Generalizability-DS Error \u2193": 0.501,
34+
"Applicability-Instability \u2193": 2.649,
35+
"Applicability-Efficiency \u2191": 1.343
36+
},
37+
{
38+
"Model": "SevenNet-MF-ompa",
39+
"Generalizability-FF Error \u2193": 0.292,
40+
"Generalizability-DS Error \u2193": 0.3,
41+
"Applicability-Instability \u2193": 0.0,
42+
"Applicability-Efficiency \u2191": 0.088
43+
},
3044
{
3145
"Model": "GRACE-2L-OAM",
32-
"Generalizability-FF Error \u2193": 0.34,
46+
"Generalizability-FF Error \u2193": 0.296,
3347
"Generalizability-DS Error \u2193": 0.262,
3448
"Applicability-Instability \u2193": 0.309,
3549
"Applicability-Efficiency \u2191": 0.678
3650
},
3751
{
38-
"Model": "SevenNet-l3i5",
39-
"Generalizability-FF Error \u2193": 0.355,
40-
"Generalizability-DS Error \u2193": 0.24,
41-
"Applicability-Instability \u2193": 0.036,
42-
"Applicability-Efficiency \u2191": 0.279
52+
"Model": "MatterSim-v1-5M",
53+
"Generalizability-FF Error \u2193": 0.306,
54+
"Generalizability-DS Error \u2193": 0.28,
55+
"Applicability-Instability \u2193": 0.0,
56+
"Applicability-Efficiency \u2191": 0.388
4357
},
4458
{
4559
"Model": "MACE-MPA-0",
46-
"Generalizability-FF Error \u2193": 0.356,
60+
"Generalizability-FF Error \u2193": 0.324,
4761
"Generalizability-DS Error \u2193": 0.291,
4862
"Applicability-Instability \u2193": 0.0,
4963
"Applicability-Efficiency \u2191": 0.291
5064
},
5165
{
52-
"Model": "Orb-v2",
53-
"Generalizability-FF Error \u2193": 0.356,
54-
"Generalizability-DS Error \u2193": 0.56,
55-
"Applicability-Instability \u2193": 2.649,
56-
"Applicability-Efficiency \u2191": 1.343
57-
},
58-
{
59-
"Model": "SevenNet-MF-ompa",
60-
"Generalizability-FF Error \u2193": 0.358,
61-
"Generalizability-DS Error \u2193": 0.3,
62-
"Applicability-Instability \u2193": 0.0,
63-
"Applicability-Efficiency \u2191": 0.088
66+
"Model": "SevenNet-l3i5",
67+
"Generalizability-FF Error \u2193": 0.33,
68+
"Generalizability-DS Error \u2193": 0.24,
69+
"Applicability-Instability \u2193": 0.036,
70+
"Applicability-Efficiency \u2191": 0.279
6471
},
6572
{
6673
"Model": "SevenNet-0",
67-
"Generalizability-FF Error \u2193": 0.369,
74+
"Generalizability-FF Error \u2193": 0.343,
6875
"Generalizability-DS Error \u2193": 0.246,
6976
"Applicability-Instability \u2193": 0.556,
7077
"Applicability-Efficiency \u2191": 0.76
7178
},
72-
{
73-
"Model": "MatterSim-v1-5M",
74-
"Generalizability-FF Error \u2193": 0.389,
75-
"Generalizability-DS Error \u2193": 0.28,
76-
"Applicability-Instability \u2193": 0.0,
77-
"Applicability-Efficiency \u2191": 0.388
78-
},
7979
{
8080
"Model": "MACE-MP-0",
81-
"Generalizability-FF Error \u2193": 0.405,
81+
"Generalizability-FF Error \u2193": 0.38,
8282
"Generalizability-DS Error \u2193": 0.341,
8383
"Applicability-Instability \u2193": 0.089,
8484
"Applicability-Efficiency \u2191": 0.291

lambench/metrics/results/radar.json

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,10 @@
6666
"name": "DPA-3.0-7M",
6767
"value": [
6868
0.83,
69-
0.73,
69+
0.75,
7070
0.83,
7171
0.8,
72-
0.5900000000000001
72+
0.6699999999999999
7373
],
7474
"areaStyle": {
7575
"opacity": 0.1
@@ -82,17 +82,17 @@
8282
0.74,
8383
0.75,
8484
0.76,
85-
0.24
85+
0.51
8686
]
8787
},
8888
{
8989
"name": "MACE-MP-0",
9090
"value": [
9191
0.72,
92-
0.62,
92+
0.65,
9393
0.51,
9494
0.6799999999999999,
95-
0.43999999999999995
95+
0.54
9696
]
9797
},
9898
{
@@ -102,17 +102,17 @@
102102
0.74,
103103
0.6,
104104
0.71,
105-
0.41000000000000003
105+
0.5700000000000001
106106
]
107107
},
108108
{
109109
"name": "Orb-v2",
110110
"value": [
111111
0.8,
112-
0.71,
112+
0.72,
113113
0.6699999999999999,
114114
0.76,
115-
0.27
115+
0.63
116116
]
117117
},
118118
{
@@ -122,17 +122,17 @@
122122
0.77,
123123
0.74,
124124
0.79,
125-
0.5
125+
0.69
126126
]
127127
},
128128
{
129129
"name": "SevenNet-0",
130130
"value": [
131131
0.76,
132-
0.6599999999999999,
132+
0.6699999999999999,
133133
0.64,
134134
0.72,
135-
0.37
135+
0.49
136136
]
137137
},
138138
{
@@ -142,7 +142,7 @@
142142
0.7,
143143
0.6699999999999999,
144144
0.71,
145-
0.36
145+
0.48
146146
]
147147
},
148148
{
@@ -152,7 +152,7 @@
152152
0.76,
153153
0.64,
154154
0.76,
155-
0.27
155+
0.6
156156
]
157157
},
158158
{
@@ -162,7 +162,7 @@
162162
0.73,
163163
0.6599999999999999,
164164
0.74,
165-
0.15000000000000002
165+
0.5700000000000001
166166
]
167167
},
168168
{
@@ -172,7 +172,7 @@
172172
0.75,
173173
0.6799999999999999,
174174
0.71,
175-
0.4
175+
0.62
176176
]
177177
},
178178
{
@@ -182,7 +182,7 @@
182182
null,
183183
0.6599999999999999,
184184
0.6,
185-
0.47
185+
0.51
186186
]
187187
}
188188
]

0 commit comments

Comments
 (0)