Skip to content

Feat: Add new efficiency test #264

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 9, 2025
2 changes: 1 addition & 1 deletion lambench/metrics/vishelper/metrics_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def calculate_stability_results(self) -> dict[str, float]:
}

stability_results = pd.DataFrame.from_dict(stability_results, orient="index")
stability_results = stability_results.applymap(
stability_results = stability_results.map(
lambda cell: self._calculate_instability_error(cell)
)
# average over all systems
Expand Down
4 changes: 2 additions & 2 deletions lambench/tasks/calculator/calculator_tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ phonon_mdr:
calculator_params:
distance: 0.01
inference_efficiency:
test_data: /bohr/lambench-efficiency-rg7a/v2/efficiency
test_data: /bohr/lambench-efficiency-rg7a/v3/efficiency
calculator_params:
warmup_ratio: 0.2
warmup_ratio: 0.1
torsionnet:
test_data: /bohr/lambench-torsionnet-e4sc/v2/torsionnet500_wB97m
calculator_params: null
100 changes: 100 additions & 0 deletions lambench/tasks/calculator/inference_efficiency/efficiency_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from ase.atoms import Atoms
from lambench.models.ase_models import ASEModel
import numpy as np
import math


def get_efv(atoms: Atoms) -> tuple[float, np.ndarray, np.ndarray]:
    """
    Evaluate one system with the calculator attached to ``atoms``.

    The potential energy, the forces and the six-component stress are
    queried from ``atoms``. The stress components are arranged into a
    symmetric 3x3 matrix, which is negated and multiplied by the cell
    volume to give the virial.

    Returns:
        The tuple ``(energy, forces, virial)``.
    """
    energy = atoms.get_potential_energy()
    forces = atoms.get_forces()
    s = atoms.get_stress()
    # Components 0-2 fill the diagonal; components 3, 4 and 5 fill the
    # (1, 2), (0, 2) and (0, 1) off-diagonal pairs respectively.
    # NOTE(review): Codecov reported the virial computation below as not
    # covered by tests.
    stress_matrix = np.array(
        [
            [s[0], s[5], s[4]],
            [s[5], s[1], s[3]],
            [s[4], s[3], s[2]],
        ]
    )
    virial = -stress_matrix * atoms.get_volume()
    return energy, forces, virial

Check warning on line 24 in lambench/tasks/calculator/inference_efficiency/efficiency_utils.py

View check run for this annotation

Codecov / codecov/patch

lambench/tasks/calculator/inference_efficiency/efficiency_utils.py#L24

Added line #L24 was not covered by tests


def catch_oom_error(atoms: Atoms) -> bool:
    """
    Report whether evaluating ``atoms`` fails with an out-of-memory error.

    Returns:
        ``True`` when ``get_efv`` raises an exception whose message contains
        ``"out of memory"`` or ``"OOM"``. ``False`` when the evaluation
        succeeds or raises any other exception.
    """
    try:
        get_efv(atoms)
    except Exception as exc:
        # Detection is purely message based: failures that do not mention
        # memory are swallowed here and reported as "no OOM".
        message = str(exc)
        return "out of memory" in message or "OOM" in message
    # NOTE(review): Codecov reported this success path as not covered by tests.
    return False


def get_divisors(num: int) -> list[int]:
    """
    Return all positive divisors of ``num`` in ascending order.

    Only candidates up to the integer square root are tried; each hit ``d``
    also contributes its cofactor ``num // d``. ``num == 0`` yields an empty
    list because no candidate is tried.
    """
    small = [d for d in range(1, math.isqrt(num) + 1) if num % d == 0]
    # The set removes the duplicate that appears when num is a perfect square.
    return sorted({*small, *(num // d for d in small)})


def find_even_factors(num: int) -> tuple[int, int, int]:
    """
    Split ``num`` into three integer factors that are as close together as possible.

    The result ``(a, b, c)`` satisfies ``a * b * c == num`` and
    ``a <= b <= c``, and minimises the spread ``c - a``. When several
    triples share the smallest spread, the one with the smallest ``a``
    (then the smallest ``b``) is returned.

    Args:
        num: Positive integer to factorise.

    Returns:
        The most evenly distributed factor triple, in ascending order.

    Raises:
        ValueError: If ``num`` is smaller than 1. No factor triple exists in
            that case, and returning ``None`` would only surface later as an
            opaque unpacking error in the caller.
    """
    if num < 1:
        raise ValueError(f"num must be a positive integer, got {num}")

    best = None
    min_spread = float("inf")

    for a in range(1, num + 1):
        # a is the smallest factor, so a**3 can never exceed num.
        if a * a * a > num:
            break
        if num % a:
            continue
        rest = num // a

        # Start at a to keep a <= b; stop once the cofactor c drops below b,
        # because every later b would only repeat a triple in another order.
        for b in range(a, rest + 1):
            c, remainder = divmod(rest, b)
            if c < b:
                break
            if remainder:
                continue
            spread = c - a
            if spread < min_spread:
                min_spread = spread
                best = (a, b, c)
                if spread == 0:  # Perfect cube: nothing can beat this
                    return best
    return best


def binary_search_max_natoms(
    model: ASEModel, atoms: Atoms, upper_limit: int = 1000, max_iterations: int = 15
) -> int:
    """
    Bisect for the largest atom count the model can evaluate without running out of memory.

    Each probe replicates ``atoms`` into a supercell and checks it with
    ``catch_oom_error``. The search covers ``[1, upper_limit]`` and stops
    once the bounds meet or after ``max_iterations`` probes, whichever
    comes first.

    Returns:
        The lower bound of the search interval when the search stops.
    """
    lower, upper = 1, upper_limit
    for _ in range(max_iterations):
        if lower >= upper:
            break
        # Round the midpoint up so that `lower = candidate` always makes progress.
        candidate = (lower + upper + 1) // 2
        # Number of copies is rounded up, so the supercell holds at least
        # `candidate` atoms.
        n_copies = np.int32(np.ceil(candidate / len(atoms)))
        supercell = atoms.copy().repeat(find_even_factors(n_copies))
        supercell.calc = model.calc
        if catch_oom_error(supercell):
            upper = candidate - 1
        else:
            lower = candidate
    return lower
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from lambench.models.ase_models import ASEModel
from lambench.tasks.calculator.inference_efficiency.efficiency_utils import (
binary_search_max_natoms,
get_efv,
find_even_factors,
)
from ase.io import read
from ase.atoms import Atoms
import logging
import time
import numpy as np
Expand All @@ -11,23 +15,6 @@
)


def get_efv(atoms: Atoms) -> tuple[float, np.ndarray, np.ndarray]:
    """
    Evaluate one system and return its energy, forces and virial.

    The virial is built from the six-component stress reported by ``atoms``:
    the components are arranged into a symmetric 3x3 matrix, which is negated
    and multiplied by the cell volume.

    Returns:
        The tuple ``(energy, forces, virial)``.
    """
    e = atoms.get_potential_energy()
    f = atoms.get_forces()
    stress = atoms.get_stress()
    # Components 0-2 fill the diagonal; components 3, 4 and 5 fill the
    # (1, 2), (0, 2) and (0, 1) off-diagonal pairs respectively.
    v = (
        -np.array(
            [
                [stress[0], stress[5], stress[4]],
                [stress[5], stress[1], stress[3]],
                [stress[4], stress[3], stress[2]],
            ]
        )
        * atoms.get_volume()
    )
    return e, f, v


def run_inference(
model: ASEModel, test_data: Path, warmup_ratio: float
) -> dict[str, dict[str, float]]:
Expand Down Expand Up @@ -62,7 +49,9 @@ def run_inference(


def run_one_inference(
model: ASEModel, test_traj: Path, warmup_ratio: float
model: ASEModel,
test_traj: Path,
warmup_ratio: float,
) -> dict[str, float]:
"""
Infer for one trajectory, return averaged time and success rate, starting timing at warmup_ratio.
Expand All @@ -75,6 +64,14 @@ def run_one_inference(

efficiency = []
for i, atoms in enumerate(test_atoms):
# find maximum allowed natoms
max_natoms = binary_search_max_natoms(model, atoms)
# on-the-fly expand atoms
scaling_factor = np.int32(np.floor(max_natoms / len(atoms)))
while 1 in find_even_factors(scaling_factor) and scaling_factor > 1:
scaling_factor -= 1
a, b, c = find_even_factors(scaling_factor)
atoms = atoms.repeat((a, b, c))
atoms.calc = model.calc
n_atoms = len(atoms)
start = time.time()
Expand Down
53 changes: 53 additions & 0 deletions tests/tasks/calculator/test_efficiency_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from lambench.tasks.calculator.inference_efficiency.efficiency_utils import (
find_even_factors,
binary_search_max_natoms,
)
import pytest
import numpy as np
from ase.atoms import Atoms
from unittest.mock import MagicMock

# Periodic single-atom Mg cell used as the seed structure that
# binary_search_max_natoms replicates in the test below.
# "mp-1056702" is presumably the Materials Project id of the structure.
OOM_TEST_ATOM = Atoms(
    symbols="Mg",
    positions=[[0, 0, 0]],
    cell=[
        [-2.244256, -2.244256, 0.0],
        [-2.244256, 0.0, -2.244256],
        [0.0, -2.244256, -2.244256],
    ],
    pbc=True,
)  # mp-1056702


@pytest.mark.parametrize(
    "num, expected",
    [
        (27, (3, 3, 3)),  # perfect cube: all three factors equal
        (13, (1, 1, 13)),  # prime: only the trivial split exists
        (16, (2, 2, 4)),  # power of two that is not a cube
        (728, (7, 8, 13)),  # 728 = 7 * 8 * 13
    ],
)
def test_find_even_factors(num, expected):
    """find_even_factors returns the most balanced ascending factor triple."""
    result = find_even_factors(num)
    assert result == expected, f"Expected {expected}, got {result}"


@pytest.mark.parametrize(
    "threshold, max_natoms",
    [(1999, 1000), (247, 247), (121, 121), (100, 100), (38, 38), (31, 31)],
)
def test_binary_search_max_natoms(threshold, max_natoms):
    """
    The search returns the largest system size that does not trigger an OOM.

    The mocked calculator raises an OOM-style error for any system with more
    than ``threshold`` atoms. A threshold above the default upper limit of
    1000 must be capped at that limit.
    """

    def mock_get_potential_energy(atoms=None):
        if len(atoms) > threshold:
            raise MemoryError("OOM: Too many atoms!")
        # The value is never inspected; keep it deterministic.
        return 0.0

    def mock_get_forces(atoms=None):
        return np.zeros((len(atoms), 3))

    mock_model = MagicMock()
    mock_model.calc = MagicMock()
    mock_model.calc.get_potential_energy.side_effect = mock_get_potential_energy
    # Provide array-shaped forces and stress so that the non-OOM path of
    # get_efv can run to completion. With bare MagicMock return values the
    # evaluation appears to fail after the energy call and the failure is
    # swallowed by catch_oom_error, leaving the success path unexercised.
    mock_model.calc.get_forces.side_effect = mock_get_forces
    mock_model.calc.get_stress.return_value = np.zeros(6)

    result = binary_search_max_natoms(mock_model, OOM_TEST_ATOM)
    assert result == max_natoms, f"Expected {max_natoms}, got {result}"