Skip to content

Commit 02f4bda

Browse files
authored
feat: add LocalFileTaskResultStore for caching task results locally (#178)
Introduce a new LocalFileTaskResultStore class that persists evaluation task results as JSON files on the local filesystem. Each case is saved as a separate file using the case name as the filename. Includes unit tests and exports the class from the public API.
1 parent 83010ba commit 02f4bda

3 files changed

Lines changed: 94 additions & 0 deletions

File tree

src/strands_evals/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,14 @@
22
from .case import Case
33
from .evaluation_data_store import EvaluationDataStore
44
from .experiment import Experiment
5+
from .local_file_task_result_store import LocalFileTaskResultStore
56
from .simulation import ActorSimulator, UserSimulator
67
from .telemetry import StrandsEvalsTelemetry, get_tracer
78

89
__all__ = [
910
"Experiment",
1011
"Case",
12+
"LocalFileTaskResultStore",
1113
"EvaluationDataStore",
1214
"evaluators",
1315
"extractors",
src/strands_evals/local_file_task_result_store.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
from pathlib import Path
2+
3+
from .types.evaluation import EvaluationData
4+
5+
6+
class LocalFileTaskResultStore:
    """Task result store backed by local JSON files.

    Saves one JSON file per case in the specified directory,
    using the case name as the filename. Files are always written and
    read as UTF-8 so that cached results do not depend on the locale's
    default text encoding.
    """

    def __init__(self, directory: str | Path):
        """Create the store, creating the backing directory if needed.

        Args:
            directory: Directory that holds the per-case JSON files.
                Missing parent directories are created as well.
        """
        self._directory = Path(directory)
        self._directory.mkdir(parents=True, exist_ok=True)

    def _path_for(self, case_name: str) -> Path:
        """Return the JSON file path used for ``case_name``.

        The case name is used verbatim as the file stem.
        """
        return self._directory / f"{case_name}.json"

    def load(self, case_name: str) -> EvaluationData | None:
        """Load a cached task result from a JSON file.

        Args:
            case_name: The name of the case to load results for.

        Returns:
            The cached EvaluationData if the file exists, None otherwise.
        """
        # Read first and handle "missing" afterwards: checking for existence
        # up front leaves a window in which the file can disappear between
        # the check and the read.
        try:
            payload = self._path_for(case_name).read_text(encoding="utf-8")
        except (FileNotFoundError, NotADirectoryError):
            return None
        return EvaluationData.model_validate_json(payload)

    def save(self, case_name: str, result: EvaluationData) -> None:
        """Save a task result to a JSON file.

        Any existing file for the same case name is overwritten.

        Args:
            case_name: The name of the case to save results for.
            result: The EvaluationData to save.
        """
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent non-ASCII text contained in the serialized result.
        self._path_for(case_name).write_text(
            result.model_dump_json(indent=2),
            encoding="utf-8",
        )
Lines changed: 53 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,53 @@
1+
import json
2+
3+
import pytest
4+
5+
from strands_evals.local_file_task_result_store import LocalFileTaskResultStore
6+
from strands_evals.types import EvaluationData
7+
8+
9+
@pytest.fixture
def evaluation_data():
    """Provide a small, fully populated EvaluationData sample for the tests."""
    fields = {
        "input": "What is 2+2?",
        "actual_output": "4",
        "name": "math_case",
        "expected_output": "4",
        "metadata": {"difficulty": "easy"},
    }
    return EvaluationData(**fields)
18+
19+
20+
@pytest.fixture
def store(tmp_path):
    """Provide a store rooted at a ``results`` directory under ``tmp_path``."""
    results_dir = tmp_path / "results"
    return LocalFileTaskResultStore(directory=results_dir)
23+
24+
25+
class TestLocalFileTaskResultStore:
    """Round-trip and filesystem checks for LocalFileTaskResultStore."""

    def test_save_and_load(self, store, evaluation_data):
        """A saved result comes back from load with the checked fields intact."""
        store.save("math_case", evaluation_data)
        restored = store.load("math_case")

        assert restored is not None
        checked_fields = (
            "input",
            "actual_output",
            "expected_output",
            "name",
            "metadata",
        )
        for field_name in checked_fields:
            assert getattr(restored, field_name) == getattr(evaluation_data, field_name)

    def test_load_missing_returns_none(self, store):
        """Loading a case that was never saved yields None."""
        assert store.load("nonexistent_case") is None

    def test_creates_directory(self, tmp_path):
        """Constructing the store creates the directory, including parents."""
        target = tmp_path.joinpath("nested", "results")
        assert not target.exists()
        LocalFileTaskResultStore(directory=target)
        assert target.exists()

    def test_save_writes_json_file(self, store, evaluation_data, tmp_path):
        """Saving writes ``<case_name>.json`` containing the serialized fields."""
        store.save("math_case", evaluation_data)
        written = tmp_path / "results" / "math_case.json"
        assert written.exists()
        payload = json.loads(written.read_text())
        assert payload["input"] == "What is 2+2?"
        assert payload["actual_output"] == "4"

0 commit comments

Comments
 (0)