diff --git a/src/strands_evals/__init__.py b/src/strands_evals/__init__.py
index 460a55c..7de7520 100644
--- a/src/strands_evals/__init__.py
+++ b/src/strands_evals/__init__.py
@@ -2,12 +2,14 @@
 from .case import Case
 from .evaluation_data_store import EvaluationDataStore
 from .experiment import Experiment
+from .local_file_task_result_store import LocalFileTaskResultStore
 from .simulation import ActorSimulator, UserSimulator
 from .telemetry import StrandsEvalsTelemetry, get_tracer
 
 __all__ = [
     "Experiment",
     "Case",
+    "LocalFileTaskResultStore",
     "EvaluationDataStore",
     "evaluators",
     "extractors",
diff --git a/src/strands_evals/local_file_task_result_store.py b/src/strands_evals/local_file_task_result_store.py
new file mode 100644
index 0000000..3080e04
--- /dev/null
+++ b/src/strands_evals/local_file_task_result_store.py
@@ -0,0 +1,54 @@
+from pathlib import Path
+
+from .types.evaluation import EvaluationData
+
+
+class LocalFileTaskResultStore:
+    """Task result store backed by local JSON files.
+
+    Saves one JSON file per case in the specified directory,
+    using the case name as the filename. Files are read and written
+    as UTF-8 regardless of the platform's default encoding.
+    """
+
+    def __init__(self, directory: str | Path):
+        """Create the store directory (including parents) if it is missing.
+
+        Args:
+            directory: Directory that holds the per-case JSON files.
+        """
+        self._directory = Path(directory)
+        self._directory.mkdir(parents=True, exist_ok=True)
+
+    def _path_for(self, case_name: str) -> Path:
+        """Return the JSON file path used for the given case name."""
+        return self._directory / f"{case_name}.json"
+
+    def load(self, case_name: str) -> EvaluationData | None:
+        """Load a cached task result from a JSON file.
+
+        Args:
+            case_name: The name of the case to load results for.
+
+        Returns:
+            The cached EvaluationData if the file exists, None otherwise.
+        """
+        try:
+            # Read directly rather than checking exists() first, so a file
+            # removed between the check and the read cannot raise.
+            payload = self._path_for(case_name).read_text(encoding="utf-8")
+        except (FileNotFoundError, NotADirectoryError):
+            return None
+        return EvaluationData.model_validate_json(payload)
+
+    def save(self, case_name: str, result: EvaluationData) -> None:
+        """Save a task result to a JSON file.
+
+        Args:
+            case_name: The name of the case to save results for.
+            result: The EvaluationData to save.
+        """
+        # Explicit UTF-8: the platform default encoding may not be able to
+        # represent non-ASCII text contained in the result.
+        serialized = result.model_dump_json(indent=2)
+        self._path_for(case_name).write_text(serialized, encoding="utf-8")
diff --git a/tests/strands_evals/test_local_file_task_result_store.py b/tests/strands_evals/test_local_file_task_result_store.py
new file mode 100644
index 0000000..ab0fe54
--- /dev/null
+++ b/tests/strands_evals/test_local_file_task_result_store.py
@@ -0,0 +1,71 @@
+import json
+
+import pytest
+
+from strands_evals.local_file_task_result_store import LocalFileTaskResultStore
+from strands_evals.types import EvaluationData
+
+
+@pytest.fixture
+def evaluation_data():
+    return EvaluationData(
+        input="What is 2+2?",
+        actual_output="4",
+        name="math_case",
+        expected_output="4",
+        metadata={"difficulty": "easy"},
+    )
+
+
+@pytest.fixture
+def store(tmp_path):
+    return LocalFileTaskResultStore(directory=tmp_path / "results")
+
+
+class TestLocalFileTaskResultStore:
+    def test_save_and_load(self, store, evaluation_data):
+        store.save("math_case", evaluation_data)
+        loaded = store.load("math_case")
+
+        assert loaded is not None
+        assert loaded.input == evaluation_data.input
+        assert loaded.actual_output == evaluation_data.actual_output
+        assert loaded.expected_output == evaluation_data.expected_output
+        assert loaded.name == evaluation_data.name
+        assert loaded.metadata == evaluation_data.metadata
+
+    def test_load_missing_returns_none(self, store):
+        result = store.load("nonexistent_case")
+        assert result is None
+
+    def test_creates_directory(self, tmp_path):
+        new_dir = tmp_path / "nested" / "results"
+        assert not new_dir.exists()
+        LocalFileTaskResultStore(directory=new_dir)
+        assert new_dir.exists()
+
+    def test_save_writes_json_file(self, store, evaluation_data, tmp_path):
+        store.save("math_case", evaluation_data)
+        file_path = tmp_path / "results" / "math_case.json"
+        assert file_path.exists()
+        data = json.loads(file_path.read_text(encoding="utf-8"))
+        assert data["input"] == "What is 2+2?"
+        assert data["actual_output"] == "4"
+
+    def test_save_and_load_non_ascii(self, store, tmp_path):
+        data = EvaluationData(
+            input="¿Cuánto es 2+2? 二加二",
+            actual_output="四",
+            name="unicode_case",
+            expected_output="四",
+            metadata={"difficulty": "fácil"},
+        )
+        store.save("unicode_case", data)
+
+        loaded = store.load("unicode_case")
+        assert loaded is not None
+        assert loaded.input == data.input
+        assert loaded.actual_output == data.actual_output
+        # The file must be valid UTF-8 regardless of the platform's locale.
+        raw = (tmp_path / "results" / "unicode_case.json").read_bytes()
+        assert json.loads(raw.decode("utf-8"))["input"] == data.input