strands-agents · afarntrog · Mar 26, 2026 · Mar 26, 2026
diff --git a/src/strands_evals/__init__.py b/src/strands_evals/__init__.py
@@ -2,12 +2,14 @@
 from .case import Case
 from .evaluation_data_store import EvaluationDataStore
 from .experiment import Experiment
+from .local_file_task_result_store import LocalFileTaskResultStore
 from .simulation import ActorSimulator, UserSimulator
 from .telemetry import StrandsEvalsTelemetry, get_tracer
 
 __all__ = [
     "Experiment",
     "Case",
+    "LocalFileTaskResultStore",
     "EvaluationDataStore",
     "evaluators",
     "extractors",

diff --git a/src/strands_evals/local_file_task_result_store.py b/src/strands_evals/local_file_task_result_store.py
@@ -0,0 +1,64 @@
+from pathlib import Path
+from urllib.parse import quote
+
+from .types.evaluation import EvaluationData
+
+
+class LocalFileTaskResultStore:
+    """Task result store backed by local JSON files.
+
+    Saves one UTF-8 encoded JSON file per case in the specified directory.
+    The filename is the percent-encoded case name plus ``.json``: names made
+    only of letters, digits, ``_``, ``.``, ``-`` and ``~`` are used as-is,
+    while path separators and other special characters are escaped.
+    """
+
+    def __init__(self, directory: str | Path):
+        """Create the store, making the directory (and parents) if missing.
+
+        Args:
+            directory: Directory that holds the per-case JSON files.
+        """
+        self._directory = Path(directory)
+        self._directory.mkdir(parents=True, exist_ok=True)
+
+    def _path_for(self, case_name: str) -> Path:
+        """Return the file path that stores the result for a case.
+
+        Case names are free-form, so every reserved character is
+        percent-encoded: a name such as ``"math/add"`` or ``"../x"`` must map
+        to one file directly inside the store directory instead of a nested
+        or parent path.
+        """
+        return self._directory / f"{quote(case_name, safe='')}.json"
+
+    def load(self, case_name: str) -> EvaluationData | None:
+        """Load a cached task result from a JSON file.
+
+        Args:
+            case_name: The name of the case to load results for.
+
+        Returns:
+            The cached EvaluationData if the file exists, None otherwise.
+        """
+        try:
+            # Read directly instead of checking exists() first, so a file
+            # removed between the check and the read cannot raise.
+            raw = self._path_for(case_name).read_text(encoding="utf-8")
+        except FileNotFoundError:
+            return None
+        return EvaluationData.model_validate_json(raw)
+
+    def save(self, case_name: str, result: EvaluationData) -> None:
+        """Save a task result to a JSON file.
+
+        Args:
+            case_name: The name of the case to save results for.
+            result: The EvaluationData to save.
+        """
+        path = self._path_for(case_name)
+        # Write a sibling temp file and rename it over the target, so an
+        # interrupted save never leaves truncated JSON for load() to read.
+        tmp_path = path.with_name(f"{path.name}.tmp")
+        tmp_path.write_text(result.model_dump_json(indent=2), encoding="utf-8")
+        tmp_path.replace(path)
diff --git a/tests/strands_evals/test_local_file_task_result_store.py b/tests/strands_evals/test_local_file_task_result_store.py
@@ -0,0 +1,74 @@
+import json
+
+import pytest
+
+from strands_evals.local_file_task_result_store import LocalFileTaskResultStore
+from strands_evals.types import EvaluationData
+
+
+@pytest.fixture
+def evaluation_data():
+    """Return a fully populated result shared by the store tests."""
+    return EvaluationData(
+        input="What is 2+2?",
+        actual_output="4",
+        name="math_case",
+        expected_output="4",
+        metadata={"difficulty": "easy"},
+    )
+
+
+@pytest.fixture
+def store(tmp_path):
+    """Return a store rooted at a directory that does not exist yet."""
+    return LocalFileTaskResultStore(directory=tmp_path / "results")
+
+
+class TestLocalFileTaskResultStore:
+    """Round-trip, overwrite, missing-file, and on-disk format checks."""
+
+    def test_save_and_load(self, store, evaluation_data):
+        store.save("math_case", evaluation_data)
+        loaded = store.load("math_case")
+
+        assert loaded is not None
+        assert loaded.input == evaluation_data.input
+        assert loaded.actual_output == evaluation_data.actual_output
+        assert loaded.expected_output == evaluation_data.expected_output
+        assert loaded.name == evaluation_data.name
+        assert loaded.metadata == evaluation_data.metadata
+
+    def test_load_missing_returns_none(self, store):
+        result = store.load("nonexistent_case")
+        assert result is None
+
+    def test_save_overwrites_existing_result(self, store, evaluation_data):
+        # A second save for the same case should replace the first result.
+        store.save("math_case", evaluation_data)
+        updated = evaluation_data.model_copy(update={"actual_output": "5"})
+        store.save("math_case", updated)
+
+        loaded = store.load("math_case")
+        assert loaded is not None
+        assert loaded.actual_output == "5"
+
+    def test_creates_directory(self, tmp_path):
+        new_dir = tmp_path / "nested" / "results"
+        assert not new_dir.exists()
+        LocalFileTaskResultStore(directory=new_dir)
+        # is_dir() also rejects the case where a plain file was created.
+        assert new_dir.is_dir()
+
+    def test_accepts_str_directory(self, tmp_path, evaluation_data):
+        # The constructor is annotated ``str | Path``; cover the str form too.
+        store = LocalFileTaskResultStore(directory=str(tmp_path / "results"))
+        store.save("math_case", evaluation_data)
+        assert store.load("math_case") is not None
+
+    def test_save_writes_json_file(self, store, evaluation_data, tmp_path):
+        store.save("math_case", evaluation_data)
+        file_path = tmp_path / "results" / "math_case.json"
+        assert file_path.exists()
+        data = json.loads(file_path.read_text(encoding="utf-8"))
+        assert data["input"] == "What is 2+2?"
+        assert data["actual_output"] == "4"