Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/strands_evals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
from .case import Case
from .evaluation_data_store import EvaluationDataStore
from .experiment import Experiment
from .local_file_task_result_store import LocalFileTaskResultStore
from .simulation import ActorSimulator, UserSimulator
from .telemetry import StrandsEvalsTelemetry, get_tracer

__all__ = [
"Experiment",
"Case",
"LocalFileTaskResultStore",
"EvaluationDataStore",
"evaluators",
"extractors",
Expand Down
39 changes: 39 additions & 0 deletions src/strands_evals/local_file_task_result_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from pathlib import Path

from .types.evaluation import EvaluationData


class LocalFileTaskResultStore:
    """Task result store backed by local JSON files.

    Saves one JSON file per case in the specified directory,
    using the case name as the filename (``<case_name>.json``).

    Files are always read and written as UTF-8 so cached results do not
    depend on the platform's locale-default encoding.
    """

    def __init__(self, directory: str | Path):
        """Create the store, making the backing directory if needed.

        Args:
            directory: Directory that holds the per-case JSON files. It is
                created, including missing parents, when it does not exist.
        """
        self._directory = Path(directory)
        self._directory.mkdir(parents=True, exist_ok=True)

    def _path_for(self, case_name: str) -> Path:
        """Return the JSON file path used for ``case_name``."""
        return self._directory / f"{case_name}.json"

    def load(self, case_name: str) -> EvaluationData | None:
        """Load a cached task result from a JSON file.

        Args:
            case_name: The name of the case to load results for.

        Returns:
            The cached EvaluationData if the file exists, None otherwise.
        """
        # Read first and handle the missing-file error instead of checking
        # ``exists()`` up front: the file could disappear between the check
        # and the read.
        try:
            raw = self._path_for(case_name).read_text(encoding="utf-8")
        except FileNotFoundError:
            return None
        return EvaluationData.model_validate_json(raw)

    def save(self, case_name: str, result: EvaluationData) -> None:
        """Save a task result to a JSON file.

        An existing file for the same case is overwritten.

        Args:
            case_name: The name of the case to save results for.
            result: The EvaluationData to save.
        """
        serialized = result.model_dump_json(indent=2)
        self._path_for(case_name).write_text(serialized, encoding="utf-8")
53 changes: 53 additions & 0 deletions tests/strands_evals/test_local_file_task_result_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import json

import pytest

from strands_evals.local_file_task_result_store import LocalFileTaskResultStore
from strands_evals.types import EvaluationData


@pytest.fixture
def evaluation_data():
    """Provide a small, fully populated EvaluationData sample."""
    fields = {
        "input": "What is 2+2?",
        "actual_output": "4",
        "name": "math_case",
        "expected_output": "4",
        "metadata": {"difficulty": "easy"},
    }
    return EvaluationData(**fields)


@pytest.fixture
def store(tmp_path):
    """Provide a store rooted at a ``results`` folder under pytest's tmp_path."""
    results_dir = tmp_path / "results"
    return LocalFileTaskResultStore(directory=results_dir)


class TestLocalFileTaskResultStore:
    """Tests for saving, loading, and on-disk layout of the local file store."""

    def test_save_and_load(self, store, evaluation_data):
        """A saved result comes back from load with the same field values."""
        store.save("math_case", evaluation_data)
        restored = store.load("math_case")

        assert restored is not None
        compared_fields = (
            "input",
            "actual_output",
            "expected_output",
            "name",
            "metadata",
        )
        for field_name in compared_fields:
            assert getattr(restored, field_name) == getattr(evaluation_data, field_name)

    def test_load_missing_returns_none(self, store):
        """Loading a case that was never saved yields None."""
        assert store.load("nonexistent_case") is None

    def test_creates_directory(self, tmp_path):
        """Constructing the store creates a missing nested directory."""
        target_dir = tmp_path.joinpath("nested", "results")
        assert not target_dir.exists()
        LocalFileTaskResultStore(directory=target_dir)
        assert target_dir.exists()

    def test_save_writes_json_file(self, store, evaluation_data, tmp_path):
        """Saving writes ``<case_name>.json`` containing the result's fields."""
        store.save("math_case", evaluation_data)
        saved_file = tmp_path / "results" / "math_case.json"
        assert saved_file.exists()
        payload = json.loads(saved_file.read_text())
        assert payload["input"] == "What is 2+2?"
        assert payload["actual_output"] == "4"