diff --git a/apps/data-processing/scripts/README_ledger_export.md b/apps/data-processing/scripts/README_ledger_export.md new file mode 100644 index 00000000..fbaae3c9 --- /dev/null +++ b/apps/data-processing/scripts/README_ledger_export.md @@ -0,0 +1,188 @@ +# Ledger-Range Export — Issue #883 + +Repeatable export of raw Soroban contract events and normalized project state for a given Stellar ledger range. Intended for **incident debugging** by maintainers. + +## Quick Start + +```bash +# Export all data for ledger range 1000–2000 +python scripts/export_ledger_range.py --start-ledger 1000 --end-ledger 2000 + +# Custom output directory +python scripts/export_ledger_range.py --start-ledger 1000 --end-ledger 2000 \ + --output-dir /tmp/incident_exports + +# Single-ledger export +python scripts/export_ledger_range.py --start-ledger 1500 --end-ledger 1500 + +# Override database URL +python scripts/export_ledger_range.py --start-ledger 1000 --end-ledger 2000 \ + --database-url postgresql://user:pass@host:5432/lumenpulse +``` + +The script reads `DATABASE_URL` from the environment if `--database-url` is not provided. + +## Input Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `--start-ledger` | int | ✓ | First ledger number, inclusive | +| `--end-ledger` | int | ✓ | Last ledger number, inclusive | +| `--output-dir` | string | | Output directory (default: `exports/ledger`) | +| `--database-url` | string | | Overrides `DATABASE_URL` env var | + +Validation rules: +- Both ledgers must be non-negative integers. +- `start_ledger` must be ≤ `end_ledger`. +- `start_ledger == end_ledger` is valid (single-ledger export). 
+ +## Output File + +A single JSON file is written: + +``` +{output_dir}/ledger_export_{start_ledger}_{end_ledger}.json +``` + +Example: `exports/ledger/ledger_export_1000_2000.json` + +### Top-Level Structure + +```json +{ + "metadata": { + "startLedger": 1000, + "endLedger": 2000, + "exportTimestamp": "2026-06-25T12:00:00+00:00", + "exportVersion": "1" + }, + "raw": [ ... ], + "normalized": { + "project_views": [ ... ], + "project_contributors": [ ... ], + "project_milestones": [ ... ] + } +} +``` + +### `metadata` + +| Field | Type | Description | +|-------|------|-------------| +| `startLedger` | int | Inclusive start of the requested range | +| `endLedger` | int | Inclusive end of the requested range | +| `exportTimestamp` | ISO-8601 | UTC time of export run | +| `exportVersion` | string | Schema version (`"1"`) | + +### `raw` — array of ContractEvent rows + +Each object represents one raw Soroban event whose `ledger` column falls in `[startLedger, endLedger]`: + +```json +{ + "id": 1, + "contract_id": "CABC...", + "event_id": "evt-1", + "ledger": 1500, + "event_type": "contribution", + "project_id": 42, + "contributor": "GBOB...", + "amount": 100.0, + "milestone_id": null, + "status": "active", + "topics": [], + "raw_data": { "key": "value" }, + "timestamp": "2024-01-01T00:00:00+00:00" +} +``` + +### `normalized` — object with three arrays + +Normalized rows are matched by their `last_event_ledger` / `last_contribution_ledger` column: + +#### `project_views` + +Rows from `project_views` where `last_event_ledger` is in range: + +```json +{ + "id": 1, + "project_id": 42, + "contract_id": "CABC...", + "owner": "GALICE...", + "total_contributions": 100.0, + "unique_contributors": 1, + "status": "active", + "last_event_ledger": 1500, + "extra_data": {} +} +``` + +#### `project_contributors` + +Rows from `project_contributors` where `last_contribution_ledger` is in range: + +```json +{ + "id": 1, + "project_id": 42, + "contributor": "GBOB...", + "total_contributed": 100.0, + 
"first_contribution_ledger": 1500, + "last_contribution_ledger": 1500, + "extra_data": {} +} +``` + +#### `project_milestones` + +Rows from `project_milestones` where `last_event_ledger` is in range: + +```json +{ + "id": 1, + "project_id": 42, + "milestone_id": 1, + "status": "pending", + "approved_at": null, + "last_event_ledger": 1500, + "extra_data": {} +} +``` + +## Intended Debugging Workflow + +1. Identify the approximate ledger range of an incident (e.g., from monitoring alerts or Stellar explorer). +2. Run the export: + ```bash + python scripts/export_ledger_range.py --start-ledger START --end-ledger END + ``` +3. Inspect `raw` to see exactly which contract events arrived in that window. +4. Compare `normalized` against expected project state — mismatches between raw events and normalized output indicate a processing bug. +5. Re-run as many times as needed; the tool never modifies source data and always overwrites the output file with a fresh snapshot. + +## Python API + +```python +from src.ledger_export import LedgerRangeExporter + +exporter = LedgerRangeExporter( + start_ledger=1000, + end_ledger=2000, + output_dir="exports/ledger", +) +result = exporter.export() +# result.path, result.raw_count, result.normalized_counts, result.status +``` + +## Running Tests + +```bash +pytest tests/test_ledger_export.py -v +``` + +## Limitations + +- **Normalized coverage**: `project_views` and `project_milestones` are matched by `last_event_ledger`; `project_contributors` by `last_contribution_ledger`. A row whose last-ledger pointer now lies outside the range (for example, because a later ledger updated it again) will not appear, even if it was also affected by events within the range. +- **No DB required for tests**: All tests use mocks; a live database is only needed for actual incident debugging. +- **Large ranges**: Rows are loaded entirely into memory. For very large ranges (millions of events), increase available memory or narrow the range. 
# Allow running from repo root or scripts/ directory
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

# NOTE(review): log records are written to stdout, the same stream main()
# prints the JSON summary to; anything piping stdout into a JSON parser has
# to filter the log lines out first.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)


def parse_args(argv=None) -> argparse.Namespace:
    """
    Parse the command-line options of the export script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse falls back to ``sys.argv[1:]``, which is what the
            script entry point relies on. Passing an explicit list lets
            callers and tests drive the parser without touching
            ``sys.argv``.

    Returns:
        Namespace with ``start_ledger``, ``end_ledger``, ``output_dir`` and
        ``database_url`` attributes.

    Raises:
        SystemExit: raised by argparse on missing or malformed options.
    """
    parser = argparse.ArgumentParser(
        description="Export ledger-range data (raw events + normalized state) for incident debugging"
    )
    parser.add_argument(
        "--start-ledger", type=int, required=True, help="First ledger (inclusive)"
    )
    parser.add_argument(
        "--end-ledger", type=int, required=True, help="Last ledger (inclusive)"
    )
    parser.add_argument(
        "--output-dir",
        default="exports/ledger",
        help="Directory to write the export file (default: exports/ledger)",
    )
    parser.add_argument(
        "--database-url", default=None, help="Override DATABASE_URL env var"
    )
    return parser.parse_args(argv)


def main(argv=None) -> None:
    """
    Run one ledger-range export and print its summary as JSON.

    Args:
        argv: Optional argument list forwarded to :func:`parse_args`;
            ``None`` means "use ``sys.argv[1:]``".

    Exits with status 1 when the requested ledger range is invalid.
    """
    args = parse_args(argv)

    # Imported lazily so that ``--help`` and argument errors do not require
    # the database stack to be importable.
    from src.ledger_export import LedgerRangeExporter, _validate_ledger_range

    # Validate up front so a bad range produces a short log line and a
    # non-zero exit status instead of a traceback from the constructor.
    try:
        _validate_ledger_range(args.start_ledger, args.end_ledger)
    except (TypeError, ValueError) as exc:
        logger.error("Invalid ledger range: %s", exc)
        sys.exit(1)

    exporter = LedgerRangeExporter(
        start_ledger=args.start_ledger,
        end_ledger=args.end_ledger,
        output_dir=args.output_dir,
        database_url=args.database_url,
    )

    result = exporter.export()
    print(json.dumps(result.to_dict(), indent=2))


if __name__ == "__main__":
    main()
logger = logging.getLogger(__name__)

EXPORT_VERSION = "1"


def _validate_ledger_range(start_ledger: int, end_ledger: int) -> None:
    """
    Validate an inclusive ledger range.

    Args:
        start_ledger: First ledger of the range (inclusive).
        end_ledger: Last ledger of the range (inclusive); may equal
            ``start_ledger`` for a single-ledger range.

    Raises:
        TypeError: if either bound is not an ``int`` (``bool`` is rejected).
        ValueError: if either bound is negative, or start > end.
    """
    for bound in (start_ledger, end_ledger):
        # bool is a subclass of int, so True/False would otherwise pass the
        # isinstance check and silently be treated as ledgers 1 and 0.
        if isinstance(bound, bool) or not isinstance(bound, int):
            raise TypeError("start_ledger and end_ledger must be integers")
    if start_ledger < 0 or end_ledger < 0:
        raise ValueError("Ledger numbers must be non-negative")
    if start_ledger > end_ledger:
        raise ValueError(
            f"start_ledger ({start_ledger}) must be <= end_ledger ({end_ledger})"
        )


@dataclass
class LedgerExportResult:
    """Summary of one ledger-range export run."""

    path: str  # location of the JSON file that was written
    start_ledger: int  # inclusive start of the exported range
    end_ledger: int  # inclusive end of the exported range
    raw_count: int  # number of rows in the "raw" section
    normalized_counts: Dict[str, int]  # row count per normalized table
    status: str  # export() sets this to "completed"

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict (used for the CLI summary)."""
        return {
            "path": self.path,
            "start_ledger": self.start_ledger,
            "end_ledger": self.end_ledger,
            "raw_count": self.raw_count,
            "normalized_counts": self.normalized_counts,
            "status": self.status,
        }


class LedgerRangeExporter:
    """
    Exports raw ContractEvent rows and normalized project-state tables
    for a given Stellar ledger range.

    The exporter only issues SELECT statements. Each run replaces the output
    file for the range with a fresh snapshot, so it can be re-run freely.
    """

    def __init__(
        self,
        start_ledger: int,
        end_ledger: int,
        output_dir: str,
        database_url: Optional[str] = None,
    ) -> None:
        """
        Validate the range, create the output directory and set up the
        database session factory.

        Args:
            start_ledger: First ledger to export (inclusive).
            end_ledger: Last ledger to export (inclusive).
            output_dir: Directory the JSON file is written to; created if
                missing.
            database_url: Connection URL. Falls back to the ``DATABASE_URL``
                environment variable, then to a local development default.

        Raises:
            TypeError, ValueError: if the ledger range is invalid.
        """
        # Validate first so an invalid range has no side effects at all.
        _validate_ledger_range(start_ledger, end_ledger)

        self.start_ledger = start_ledger
        self.end_ledger = end_ledger
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        db_url = database_url or os.getenv(
            "DATABASE_URL",
            "postgresql://postgres:postgres@localhost:5432/lumenpulse",
        )
        # Keep a handle on the engine so close() can release its pool.
        self._engine = create_engine(db_url, pool_pre_ping=True, echo=False)
        self.Session = sessionmaker(bind=self._engine, expire_on_commit=False)

    def close(self) -> None:
        """Release the connection pool held by this exporter's engine."""
        self._engine.dispose()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _ledger_range_filter(self, column: Any) -> Any:
        """Build the inclusive ``start <= column <= end`` filter clause."""
        return and_(column >= self.start_ledger, column <= self.end_ledger)

    def _output_path(self) -> Path:
        """Return the output file path, which encodes the ledger range."""
        return (
            self.output_dir
            / f"ledger_export_{self.start_ledger}_{self.end_ledger}.json"
        )

    def _select_in_range(self, session: Any, model: Any, ledger_column: Any) -> List[Any]:
        """
        Load all rows of ``model`` whose ``ledger_column`` is in range.

        Rows are ordered by ledger and then primary key so that repeated
        exports of unchanged data produce identical files.
        """
        statement = (
            select(model)
            .where(self._ledger_range_filter(ledger_column))
            .order_by(ledger_column, model.id)
        )
        return session.execute(statement).scalars().all()

    @staticmethod
    def _isoformat(value: Any) -> Optional[str]:
        """Return ``value.isoformat()``, or None when the value is unset."""
        return value.isoformat() if value else None

    @staticmethod
    def _event_to_dict(event: Any) -> Dict[str, Any]:
        """Serialize one ContractEvent row for the ``raw`` section."""
        return {
            "id": event.id,
            "contract_id": event.contract_id,
            "event_id": event.event_id,
            "ledger": event.ledger,
            "event_type": event.event_type,
            "project_id": event.project_id,
            "contributor": event.contributor,
            "amount": event.amount,
            "milestone_id": event.milestone_id,
            "status": event.status,
            "topics": event.topics,
            "raw_data": event.raw_data,
            "timestamp": LedgerRangeExporter._isoformat(event.timestamp),
        }

    @staticmethod
    def _view_to_dict(view: Any) -> Dict[str, Any]:
        """Serialize one ProjectView row."""
        return {
            "id": view.id,
            "project_id": view.project_id,
            "contract_id": view.contract_id,
            "owner": view.owner,
            "total_contributions": view.total_contributions,
            "unique_contributors": view.unique_contributors,
            "status": view.status,
            "last_event_ledger": view.last_event_ledger,
            "extra_data": view.extra_data,
        }

    @staticmethod
    def _contributor_to_dict(contributor: Any) -> Dict[str, Any]:
        """Serialize one ProjectContributor row."""
        return {
            "id": contributor.id,
            "project_id": contributor.project_id,
            "contributor": contributor.contributor,
            "total_contributed": contributor.total_contributed,
            "first_contribution_ledger": contributor.first_contribution_ledger,
            "last_contribution_ledger": contributor.last_contribution_ledger,
            "extra_data": contributor.extra_data,
        }

    @staticmethod
    def _milestone_to_dict(milestone: Any) -> Dict[str, Any]:
        """Serialize one ProjectMilestone row."""
        return {
            "id": milestone.id,
            "project_id": milestone.project_id,
            "milestone_id": milestone.milestone_id,
            "status": milestone.status,
            "approved_at": LedgerRangeExporter._isoformat(milestone.approved_at),
            "last_event_ledger": milestone.last_event_ledger,
            "extra_data": milestone.extra_data,
        }

    @staticmethod
    def _write_json(payload: Dict[str, Any], out_path: Path) -> None:
        """
        Write ``payload`` to ``out_path`` as indented JSON, atomically.

        The data is serialized to a sibling temp file and then moved into
        place, so a failure while writing never leaves a truncated file in
        place of a previous good snapshot.
        """
        out_path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = out_path.with_name(out_path.name + ".tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                # NOTE(review): default=str stringifies any value json cannot
                # encode natively (e.g. Decimal or datetime inside row data);
                # confirm that is the intended on-disk representation.
                json.dump(payload, f, indent=2, default=str)
            os.replace(tmp_path, out_path)
        finally:
            # Only still present when serialization or the rename failed.
            if tmp_path.exists():
                tmp_path.unlink()

    # ------------------------------------------------------------------
    # Data collection
    # ------------------------------------------------------------------

    def _fetch_raw(self, session: Any) -> List[Dict[str, Any]]:
        """Return ContractEvent rows whose ``ledger`` is within the range."""
        events = self._select_in_range(session, ContractEvent, ContractEvent.ledger)
        return [self._event_to_dict(event) for event in events]

    def _fetch_normalized(self, session: Any) -> Dict[str, List[Dict[str, Any]]]:
        """
        Return normalized project-state rows touched last within the range.

        ProjectView and ProjectMilestone rows are selected by
        ``last_event_ledger``; ProjectContributor rows by
        ``last_contribution_ledger``. Rows whose pointer lies outside the
        range are not returned.
        """
        # Query order (views, contributors, milestones) is kept stable.
        views = self._select_in_range(
            session, ProjectView, ProjectView.last_event_ledger
        )
        contributors = self._select_in_range(
            session, ProjectContributor, ProjectContributor.last_contribution_ledger
        )
        milestones = self._select_in_range(
            session, ProjectMilestone, ProjectMilestone.last_event_ledger
        )

        return {
            "project_views": [self._view_to_dict(v) for v in views],
            "project_contributors": [
                self._contributor_to_dict(c) for c in contributors
            ],
            "project_milestones": [self._milestone_to_dict(m) for m in milestones],
        }

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def export(self) -> LedgerExportResult:
        """
        Run the full export: raw events + normalized state.

        Writes a single JSON file and returns a LedgerExportResult.
        Safe to call multiple times; the output file is replaced each time.
        """
        with self.Session() as session:
            raw = self._fetch_raw(session)
            normalized = self._fetch_normalized(session)

        normalized_counts = {k: len(v) for k, v in normalized.items()}

        payload: Dict[str, Any] = {
            "metadata": {
                "startLedger": self.start_ledger,
                "endLedger": self.end_ledger,
                "exportTimestamp": datetime.now(timezone.utc).isoformat(),
                "exportVersion": EXPORT_VERSION,
            },
            "raw": raw,
            "normalized": normalized,
        }

        out_path = self._output_path()
        self._write_json(payload, out_path)

        result = LedgerExportResult(
            path=str(out_path),
            start_ledger=self.start_ledger,
            end_ledger=self.end_ledger,
            raw_count=len(raw),
            normalized_counts=normalized_counts,
            status="completed",
        )
        logger.info(
            "Ledger export complete: ledgers %d–%d, %d raw events, "
            "%d views / %d contributors / %d milestones → %s",
            self.start_ledger,
            self.end_ledger,
            len(raw),
            normalized_counts.get("project_views", 0),
            normalized_counts.get("project_contributors", 0),
            normalized_counts.get("project_milestones", 0),
            out_path,
        )
        return result
# ---------------------------------------------------------------------------
# Stub out heavy dependencies before importing our module
# ---------------------------------------------------------------------------
# NOTE(review): these stubs are installed in sys.modules for the whole pytest
# session, and the attribute assignments below also apply to the real
# sqlalchemy module if it was imported before this file. Other test modules
# collected in the same session that need the real library can be affected.
for _mod in [
    "sqlalchemy",
    "sqlalchemy.orm",
    "src.db",
    "src.db.models",
]:
    if _mod not in sys.modules:
        sys.modules[_mod] = MagicMock()

import sqlalchemy as _sa

_sa.create_engine = MagicMock()
_sa.select = MagicMock(return_value=MagicMock())
_sa.and_ = MagicMock(return_value=MagicMock())

import sqlalchemy.orm as _orm

_orm.sessionmaker = MagicMock()

# ``src.ledger_export`` is imported as a member of the ``src`` package, so the
# directory that CONTAINS ``src`` (the app root) has to be importable. The
# ``src`` directory itself is still added for backward compatibility.
_APP_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
for _path in (os.path.join(_APP_ROOT, "src"), _APP_ROOT):
    if _path not in sys.path:
        sys.path.insert(0, _path)

_models_mock = sys.modules["src.db.models"]
_models_mock.ContractEvent = MagicMock()
_models_mock.ProjectView = MagicMock()
_models_mock.ProjectContributor = MagicMock()
_models_mock.ProjectMilestone = MagicMock()

from src.ledger_export import (  # noqa: E402
    EXPORT_VERSION,
    LedgerExportResult,
    LedgerRangeExporter,
    _validate_ledger_range,
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _make_exporter(tmp_path, start=1000, end=2000):
    """Build an exporter writing to ``tmp_path`` with the DB layer patched out."""
    with patch("src.ledger_export.create_engine"), patch("src.ledger_export.sessionmaker"):
        return LedgerRangeExporter(
            start_ledger=start,
            end_ledger=end,
            output_dir=str(tmp_path),
            database_url="postgresql://mock/mock",
        )


def _mock_session(exporter, raw_rows=None, view_rows=None, contrib_rows=None, milestone_rows=None):
    """
    Attach a mock session that returns specified rows for each query and
    patch _ledger_range_filter to return a dummy MagicMock (avoids
    MagicMock comparison issues with SQLAlchemy column mocks).

    The row lists are handed out in the order export() issues its queries:
    raw events, views, contributors, milestones.
    """
    mock_session = MagicMock()
    mock_session.__enter__ = MagicMock(return_value=mock_session)
    mock_session.__exit__ = MagicMock(return_value=False)

    all_results = [
        raw_rows or [],
        view_rows or [],
        contrib_rows or [],
        milestone_rows or [],
    ]
    mock_session.execute.return_value.scalars.return_value.all.side_effect = all_results
    exporter.Session = MagicMock(return_value=mock_session)
    # Bypass SQLAlchemy column comparisons entirely
    exporter._ledger_range_filter = MagicMock(return_value=MagicMock())
    return mock_session


def _fake_contract_event(ledger=1500):
    """Return a stand-in ContractEvent row at the given ledger."""
    r = MagicMock()
    r.id = 1
    r.contract_id = "CABC"
    r.event_id = "evt-1"
    r.ledger = ledger
    r.event_type = "contribution"
    r.project_id = 42
    r.contributor = "GBOB"
    r.amount = 100.0
    r.milestone_id = None
    r.status = "active"
    r.topics = []
    r.raw_data = {"key": "value"}
    r.timestamp = datetime(2024, 1, 1, tzinfo=timezone.utc)
    return r


def _fake_project_view(ledger=1500):
    """Return a stand-in ProjectView row last touched at the given ledger."""
    v = MagicMock()
    v.id = 1
    v.project_id = 42
    v.contract_id = "CABC"
    v.owner = "GALICE"
    v.total_contributions = 100.0
    v.unique_contributors = 1
    v.status = "active"
    v.last_event_ledger = ledger
    v.extra_data = {}
    return v


def _fake_contributor(ledger=1500):
    """Return a stand-in ProjectContributor row for the given ledger."""
    c = MagicMock()
    c.id = 1
    c.project_id = 42
    c.contributor = "GBOB"
    c.total_contributed = 100.0
    c.first_contribution_ledger = ledger
    c.last_contribution_ledger = ledger
    c.extra_data = {}
    return c


def _fake_milestone(ledger=1500):
    """Return a stand-in ProjectMilestone row last touched at the given ledger."""
    m = MagicMock()
    m.id = 1
    m.project_id = 42
    m.milestone_id = 1
    m.status = "pending"
    m.approved_at = None
    m.last_event_ledger = ledger
    m.extra_data = {}
    return m


# ---------------------------------------------------------------------------
# Tests: _validate_ledger_range
# ---------------------------------------------------------------------------


class TestValidateLedgerRange:
    def test_valid_range(self):
        _validate_ledger_range(1000, 2000)  # no exception

    def test_start_equals_end(self):
        _validate_ledger_range(500, 500)  # single-ledger range is valid

    def test_start_greater_than_end_raises(self):
        with pytest.raises(ValueError, match="must be <="):
            _validate_ledger_range(2000, 1000)

    def test_negative_ledger_raises(self):
        with pytest.raises(ValueError, match="non-negative"):
            _validate_ledger_range(-1, 100)

    def test_non_integer_raises(self):
        with pytest.raises(TypeError):
            _validate_ledger_range("1000", 2000)  # type: ignore[arg-type]


# ---------------------------------------------------------------------------
# Tests: LedgerExportResult
# ---------------------------------------------------------------------------


class TestLedgerExportResult:
    def test_to_dict(self):
        r = LedgerExportResult(
            path="/tmp/ledger_export_1000_2000.json",
            start_ledger=1000,
            end_ledger=2000,
            raw_count=3,
            normalized_counts={"project_views": 1, "project_contributors": 1, "project_milestones": 0},
            status="completed",
        )
        d = r.to_dict()
        assert d["raw_count"] == 3
        assert d["status"] == "completed"
        assert d["normalized_counts"]["project_views"] == 1


# ---------------------------------------------------------------------------
# Tests: LedgerRangeExporter initialisation
# ---------------------------------------------------------------------------


class TestLedgerRangeExporterInit:
    def test_output_dir_created(self, tmp_path):
        out = tmp_path / "nested" / "debug"
        with patch("src.ledger_export.create_engine"), patch("src.ledger_export.sessionmaker"):
            LedgerRangeExporter(1000, 2000, str(out), database_url="postgresql://mock/mock")
        assert out.exists()

    def test_ledger_range_stored(self, tmp_path):
        exporter = _make_exporter(tmp_path, start=500, end=999)
        assert exporter.start_ledger == 500
        assert exporter.end_ledger == 999

    def test_invalid_range_raises_on_init(self, tmp_path):
        with pytest.raises(ValueError):
            with patch("src.ledger_export.create_engine"), patch("src.ledger_export.sessionmaker"):
                LedgerRangeExporter(2000, 1000, str(tmp_path), database_url="postgresql://mock/mock")


# ---------------------------------------------------------------------------
# Tests: export() — output file structure
# ---------------------------------------------------------------------------


class TestExport:
    def test_writes_json_file(self, tmp_path):
        exporter = _make_exporter(tmp_path)
        _mock_session(exporter, raw_rows=[_fake_contract_event()])

        exporter.export()

        assert (tmp_path / "ledger_export_1000_2000.json").exists()

    def test_metadata_fields(self, tmp_path):
        exporter = _make_exporter(tmp_path)
        _mock_session(exporter)

        exporter.export()

        data = json.loads((tmp_path / "ledger_export_1000_2000.json").read_text())
        meta = data["metadata"]
        assert meta["startLedger"] == 1000
        assert meta["endLedger"] == 2000
        assert meta["exportVersion"] == EXPORT_VERSION
        assert "exportTimestamp" in meta

    def test_raw_section_contains_contract_events(self, tmp_path):
        exporter = _make_exporter(tmp_path)
        _mock_session(exporter, raw_rows=[_fake_contract_event(1500)])

        exporter.export()

        data = json.loads((tmp_path / "ledger_export_1000_2000.json").read_text())
        assert len(data["raw"]) == 1
        assert data["raw"][0]["ledger"] == 1500
        assert data["raw"][0]["event_type"] == "contribution"

    def test_normalized_section_structure(self, tmp_path):
        exporter = _make_exporter(tmp_path)
        _mock_session(
            exporter,
            view_rows=[_fake_project_view()],
            contrib_rows=[_fake_contributor()],
            milestone_rows=[_fake_milestone()],
        )

        exporter.export()

        data = json.loads((tmp_path / "ledger_export_1000_2000.json").read_text())
        norm = data["normalized"]
        assert "project_views" in norm
        assert "project_contributors" in norm
        assert "project_milestones" in norm
        assert len(norm["project_views"]) == 1
        assert norm["project_views"][0]["project_id"] == 42

    def test_returns_export_result(self, tmp_path):
        exporter = _make_exporter(tmp_path)
        _mock_session(exporter)

        result = exporter.export()

        assert isinstance(result, LedgerExportResult)
        assert result.status == "completed"
        assert result.start_ledger == 1000
        assert result.end_ledger == 2000

    def test_result_counts_match_data(self, tmp_path):
        exporter = _make_exporter(tmp_path)
        _mock_session(
            exporter,
            raw_rows=[_fake_contract_event(), _fake_contract_event()],
            view_rows=[_fake_project_view()],
        )

        result = exporter.export()

        assert result.raw_count == 2
        assert result.normalized_counts["project_views"] == 1
        assert result.normalized_counts["project_contributors"] == 0

    def test_empty_range_exports_zero_records(self, tmp_path):
        exporter = _make_exporter(tmp_path, start=9999, end=9999)
        _mock_session(exporter)

        result = exporter.export()

        assert result.raw_count == 0
        assert all(v == 0 for v in result.normalized_counts.values())

    def test_start_equals_end_single_ledger(self, tmp_path):
        exporter = _make_exporter(tmp_path, start=500, end=500)
        _mock_session(exporter, raw_rows=[_fake_contract_event(ledger=500)])

        result = exporter.export()

        assert result.raw_count == 1
        data = json.loads((tmp_path / "ledger_export_500_500.json").read_text())
        assert data["metadata"]["startLedger"] == 500
        assert data["metadata"]["endLedger"] == 500

    def test_repeated_execution_overwrites_file(self, tmp_path):
        """Running export twice must produce a valid file (idempotent)."""
        exporter = _make_exporter(tmp_path)

        for _ in range(2):
            # The mocked result lists are consumed per run, so re-arm them.
            _mock_session(exporter, raw_rows=[_fake_contract_event()])
            exporter.export()

        data = json.loads((tmp_path / "ledger_export_1000_2000.json").read_text())
        assert data["metadata"]["startLedger"] == 1000

    def test_missing_ledger_data_does_not_crash(self, tmp_path):
        """Empty DB (no rows) must not raise an exception."""
        exporter = _make_exporter(tmp_path)
        _mock_session(exporter)  # all side_effect lists are empty

        result = exporter.export()  # must not raise

        assert result.status == "completed"

    def test_raw_and_normalized_ledger_coverage(self, tmp_path):
        """Events and normalized state both reference the same ledger."""
        exporter = _make_exporter(tmp_path, start=1500, end=1500)
        _mock_session(
            exporter,
            raw_rows=[_fake_contract_event(ledger=1500)],
            view_rows=[_fake_project_view(ledger=1500)],
        )

        exporter.export()

        data = json.loads((tmp_path / "ledger_export_1500_1500.json").read_text())
        assert data["raw"][0]["ledger"] == 1500
        assert data["normalized"]["project_views"][0]["last_event_ledger"] == 1500