Fix findings element validation, one-to-one fallback matching

igerber · claude · igerber · commit 2df9a88795c8 · 2026-03-22T14:27:54.000-04:00
P1: Filter non-dict elements from findings list in parse_review_state().
Entries like strings or integers are silently dropped instead of crashing
downstream code that calls f.get(...).

P2: Track consumed current candidates in merge_findings() pass 2b so
two prior no-location findings can't both match the same current finding.
Ensures one-to-one matching in the reverse fallback pass.

P2: Add two regression tests: one for filtering non-dict findings, and
one for one-to-one matching of duplicate no-location findings.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/.claude/scripts/openai_review.py b/.claude/scripts/openai_review.py
@@ -391,6 +391,9 @@ def parse_review_state(path: str) -> "tuple[list[dict], int]":
             file=sys.stderr,
         )
         return ([], 0)
+    # Filter to dict elements only — non-dict entries (e.g., strings) would
+    # crash downstream code that calls f.get(...)
+    findings = [f for f in findings if isinstance(f, dict)]
 
     review_round = data.get("review_round", 0)
     if not isinstance(review_round, int):
@@ -669,12 +672,14 @@ def merge_findings(
         merged_pass2.append(f)
 
     # 2b: Unconsumed previous findings without file paths → try to match
-    # current findings that DO have file paths (reverse direction)
+    # current findings that DO have file paths (reverse direction).
+    # Track consumed current candidates to ensure one-to-one matching.
     current_by_fallback: dict[tuple, list[dict]] = {}
     for f in merged_pass2:
         _, fallback = _finding_keys(f)
         current_by_fallback.setdefault(fallback, []).append(f)
 
+    consumed_current_ids: set[str] = set()
     for f in previous:
         fid = f.get("id", "")
         if fid in consumed_ids:
@@ -684,9 +689,14 @@ def merge_findings(
         if has_file:
             continue  # Has file path — should have matched in pass 1 if possible
         # Previous finding without file path — try fallback against current
-        candidates = current_by_fallback.get(fallback, [])
+        # Exclude already-consumed current candidates for one-to-one matching
+        candidates = [
+            c for c in current_by_fallback.get(fallback, [])
+            if c.get("id", "") not in consumed_current_ids
+        ]
         if len(candidates) == 1:
             consumed_ids.add(fid)
+            consumed_current_ids.add(candidates[0].get("id", ""))
 
     # Mark unconsumed previous findings as addressed
     for f in previous:
diff --git a/tests/test_openai_review.py b/tests/test_openai_review.py
@@ -767,6 +767,20 @@ def test_non_int_round_defaults_to_zero(self, review_mod, tmp_path):
         assert findings == []
         assert round_num == 0
 
+    def test_non_dict_findings_filtered(self, review_mod, tmp_path):
+        """Non-dict elements in findings list are filtered out, not crash."""
+        state_file = tmp_path / "review-state.json"
+        state = {
+            "schema_version": 1,
+            "findings": ["oops", {"id": "R1-P1-1", "severity": "P1"}, 42],
+            "review_round": 1,
+        }
+        state_file.write_text(json.dumps(state))
+        findings, round_num = review_mod.parse_review_state(str(state_file))
+        assert len(findings) == 1
+        assert findings[0]["id"] == "R1-P1-1"
+        assert round_num == 1
+
 
 class TestWriteReviewState:
     def test_writes_valid_json(self, review_mod, tmp_path):
@@ -1070,6 +1084,28 @@ def test_multiple_findings_same_key(self, review_mod):
         assert len(open_findings) == 1
         assert len(addressed) == 1
 
+    def test_duplicate_no_location_findings_one_to_one(self, review_mod):
+        """Two prior no-location findings should not both match one current finding."""
+        previous = [
+            {"id": "R1-P1-1", "severity": "P1", "location": "",
+             "section": "Code Quality", "summary": "Missing NaN guard",
+             "status": "open"},
+            {"id": "R1-P1-2", "severity": "P1", "location": "",
+             "section": "Methodology", "summary": "Missing NaN guard",
+             "status": "open"},
+        ]
+        current = [
+            {"id": "R2-P1-1", "severity": "P1", "location": "foo.py:L10",
+             "section": "Code Quality", "summary": "Missing NaN guard",
+             "status": "open"},
+        ]
+        merged = review_mod.merge_findings(previous, current)
+        open_findings = [f for f in merged if f["status"] == "open"]
+        addressed = [f for f in merged if f["status"] == "addressed"]
+        # One current + one prior matched = 1 open; one prior unmatched = 1 addressed
+        assert len(open_findings) == 1
+        assert len(addressed) == 1
+
     def test_previous_missing_location_current_has_location(self, review_mod):
         """Previous finding with no location, current has one → should match."""
         previous = [