Fix round 2 review findings: fallback matching, parser regex, docstring

igerber · claude · igerber · commit fd5b0b77a0b6 · 2026-03-22T10:40:07.000-04:00
- P1: Simplify _finding_key() to (severity, summary_fingerprint) — file path
  excluded from primary key to prevent false "addressed" when location shifts
  or disappears between review rounds
- P1: Fix parser regex to reliably match **Severity:** P1 format (4 capture
  groups covering **P1**, **Severity:** P1, **Severity: P1**, Severity: P1)
- P2: Fix _finding_key() docstring to match actual implementation
- P2: Fix test_matching_with_missing_location to assert correct behavior
  (no false addressed) instead of accepting the bug
- Add 3 positive parser format tests (bold, bold-label, plain-label)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/.claude/scripts/openai_review.py b/.claude/scripts/openai_review.py
@@ -417,12 +417,15 @@ def parse_review_findings(
     counters: dict[str, int] = {}
 
     # Only match severity in explicit finding formats:
-    # - **P1** or **P0:** (bold, as used in review output)
-    # - Severity: P1 or Severity:** P1 (labeled field)
+    # - **P1** or **P0:** (bold severity, as used in review output)
+    # - **Severity:** P1 (bold label, severity after closing **)
+    # - Severity: P1 (plain labeled field)
     # - - **Severity:** P1 (bullet with labeled field)
     finding_sev_pattern = re.compile(
-        r"(?:\*\*(?:Severity:\s*)?)(P[0-3])(?:\*\*)"
-        r"|(?:Severity:\s*\*?\*?)(P[0-3])"
+        r"\*\*(P[0-3])\*\*"                        # **P1**
+        r"|\*\*Severity:\*\*\s*(P[0-3])"           # **Severity:** P1
+        r"|\*\*Severity:\s*(P[0-3])\*\*"           # **Severity: P1**
+        r"|(?<!\*)Severity:\s*(P[0-3])(?!\*)"       # Severity: P1 (not inside bold)
     )
     # Match file:line references like "diff_diff/foo.py:L123" or "foo.py:L45-L67"
     location_pattern = re.compile(
@@ -475,7 +478,13 @@ def parse_review_findings(
         if not sev_match:
             continue
 
-        severity = sev_match.group(1) or sev_match.group(2)
+        # Extract severity from whichever group matched
+        severity = (
+            sev_match.group(1)
+            or sev_match.group(2)
+            or sev_match.group(3)
+            or sev_match.group(4)
+        )
 
         # Extract a summary — text after the severity marker
         text_after_sev = line[sev_match.end() :].strip().lstrip(":—- ").strip()
@@ -510,19 +519,22 @@ def parse_review_findings(
     return findings
 
 
-def _finding_key(f: dict) -> "tuple[str, str, str]":
+def _finding_key(f: dict) -> "tuple[str, str]":
     """Compute a stable matching key for a finding.
 
-    Uses (severity, section, summary_fingerprint) where the fingerprint is
-    the first 50 chars of the summary, lowercased and stripped. This is more
-    stable than location-based matching since line numbers shift across revisions.
-    The file path from location is used as a secondary component when available.
+    Uses (severity, summary_fingerprint) where the fingerprint is the first
+    50 chars of the summary, lowercased and stripped. File path and section
+    are intentionally excluded from the primary key because:
+    - Line numbers shift across revisions
+    - The model may extract different locations or omit them entirely
+    - Section headings may vary between review rounds
+
+    This yields more false positives (matching unrelated findings with
+    similar descriptions) but avoids the worse problem of false negatives
+    (marking unresolved findings as addressed).
     """
     summary = f.get("summary", "").lower().strip()[:50]
-    # Extract just the file path from location (strip line numbers)
-    location = f.get("location", "")
-    file_path = location.split(":")[0] if location else ""
-    return (f.get("severity", ""), file_path, summary)
+    return (f.get("severity", ""), summary)
 
 
 def merge_findings(
diff --git a/tests/test_openai_review.py b/tests/test_openai_review.py
@@ -774,6 +774,27 @@ def test_finding_ids_follow_format(self, review_mod):
             assert f["id"].startswith("R2-")
             assert f["status"] == "open"
 
+    def test_parses_bold_severity_format(self, review_mod):
+        """**P1** format should be parsed."""
+        review_text = "**P1** Missing NaN guard in `foo.py:L10`\n"
+        findings = review_mod.parse_review_findings(review_text, 1)
+        assert len(findings) == 1
+        assert findings[0]["severity"] == "P1"
+
+    def test_parses_bold_label_format(self, review_mod):
+        """**Severity:** P1 format should be parsed."""
+        review_text = "- **Severity:** P1 — Missing NaN guard in `foo.py:L10`\n"
+        findings = review_mod.parse_review_findings(review_text, 1)
+        assert len(findings) == 1
+        assert findings[0]["severity"] == "P1"
+
+    def test_parses_plain_label_format(self, review_mod):
+        """Severity: P2 format should be parsed."""
+        review_text = "Severity: P2 — Unused import in `bar.py:L5`\n"
+        findings = review_mod.parse_review_findings(review_text, 1)
+        assert len(findings) == 1
+        assert findings[0]["severity"] == "P2"
+
     def test_ignores_multi_severity_prose(self, review_mod):
         """Lines like 'P2/P3 items may exist' should not be parsed as findings."""
         review_text = (
@@ -876,7 +897,7 @@ def test_matching_with_shifted_line_numbers(self, review_mod):
         assert len(addressed) == 0
 
     def test_matching_with_missing_location(self, review_mod):
-        """Finding with no location should match on summary fingerprint."""
+        """Finding with no location should still match on summary fingerprint."""
         previous = [
             {"id": "R1-P1-1", "severity": "P1", "location": "foo.py:L10",
              "section": "Code Quality", "summary": "Missing NaN guard in staggered",
@@ -888,11 +909,11 @@ def test_matching_with_missing_location(self, review_mod):
              "status": "open"}
         ]
         merged = review_mod.merge_findings(previous, current)
-        # Location changed but summary matches — should NOT mark as addressed
-        # (this is a known limitation: different file path = different key)
-        # The finding stays open in current, and previous is marked addressed
-        # This is acceptable because the model will re-flag it if still present
-        assert any(f["status"] == "open" for f in merged)
+        open_findings = [f for f in merged if f["status"] == "open"]
+        addressed = [f for f in merged if f["status"] == "addressed"]
+        # Same severity + same summary = match. No false "addressed" record.
+        assert len(open_findings) == 1
+        assert len(addressed) == 0
 
     def test_multiple_findings_same_key(self, review_mod):
         """Multiple previous findings with same key should not overwrite each other."""