Skip to content

Commit fd5b0b7

Browse files
igerberclaude
andcommitted
Fix round 2 review findings: fallback matching, parser regex, docstring
- P1: Simplify _finding_key() to (severity, summary_fingerprint) — file path excluded from primary key to prevent false "addressed" when location shifts or disappears between review rounds - P1: Fix parser regex to reliably match **Severity:** P1 format (4 capture groups covering **P1**, **Severity:** P1, **Severity: P1**, Severity: P1) - P2: Fix _finding_key() docstring to match actual implementation - P2: Fix test_matching_with_missing_location to assert correct behavior (no false addressed) instead of accepting the bug - Add 3 positive parser format tests (bold, bold-label, plain-label) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c0904eb commit fd5b0b7

2 files changed

Lines changed: 53 additions & 20 deletions

File tree

.claude/scripts/openai_review.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -417,12 +417,15 @@ def parse_review_findings(
417417
counters: dict[str, int] = {}
418418

419419
# Only match severity in explicit finding formats:
420-
# - **P1** or **P0:** (bold, as used in review output)
421-
# - Severity: P1 or Severity:** P1 (labeled field)
420+
# - **P1** or **P0:** (bold severity, as used in review output)
421+
# - **Severity:** P1 (bold label, severity after closing **)
422+
# - Severity: P1 (plain labeled field)
422423
# - - **Severity:** P1 (bullet with labeled field)
423424
finding_sev_pattern = re.compile(
424-
r"(?:\*\*(?:Severity:\s*)?)(P[0-3])(?:\*\*)"
425-
r"|(?:Severity:\s*\*?\*?)(P[0-3])"
425+
r"\*\*(P[0-3])\*\*" # **P1**
426+
r"|\*\*Severity:\*\*\s*(P[0-3])" # **Severity:** P1
427+
r"|\*\*Severity:\s*(P[0-3])\*\*" # **Severity: P1**
428+
r"|(?<!\*)Severity:\s*(P[0-3])(?!\*)" # Severity: P1 (not inside bold)
426429
)
427430
# Match file:line references like "diff_diff/foo.py:L123" or "foo.py:L45-L67"
428431
location_pattern = re.compile(
@@ -475,7 +478,13 @@ def parse_review_findings(
475478
if not sev_match:
476479
continue
477480

478-
severity = sev_match.group(1) or sev_match.group(2)
481+
# Extract severity from whichever group matched
482+
severity = (
483+
sev_match.group(1)
484+
or sev_match.group(2)
485+
or sev_match.group(3)
486+
or sev_match.group(4)
487+
)
479488

480489
# Extract a summary — text after the severity marker
481490
text_after_sev = line[sev_match.end() :].strip().lstrip(":—- ").strip()
@@ -510,19 +519,22 @@ def parse_review_findings(
510519
return findings
511520

512521

513-
def _finding_key(f: dict) -> "tuple[str, str, str]":
522+
def _finding_key(f: dict) -> "tuple[str, str]":
514523
"""Compute a stable matching key for a finding.
515524
516-
Uses (severity, section, summary_fingerprint) where the fingerprint is
517-
the first 50 chars of the summary, lowercased and stripped. This is more
518-
stable than location-based matching since line numbers shift across revisions.
519-
The file path from location is used as a secondary component when available.
525+
Uses (severity, summary_fingerprint) where the fingerprint is the first
526+
50 chars of the summary, lowercased and stripped. File path and section
527+
are intentionally excluded from the primary key because:
528+
- Line numbers shift across revisions
529+
- The model may extract different locations or omit them entirely
530+
- Section headings may vary between review rounds
531+
532+
This yields more false positives (matching unrelated findings with
533+
similar descriptions) but avoids the worse problem of false negatives
534+
(marking unresolved findings as addressed).
520535
"""
521536
summary = f.get("summary", "").lower().strip()[:50]
522-
# Extract just the file path from location (strip line numbers)
523-
location = f.get("location", "")
524-
file_path = location.split(":")[0] if location else ""
525-
return (f.get("severity", ""), file_path, summary)
537+
return (f.get("severity", ""), summary)
526538

527539

528540
def merge_findings(

tests/test_openai_review.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -774,6 +774,27 @@ def test_finding_ids_follow_format(self, review_mod):
774774
assert f["id"].startswith("R2-")
775775
assert f["status"] == "open"
776776

777+
def test_parses_bold_severity_format(self, review_mod):
778+
"""**P1** format should be parsed."""
779+
review_text = "**P1** Missing NaN guard in `foo.py:L10`\n"
780+
findings = review_mod.parse_review_findings(review_text, 1)
781+
assert len(findings) == 1
782+
assert findings[0]["severity"] == "P1"
783+
784+
def test_parses_bold_label_format(self, review_mod):
785+
"""**Severity:** P1 format should be parsed."""
786+
review_text = "- **Severity:** P1 — Missing NaN guard in `foo.py:L10`\n"
787+
findings = review_mod.parse_review_findings(review_text, 1)
788+
assert len(findings) == 1
789+
assert findings[0]["severity"] == "P1"
790+
791+
def test_parses_plain_label_format(self, review_mod):
792+
"""Severity: P2 format should be parsed."""
793+
review_text = "Severity: P2 — Unused import in `bar.py:L5`\n"
794+
findings = review_mod.parse_review_findings(review_text, 1)
795+
assert len(findings) == 1
796+
assert findings[0]["severity"] == "P2"
797+
777798
def test_ignores_multi_severity_prose(self, review_mod):
778799
"""Lines like 'P2/P3 items may exist' should not be parsed as findings."""
779800
review_text = (
@@ -876,7 +897,7 @@ def test_matching_with_shifted_line_numbers(self, review_mod):
876897
assert len(addressed) == 0
877898

878899
def test_matching_with_missing_location(self, review_mod):
879-
"""Finding with no location should match on summary fingerprint."""
900+
"""Finding with no location should still match on summary fingerprint."""
880901
previous = [
881902
{"id": "R1-P1-1", "severity": "P1", "location": "foo.py:L10",
882903
"section": "Code Quality", "summary": "Missing NaN guard in staggered",
@@ -888,11 +909,11 @@ def test_matching_with_missing_location(self, review_mod):
888909
"status": "open"}
889910
]
890911
merged = review_mod.merge_findings(previous, current)
891-
# Location changed but summary matches — should NOT mark as addressed
892-
# (this is a known limitation: different file path = different key)
893-
# The finding stays open in current, and previous is marked addressed
894-
# This is acceptable because the model will re-flag it if still present
895-
assert any(f["status"] == "open" for f in merged)
912+
open_findings = [f for f in merged if f["status"] == "open"]
913+
addressed = [f for f in merged if f["status"] == "addressed"]
914+
# Same severity + same summary = match. No false "addressed" record.
915+
assert len(open_findings) == 1
916+
assert len(addressed) == 0
896917

897918
def test_multiple_findings_same_key(self, review_mod):
898919
"""Multiple previous findings with same key should not overwrite each other."""

0 commit comments

Comments
 (0)