Skip to content

Commit c0904eb

Browse files
igerberclaude
andcommitted
Fix review findings: harden finding matching, tighten parser, fix token budget
Address all P1/P2 findings from local AI review: - P1: merge_findings() now matches by (severity, file_path, summary_fingerprint) instead of (location, severity) to prevent false "addressed" on line shifts - P1: parse_review_findings() requires bold severity (**P1**) format, skips table rows, assessment lines, instructional prose, and multi-severity refs - P2: Use list-per-key in merge to handle duplicate findings without overwrite - P2: Refactor apply_token_budget() to take typed params; source files are always included (sticky); only import-context governed by budget - P2: Add --base-ref argument instead of brittle branch_info parsing - P2: Add 8 new tests covering parser rejection and merge edge cases Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7a25702 commit c0904eb

3 files changed

Lines changed: 267 additions & 107 deletions

File tree

.claude/commands/ai-review-local.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ pre-PR use. Designed for iterative review/revision cycles before submitting a PR
1818
- `deep`: Standard + import-graph expansion (files imported by changed files)
1919
- `--include-files <file1,file2,...>`: Extra files to include as read-only context
2020
(filenames resolve under `diff_diff/`, or use paths relative to repo root)
21-
- `--token-budget <n>`: Max estimated input tokens before dropping context (default: 200000)
21+
- `--token-budget <n>`: Max estimated input tokens before dropping import-context
22+
files (default: 200000). Changed source files are always included regardless of budget.
2223
- `--force-fresh`: Skip delta-diff mode, run a full fresh review even if previous state exists
2324
- `--full-registry`: Include the entire REGISTRY.md instead of selective sections
2425
- `--model <name>`: Override the OpenAI model (default: `gpt-5.4`)
@@ -215,6 +216,7 @@ python3 .claude/scripts/openai_review.py \
215216
--context "$context_level" \
216217
--review-state .claude/reviews/review-state.json \
217218
--commit-sha "$(git rev-parse HEAD)" \
219+
--base-ref "$comparison_ref" \
218220
[--previous-review .claude/reviews/local-review-previous.md] \
219221
[--delta-diff /tmp/ai-review-delta-diff.patch] \
220222
[--delta-changed-files /tmp/ai-review-delta-files.txt] \

.claude/scripts/openai_review.py

Lines changed: 118 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -409,11 +409,21 @@ def parse_review_findings(
409409
"""Parse AI review output for structured findings.
410410
411411
Extracts P0/P1/P2/P3 items with severity, section, summary, and location.
412+
Only parses lines where the severity appears in bold (**P1**) or as a
413+
labeled field (Severity: P1) to avoid ingesting instructional prose,
414+
assessment criteria, or previous-findings tables.
412415
"""
413416
findings: list[dict] = []
414417
counters: dict[str, int] = {}
415418

416-
severity_pattern = re.compile(r"\b(P[0-3])\b")
419+
# Only match severity in explicit finding formats:
420+
# - **P1** or **P0:** (bold, as used in review output)
421+
# - Severity: P1 or Severity:** P1 (labeled field)
422+
# - - **Severity:** P1 (bullet with labeled field)
423+
finding_sev_pattern = re.compile(
424+
r"(?:\*\*(?:Severity:\s*)?)(P[0-3])(?:\*\*)"
425+
r"|(?:Severity:\s*\*?\*?)(P[0-3])"
426+
)
417427
# Match file:line references like "diff_diff/foo.py:L123" or "foo.py:L45-L67"
418428
location_pattern = re.compile(
419429
r"(?:`?)([\w/]+\.py(?::L?\d+(?:-L?\d+)?)?)(?:`?)"
@@ -428,21 +438,45 @@ def parse_review_findings(
428438
if heading and "summary" not in heading.lower():
429439
current_section = heading
430440

431-
sev_match = severity_pattern.search(line)
432-
if not sev_match:
441+
# Skip table rows (finding tables from previous reviews)
442+
stripped = line.strip()
443+
if stripped.startswith("|") and stripped.endswith("|"):
433444
continue
434-
435-
severity = sev_match.group(1)
436-
# Skip lines that are just referencing severity in passing (e.g., "P2/P3 items")
437-
if re.search(r"P\d/P\d", line):
445+
# Skip lines referencing multiple severities in passing (e.g., "P2/P3 items")
446+
if re.search(r"P\d[/+]P\d", line):
438447
continue
439448
# Skip assessment criteria lines
440449
if any(
441450
marker in line
442-
for marker in ["⛔", "⚠️", "✅", "Blocker", "Needs changes", "Looks good"]
451+
for marker in [
452+
"\u26d4", "\u26a0\ufe0f", "\u2705",
453+
"Blocker", "Needs changes", "Looks good",
454+
"Path to Approval",
455+
]
456+
):
457+
continue
458+
# Skip instructional/guidance lines
459+
if any(
460+
phrase in line
461+
for phrase in [
462+
"findings are resolved",
463+
"findings have been addressed",
464+
"should be marked",
465+
"assessment should be",
466+
"does NOT need",
467+
"do NOT need",
468+
"P1+ findings",
469+
"P0/P1 findings",
470+
]
443471
):
444472
continue
445473

474+
sev_match = finding_sev_pattern.search(line)
475+
if not sev_match:
476+
continue
477+
478+
severity = sev_match.group(1) or sev_match.group(2)
479+
446480
# Extract a summary — text after the severity marker
447481
text_after_sev = line[sev_match.end() :].strip().lstrip(":—- ").strip()
448482
# Remove markdown bold markers
@@ -476,36 +510,51 @@ def parse_review_findings(
476510
return findings
477511

478512

513+
def _finding_key(f: dict) -> "tuple[str, str, str]":
514+
"""Compute a stable matching key for a finding.
515+
516+
Uses (severity, section, summary_fingerprint) where the fingerprint is
517+
the first 50 chars of the summary, lowercased and stripped. This is more
518+
stable than location-based matching since line numbers shift across revisions.
519+
The file path from location is used as a secondary component when available.
520+
"""
521+
summary = f.get("summary", "").lower().strip()[:50]
522+
# Extract just the file path from location (strip line numbers)
523+
location = f.get("location", "")
524+
file_path = location.split(":")[0] if location else ""
525+
return (f.get("severity", ""), file_path, summary)
526+
527+
479528
def merge_findings(
480529
previous: "list[dict]", current: "list[dict]"
481530
) -> "list[dict]":
482531
"""Merge findings across review rounds.
483532
484-
Match by location + severity. Previous findings absent from current
485-
are marked 'addressed'. New current findings are added as 'open'.
533+
Match by (severity, file_path, summary_fingerprint). Previous findings
534+
absent from current are marked 'addressed'. Supports multiple findings
535+
per key without overwriting.
486536
"""
487-
# Build lookup from previous findings by (location, severity)
488-
prev_by_key: dict[tuple[str, str], dict] = {}
537+
# Build lookup from previous findings — list per key to handle duplicates
538+
prev_by_key: dict[tuple, list[dict]] = {}
489539
for f in previous:
490-
key = (f.get("location", ""), f.get("severity", ""))
491-
prev_by_key[key] = f
540+
key = _finding_key(f)
541+
prev_by_key.setdefault(key, []).append(f)
492542

493-
# Track which previous findings were matched
494-
matched_keys: set[tuple[str, str]] = set()
495543
merged: list[dict] = []
496544

497545
for f in current:
498-
key = (f.get("location", ""), f.get("severity", ""))
499-
if key in prev_by_key:
500-
matched_keys.add(key)
546+
key = _finding_key(f)
547+
if key in prev_by_key and prev_by_key[key]:
548+
# Consume one match from the previous list
549+
prev_by_key[key].pop(0)
501550
# Keep the current finding (updated summary) with status open
502551
merged.append(f)
503552
else:
504553
merged.append(f)
505554

506-
# Mark unmatched previous findings as addressed
507-
for key, f in prev_by_key.items():
508-
if key not in matched_keys:
555+
# Mark unconsumed previous findings as addressed
556+
for remaining in prev_by_key.values():
557+
for f in remaining:
509558
addressed = dict(f)
510559
addressed["status"] = "addressed"
511560
merged.append(addressed)
@@ -521,40 +570,32 @@ def merge_findings(
521570

522571

523572
def apply_token_budget(
524-
sections: "dict[str, str]", budget: int
525-
) -> "tuple[dict[str, str], list[str]]":
526-
"""Apply token budget to prompt sections, dropping lowest-priority content first.
527-
528-
Sections are keyed by name. The 'import_files' key (if present) is a
529-
newline-separated list of <file>...</file> blocks that can be individually
530-
dropped, smallest first.
531-
532-
Returns (included_sections, dropped_names).
573+
mandatory_tokens: int,
574+
source_files_text: "str | None",
575+
import_context_text: "str | None",
576+
budget: int,
577+
) -> "tuple[str | None, str | None, list[str]]":
578+
"""Apply token budget, dropping lowest-priority context first.
579+
580+
Changed source files are always included (they are the highest-value
581+
context for catching sins of omission). Only import-context files are
582+
subject to the budget — they are included smallest-first until the
583+
budget is exhausted.
584+
585+
Returns (source_files_text, import_context_text, dropped_file_names).
533586
"""
534-
# Calculate tokens for all non-droppable sections
535-
mandatory_keys = [
536-
k for k in sections if k not in ("import_files", "source_files")
537-
]
538-
mandatory_tokens = sum(
539-
estimate_tokens(sections[k]) for k in mandatory_keys
540-
)
541-
542587
remaining = budget - mandatory_tokens
543-
included = {k: sections[k] for k in mandatory_keys}
544588
dropped: list[str] = []
545589

546-
# Include source files if present and budget allows
547-
if "source_files" in sections:
548-
src_tokens = estimate_tokens(sections["source_files"])
549-
if remaining >= src_tokens or remaining >= 0:
550-
# Always include source files (they're high value) but warn if over
551-
included["source_files"] = sections["source_files"]
552-
remaining -= src_tokens
590+
# Source files are always included (sticky — not budget-governed)
591+
if source_files_text:
592+
remaining -= estimate_tokens(source_files_text)
553593

554594
# Include import files individually, smallest first
555-
if "import_files" in sections and sections["import_files"].strip():
595+
final_import_text: "str | None" = None
596+
if import_context_text and import_context_text.strip():
556597
# Split into individual file blocks
557-
blocks = re.split(r"(?=<file )", sections["import_files"])
598+
blocks = re.split(r"(?=<file )", import_context_text)
558599
blocks = [b for b in blocks if b.strip()]
559600

560601
# Sort by size (smallest first)
@@ -573,7 +614,7 @@ def apply_token_budget(
573614
dropped.append(name)
574615

575616
if included_blocks:
576-
included["import_files"] = "\n".join(included_blocks)
617+
final_import_text = "\n".join(included_blocks)
577618

578619
if mandatory_tokens > budget:
579620
print(
@@ -582,7 +623,7 @@ def apply_token_budget(
582623
file=sys.stderr,
583624
)
584625

585-
return (included, dropped)
626+
return (source_files_text, final_import_text, dropped)
586627

587628

588629
# ---------------------------------------------------------------------------
@@ -1040,6 +1081,11 @@ def main() -> None:
10401081
default=None,
10411082
help="HEAD commit SHA (required when --review-state is set)",
10421083
)
1084+
parser.add_argument(
1085+
"--base-ref",
1086+
default="main",
1087+
help="Base branch name for review-state.json (default: main)",
1088+
)
10431089

10441090
args = parser.parse_args()
10451091

@@ -1180,40 +1226,32 @@ def main() -> None:
11801226
pass
11811227

11821228
# --- Token budget ---
1183-
budget_sections: dict[str, str] = {}
1184-
# Build a map of section name -> content for budget management
1185-
# (We'll pass individual texts to compile_prompt, but use the budget
1186-
# to decide whether to include import_context_text)
1187-
if import_context_text:
1188-
budget_sections["import_files"] = import_context_text
1189-
if source_files_text:
1190-
budget_sections["source_files"] = source_files_text
1191-
# Estimate mandatory content size
1192-
mandatory_est = estimate_tokens(criteria_text) + estimate_tokens(
1193-
registry_content
1194-
) + estimate_tokens(diff_text) + estimate_tokens(changed_files_text)
1229+
# Estimate mandatory content size (always included, not budget-governed)
1230+
mandatory_est = (
1231+
estimate_tokens(criteria_text)
1232+
+ estimate_tokens(registry_content)
1233+
+ estimate_tokens(diff_text)
1234+
+ estimate_tokens(changed_files_text)
1235+
)
11951236
if previous_review:
11961237
mandatory_est += estimate_tokens(previous_review)
11971238
if delta_diff_text:
11981239
mandatory_est += estimate_tokens(delta_diff_text)
1199-
budget_sections["_mandatory"] = "x" * (mandatory_est * 4) # placeholder
12001240

1201-
if budget_sections:
1202-
included, dropped = apply_token_budget(
1203-
budget_sections, args.token_budget
1241+
# Apply budget: source files are always included (sticky);
1242+
# only import-context files are dropped when over budget.
1243+
source_files_text, import_context_text, dropped = apply_token_budget(
1244+
mandatory_tokens=mandatory_est,
1245+
source_files_text=source_files_text,
1246+
import_context_text=import_context_text,
1247+
budget=args.token_budget,
1248+
)
1249+
if dropped:
1250+
print(
1251+
f"Warning: Token budget exceeded. Dropped import context files: "
1252+
f"{', '.join(dropped)}",
1253+
file=sys.stderr,
12041254
)
1205-
if "import_files" not in included:
1206-
import_context_text = None
1207-
elif "import_files" in included:
1208-
import_context_text = included["import_files"]
1209-
if "source_files" not in included:
1210-
source_files_text = None
1211-
if dropped:
1212-
print(
1213-
f"Warning: Token budget exceeded. Dropped import context files: "
1214-
f"{', '.join(dropped)}",
1215-
file=sys.stderr,
1216-
)
12171255

12181256
# --- Compile prompt ---
12191257
prompt = compile_prompt(
@@ -1285,7 +1323,7 @@ def main() -> None:
12851323
write_review_state(
12861324
path=args.review_state,
12871325
commit_sha=args.commit_sha,
1288-
base_ref=args.branch_info.split("/")[0] if "/" in args.branch_info else "main",
1326+
base_ref=args.base_ref,
12891327
branch=args.branch_info,
12901328
review_round=current_round,
12911329
findings=final_findings,

0 commit comments

Comments
 (0)