Skip to content

Commit 842e72f

Browse files
igerberclaude
andcommitted
Codex preflight: scan all paths in rename/copy --name-status lines
R8 review on PR #421: P1: `_scan_prompt_artifacts()` only inspected the substring after the first tab in each --name-status line, treating it as a single path. For rename (`R*`) and copy (`C*`) lines the format is multi-column: R100\told_path\tnew_path C100\told_path\tnew_path So `R100\tconfig/.env\tconfig/renamed.txt` was parsed as one pseudo-path "config/.env\tconfig/renamed.txt", whose os.path.basename is just "renamed.txt" — completely missing the sensitive old path. The diff body still contains the renamed file's prior content, so the abort gate should have fired but didn't. Fix: split each line on \t into status + N paths, scan EACH path independently for sensitive basenames. Empty paths skipped. Tests added (5 new, all assert the abort/detection that R8 said was unpinned): - test_changed_files_rename_old_sensitive_new_safe (R100\tconfig/.env\tconfig/renamed.txt → detect) - test_changed_files_copy_old_sensitive_new_safe (C100\tconfig/.env\tconfig/copy.txt → detect) - test_changed_files_rename_new_path_also_scanned (R100\tconfig/old.txt\tconfig/.env → detect) - test_delta_changed_files_rename_also_scanned (same pattern via delta_changed_files_text) - test_aborts_on_renamed_sensitive_file_in_changed_files (non-dry-run integration: rename triggers main()-gate abort) Tests: 252 pass (5 new). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 5109720 commit 842e72f

2 files changed

Lines changed: 96 additions & 14 deletions

File tree

.claude/scripts/openai_review.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,25 +1425,37 @@ def _scan_prompt_artifacts(
14251425
if delta_diff_text and SECRET_CONTENT_PATTERN.search(delta_diff_text):
14261426
findings.append("[delta diff body]")
14271427

1428-
# Filename scan over changed-files lists. Each line is a name-status
1429-
# entry like "M\tfoo.py" or "D\tconfig/.env"; extract the path column.
1428+
# Filename scan over changed-files lists. Each line is a `git diff
1429+
# --name-status` entry:
1430+
# M\tfoo.py
1431+
# D\tconfig/.env
1432+
# R100\told/.env\tnew/file.txt (rename — TWO paths)
1433+
# C100\told/.env\tnew/copy.txt (copy — TWO paths)
1434+
# We must scan EACH path independently so an old-sensitive→new-safe
1435+
# rename still aborts (the diff body still contains the old file's
1436+
# content, and the rename target may incorporate it).
14301437
def _scan_changed_files(text: str, label: str) -> None:
14311438
for line in text.splitlines():
14321439
line = line.strip()
14331440
if not line:
14341441
continue
1435-
parts = line.split("\t", 1) if "\t" in line else line.split(None, 1)
1436-
path = parts[1] if len(parts) > 1 else parts[0]
1437-
basename_lc = os.path.basename(path).lower()
1438-
if any(
1439-
basename_lc.endswith(suffix)
1440-
for suffix in SENSITIVE_FILE_SAFE_SUFFIXES
1441-
):
1442-
continue
1443-
for pat in SENSITIVE_FILE_PATTERNS:
1444-
if fnmatch.fnmatch(basename_lc, pat):
1445-
findings.append(f"[{label}: {path}]")
1446-
break
1442+
fields = line.split("\t")
1443+
# First field is the status code (M/A/D/R*/C*/T/...); the
1444+
# remaining fields are 1+ paths.
1445+
paths = fields[1:] if len(fields) > 1 else fields[:1]
1446+
for path in paths:
1447+
if not path:
1448+
continue
1449+
basename_lc = os.path.basename(path).lower()
1450+
if any(
1451+
basename_lc.endswith(suffix)
1452+
for suffix in SENSITIVE_FILE_SAFE_SUFFIXES
1453+
):
1454+
continue
1455+
for pat in SENSITIVE_FILE_PATTERNS:
1456+
if fnmatch.fnmatch(basename_lc, pat):
1457+
findings.append(f"[{label}: {path}]")
1458+
break
14471459

14481460
_scan_changed_files(changed_files_text or "", "changed-files")
14491461
if delta_changed_files_text:

tests/test_openai_review.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2424,6 +2424,58 @@ def test_changed_files_skips_safe_variants(self, review_mod):
24242424
)
24252425
assert result == []
24262426

2427+
def test_changed_files_rename_old_sensitive_new_safe(self, review_mod):
2428+
"""`git diff --name-status` rename lines have TWO paths:
2429+
`R100\\told\\tnew`. If the OLD path is sensitive (e.g. config/.env
2430+
renamed to config/renamed.txt), the diff body still contains the
2431+
old file's contents — must abort. Earlier parser only inspected
2432+
the combined post-first-tab substring, missing this case."""
2433+
result = review_mod._scan_prompt_artifacts(
2434+
diff_text="",
2435+
previous_review=None,
2436+
delta_diff_text=None,
2437+
changed_files_text="R100\tconfig/.env\tconfig/renamed.txt",
2438+
delta_changed_files_text=None,
2439+
)
2440+
assert any(".env" in r for r in result), (
2441+
f"Expected sensitive OLD path to be detected; got: {result}"
2442+
)
2443+
2444+
def test_changed_files_copy_old_sensitive_new_safe(self, review_mod):
2445+
"""`C100\\told\\tnew` copy lines have the same structure as rename
2446+
lines — both paths must be scanned."""
2447+
result = review_mod._scan_prompt_artifacts(
2448+
diff_text="",
2449+
previous_review=None,
2450+
delta_diff_text=None,
2451+
changed_files_text="C100\tconfig/.env\tconfig/copy.txt",
2452+
delta_changed_files_text=None,
2453+
)
2454+
assert any(".env" in r for r in result)
2455+
2456+
def test_changed_files_rename_new_path_also_scanned(self, review_mod):
2457+
"""If the NEW path of a rename is sensitive (someone renamed
2458+
config.txt → .env), that also triggers."""
2459+
result = review_mod._scan_prompt_artifacts(
2460+
diff_text="",
2461+
previous_review=None,
2462+
delta_diff_text=None,
2463+
changed_files_text="R100\tconfig/old.txt\tconfig/.env",
2464+
delta_changed_files_text=None,
2465+
)
2466+
assert any(".env" in r for r in result)
2467+
2468+
def test_delta_changed_files_rename_also_scanned(self, review_mod):
2469+
"""Same logic applies to delta_changed_files_text."""
2470+
result = review_mod._scan_prompt_artifacts(
2471+
diff_text="",
2472+
previous_review=None,
2473+
delta_diff_text=None,
2474+
changed_files_text="",
2475+
delta_changed_files_text="R100\tconfig/.env\tconfig/safe.txt",
2476+
)
2477+
assert any(".env" in r for r in result)
2478+
24272479
def test_clean_inputs_return_empty(self, review_mod):
24282480
result = review_mod._scan_prompt_artifacts(
24292481
diff_text="--- a/foo\n+++ b/foo\n@@ -1 +1 @@\n-x\n+y\n",
@@ -2865,6 +2917,24 @@ def test_allow_secrets_overrides_prompt_artifact_gate(
28652917
assert "ABORT" not in captured.err
28662918
assert gate_env["codex_calls"]["called"]
28672919

2920+
def test_aborts_on_renamed_sensitive_file_in_changed_files(
2921+
self, gate_env, review_mod, capsys
2922+
):
2923+
"""`R100\\told_sensitive\\tnew_safe` rename: old path was a .env,
2924+
new is a safe rename. The diff body still contains the old
2925+
file's contents → must abort."""
2926+
(gate_env["tmp_path"] / "README.md").write_text("# clean repo")
2927+
gate_env["files"].write_text(
2928+
"R100\tconfig/.env\tconfig/renamed.txt\n"
2929+
)
2930+
with pytest.raises(SystemExit) as exc:
2931+
self._run(gate_env, review_mod)
2932+
assert exc.value.code == 1
2933+
captured = capsys.readouterr()
2934+
assert "ABORT" in captured.err
2935+
assert ".env" in captured.err
2936+
assert not gate_env["codex_calls"]["called"]
2937+
28682938

28692939
class TestExtractResponseText:
28702940
def test_prefers_output_text_field(self, review_mod):

0 commit comments

Comments
 (0)