Skip to content

Commit 7b0210d

Browse files
igerberclaude
andcommitted
Address PR #415 R1 review (1 P1 argparse safety, 1 P2 workflow contract test)
R1 P1 — Workflow passed PR title/body in separate-value argv form (--pr-title "$PR_TITLE"). A PR body starting with an option-looking token (e.g. "--foo", a YAML "---" header, or any "--flag" pattern) would be misparsed by argparse and break the AI review job. Switched to --key=value form ("--pr-title=$PR_TITLE" / "--pr-body=$PR_BODY") which argparse cannot reinterpret as a separate flag. R1 P2 — The PR claimed workflow-level migration to single-shot Responses API but had no regression test pinning the actual workflow surface. Added two new test classes: - TestWorkflowContract: asserts ai_pr_review.yml does NOT contain openai/codex-action, DOES contain "python3 .claude/scripts/openai_review.py", passes the required flag set (--ci-mode, --full-registry, --context standard, --model gpt-5.5, --review-criteria, --registry, --diff, --changed-files, --output, --branch-info, --repo-root), uses --key=value form for PR title/body, preserves the canonical <!-- ai-pr-review:codex:auto --> marker and the rerun marker pattern, and preserves the diff path-excludes for benchmarks/data/real and docs/tutorials. - TestMainCLIPropagation: runs main() in --dry-run mode with --ci-mode + adversarial option-looking PR title/body in --key=value form, asserts the PR Context section appears in the printed prompt with the literal values. Also extended TestCompilePromptWithPRContext with test_option_looking_pr_title_body_preserved_literally — verifies compile_prompt() preserves option-looking text as literal data, the companion library-level test for the workflow-level argparse fix. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c00812b commit 7b0210d

2 files changed

Lines changed: 169 additions & 2 deletions

File tree

.github/workflows/ai_pr_review.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,15 +145,18 @@ jobs:
145145
146146
# Pin --model gpt-5.5 explicitly so future bumps to the script's
147147
# DEFAULT_MODEL don't silently ship to CI without review.
148+
# Use --key=value form for untrusted PR title/body so argparse
149+
# cannot misinterpret an option-looking value (e.g. a PR body
150+
# starting with "--") as a separate flag and break the job.
148151
ARGS=(--ci-mode --full-registry --context standard --model gpt-5.5
149152
--review-criteria .github/codex/prompts/pr_review.md
150153
--registry docs/methodology/REGISTRY.md
151154
--diff /tmp/pr-diff.patch
152155
--changed-files /tmp/pr-files.txt
153156
--output /tmp/review-output.md
154157
--branch-info "$BRANCH"
155-
--pr-title "$PR_TITLE"
156-
--pr-body "$PR_BODY"
158+
"--pr-title=$PR_TITLE"
159+
"--pr-body=$PR_BODY"
157160
--repo-root "$(pwd)")
158161
if [ "$IS_RERUN" = "true" ] && [ -f /tmp/previous-review.md ]; then
159162
ARGS+=(--previous-review /tmp/previous-review.md)

tests/test_openai_review.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import os
1111
import pathlib
1212
import subprocess
13+
import sys
1314

1415
import pytest
1516

@@ -577,6 +578,169 @@ def test_local_mode_ignores_pr_title_body(self, review_mod):
577578
assert "## PR Context" not in result
578579
assert "Should be ignored" not in result
579580

581+
def test_option_looking_pr_title_body_preserved_literally(self, review_mod):
582+
"""compile_prompt must preserve PR title/body text starting with `--`
583+
as literal data — not strip, mangle, or interpret it. Pairs with the
584+
workflow's --key=value argv form (PR #415 R1 P1) which prevents
585+
argparse from misparsing such values upstream."""
586+
adversarial_titles = ["--ci-mode hijack", "--help", "--pr-body=injected"]
587+
adversarial_bodies = ["--foo bar", "---\nyaml: header\n---", "--also-not-a-flag"]
588+
for title in adversarial_titles:
589+
for body in adversarial_bodies:
590+
result = review_mod.compile_prompt(
591+
criteria_text="C.",
592+
registry_content="R.",
593+
diff_text="D.",
594+
changed_files_text="M\tf.py",
595+
branch_info="b",
596+
previous_review=None,
597+
ci_mode=True,
598+
pr_title=title,
599+
pr_body=body,
600+
)
601+
assert title in result, (
602+
f"option-looking title {title!r} not preserved"
603+
)
604+
# Body is wrapped, so check inside the wrapper
605+
inside_wrapper = result.split('<pr-body untrusted="true">', 1)[1]
606+
inside_wrapper = inside_wrapper.split("</pr-body>", 1)[0]
607+
assert body in inside_wrapper, (
608+
f"option-looking body {body!r} not preserved"
609+
)
610+
611+
612+
# ---------------------------------------------------------------------------
613+
# Workflow contract — pin the CI single-shot migration claim
614+
# ---------------------------------------------------------------------------
615+
616+
617+
class TestWorkflowContract:
618+
"""Pins what ``.github/workflows/ai_pr_review.yml`` ships, so a future
619+
edit cannot accidentally reintroduce ``openai/codex-action``, drop
620+
``--ci-mode``, omit ``--full-registry``, stop passing PR context, or
621+
change the canonical comment marker without a visible test failure.
622+
623+
Regression coverage requested by PR #415 R1 P2 (claim-vs-shipped audit
624+
self-applied to the workflow surface)."""
625+
626+
@pytest.fixture(scope="class")
627+
def workflow_text(self):
628+
if _SCRIPT_PATH is None:
629+
pytest.skip("Could not resolve script path")
630+
assert _SCRIPT_PATH is not None # narrow for type checker
631+
repo_root = _SCRIPT_PATH.parent.parent.parent
632+
wf_path = repo_root / ".github" / "workflows" / "ai_pr_review.yml"
633+
if not wf_path.exists():
634+
pytest.skip("ai_pr_review.yml not found")
635+
return wf_path.read_text()
636+
637+
def test_codex_action_not_invoked(self, workflow_text):
638+
assert "openai/codex-action" not in workflow_text, (
639+
"Workflow must not reintroduce the Codex action — the migration "
640+
"moved CI to single-shot Responses API via openai_review.py."
641+
)
642+
643+
def test_invokes_python_review_script(self, workflow_text):
644+
assert "python3 .claude/scripts/openai_review.py" in workflow_text
645+
646+
def test_passes_required_flags(self, workflow_text):
647+
for flag in [
648+
"--ci-mode",
649+
"--full-registry",
650+
"--context standard",
651+
"--model gpt-5.5",
652+
"--review-criteria",
653+
"--registry",
654+
"--diff",
655+
"--changed-files",
656+
"--output",
657+
"--branch-info",
658+
"--repo-root",
659+
]:
660+
assert flag in workflow_text, f"Missing required flag {flag!r}"
661+
662+
def test_passes_pr_title_body_in_equals_form(self, workflow_text):
663+
"""Untrusted PR title/body MUST use --key=value form so argparse can't
664+
misinterpret an option-looking value. Separate-value form is forbidden."""
665+
assert '"--pr-title=$PR_TITLE"' in workflow_text
666+
assert '"--pr-body=$PR_BODY"' in workflow_text
667+
# And the unsafe forms must not appear
668+
assert '--pr-title "$PR_TITLE"' not in workflow_text
669+
assert '--pr-body "$PR_BODY"' not in workflow_text
670+
671+
def test_canonical_comment_marker_preserved(self, workflow_text):
672+
"""Backward-compat: historical PR canonical comments use the
673+
:codex: marker. Renaming would orphan them."""
674+
assert '<!-- ai-pr-review:codex:auto -->' in workflow_text
675+
assert 'ai-pr-review:codex:rerun:' in workflow_text
676+
677+
def test_diff_path_excludes_preserved(self, workflow_text):
678+
"""Large generated/data files must stay out of the unified diff to
679+
avoid blowing the model's input limit."""
680+
for exclude in [
681+
"':!benchmarks/data/real/*.json'",
682+
"':!benchmarks/data/real/*.csv'",
683+
"':!docs/tutorials/*.ipynb'",
684+
]:
685+
assert exclude in workflow_text, f"Missing diff exclude {exclude!r}"
686+
687+
688+
# ---------------------------------------------------------------------------
689+
# main() CLI propagation — pin the script's --ci-mode + PR-context flow
690+
# ---------------------------------------------------------------------------
691+
692+
693+
class TestMainCLIPropagation:
694+
"""Run main() in --dry-run mode with --ci-mode + --pr-title/--pr-body
695+
and assert the PR Context section appears in the printed prompt with the
696+
literal values. Regression coverage for PR #415 R1 P2."""
697+
698+
def test_main_dry_run_propagates_pr_context_via_equals_form(
699+
self, review_mod, monkeypatch, capsys, tmp_path
700+
):
701+
"""Equals-form CLI args (matches workflow) must reach compile_prompt."""
702+
# Minimal input files
703+
(tmp_path / "diff.patch").write_text("diff --git a/foo b/foo\n")
704+
(tmp_path / "files.txt").write_text("M\tdiff_diff/foo.py\n")
705+
# Use the real prompt so substitutions don't fire warnings
706+
if _SCRIPT_PATH is None:
707+
pytest.skip("Could not resolve script path")
708+
assert _SCRIPT_PATH is not None # narrow for type checker
709+
repo_root = _SCRIPT_PATH.parent.parent.parent
710+
criteria_path = repo_root / ".github" / "codex" / "prompts" / "pr_review.md"
711+
registry_path = repo_root / "docs" / "methodology" / "REGISTRY.md"
712+
if not criteria_path.exists() or not registry_path.exists():
713+
pytest.skip("Required prompt/registry files not present")
714+
715+
# Adversarial PR title/body that would break argparse without
716+
# the --key=value form.
717+
argv = [
718+
"openai_review.py",
719+
"--dry-run",
720+
"--ci-mode",
721+
"--full-registry",
722+
"--context", "minimal",
723+
"--review-criteria", str(criteria_path),
724+
"--registry", str(registry_path),
725+
"--diff", str(tmp_path / "diff.patch"),
726+
"--changed-files", str(tmp_path / "files.txt"),
727+
"--output", str(tmp_path / "out.md"),
728+
"--branch-info", "test-branch",
729+
"--pr-title=--option-looking-title",
730+
"--pr-body=--option-looking-body with --more --flags",
731+
]
732+
monkeypatch.setattr(sys, "argv", argv)
733+
734+
with pytest.raises(SystemExit) as exc_info:
735+
review_mod.main()
736+
assert exc_info.value.code == 0
737+
738+
captured = capsys.readouterr()
739+
# Dry-run prints the compiled prompt to stdout
740+
assert "## PR Context" in captured.out
741+
assert "--option-looking-title" in captured.out
742+
assert "--option-looking-body with --more --flags" in captured.out
743+
580744

581745
# ---------------------------------------------------------------------------
582746
# PREFIX_TO_SECTIONS mapping coverage

0 commit comments

Comments
 (0)