@@ -2674,8 +2674,10 @@ class TestWorkflowDoesNotExecutePRHeadCode:
26742674
26752675 FORBIDDEN_EXECUTION_PATTERNS = (
26762676 "pip install" ,
2677+ "pip3 install" ,
26772678 "pytest" ,
26782679 "npm install" ,
2680+ "npm ci" ,
26792681 "yarn install" ,
26802682 "cargo run" ,
26812683 "cargo test" ,
@@ -2706,44 +2708,123 @@ def workflow_text(self):
27062708 pytest .skip ("workflow not found" )
27072709 return wf .read_text ()
27082710
2711+ @staticmethod
2712+ def _extract_all_run_content (workflow_text ):
2713+ """Extract `run:` field content across ALL three GitHub Actions
2714+ scalar styles so the forbidden-pattern scan is fail-closed:
2715+
2716+ 1. Literal block scalar: `run: |` / `run: |-` / `run: |+`
2717+ 2. Folded block scalar: `run: >` / `run: >-` / `run: >+`
2718+ 3. Inline scalar: `run: <single-line-command>`
2719+
2720+ Returns a list of (label, content) tuples for error reporting.
2721+ Without inline-scalar coverage, `run: pytest` would bypass the
2722+ scan entirely (P1 from PR #436 R1)."""
2723+ import re
2724+
2725+ results = []
2726+
2727+ # Block scalars (literal `|` and folded `>`, optional chomping).
2728+ # Body lines are indented relative to the `run:` key; we accept
2729+ # 8+ spaces (next-step boundary is ` - ` at 6 spaces).
2730+ block_re = re .compile (
2731+ r"^\s+run:\s*[|>][-+]?\s*\n((?:^(?:[ ]{8,}|\s*$).*\n?)*)" ,
2732+ re .MULTILINE ,
2733+ )
2734+ for i , body in enumerate (block_re .findall (workflow_text )):
2735+ results .append ((f"run-block #{ i } " , body ))
2736+
2737+ # Inline scalars: `run: <cmd>` on a single line, where <cmd>
2738+ # does NOT start with `|` or `>` (those are block-scalar
2739+ # markers). Negative lookahead handles `run:|` (rare) too.
2740+ inline_re = re .compile (
2741+ r"^\s+run:[ \t]+(?![|>])([^\n]+)$" ,
2742+ re .MULTILINE ,
2743+ )
2744+ for i , line in enumerate (inline_re .findall (workflow_text )):
2745+ results .append ((f"run-inline #{ i } " , line ))
2746+
2747+ return results
2748+
27092749 def test_workflow_run_blocks_have_no_forbidden_execution_patterns (
27102750 self , workflow_text
27112751 ):
27122752 """If this fails, the CodeQL #14 dismissal is invalid. Either
27132753 remove the offending step or restructure per the dismissed plan
27142754 (checkout BASE_SHA only + git show for PR-head)."""
2715- import re
2716-
2717- # Match every `run: |` block's body. The body is the indented
2718- # content following `run: |` until the next step (next ` - `
2719- # at 6-space indent) or end of file. Body lines are at 10+ space
2720- # indent (the `run: |` itself is at 8-space step-property indent;
2721- # block scalars indent further).
2722- run_block_re = re .compile (
2723- r"^\s+run:\s*\|\s*\n((?:^(?:[ ]{8,}|\s*$).*\n?)*)" ,
2724- re .MULTILINE ,
2755+ run_contents = self ._extract_all_run_content (workflow_text )
2756+ assert run_contents , (
2757+ "No `run:` content found — extraction broke. The workflow "
2758+ "must contain at least the resolve-pr's downstream run "
2759+ "blocks; if extraction returns empty, the regex needs fixing."
27252760 )
2726- run_blocks = run_block_re .findall (workflow_text )
2727- assert run_blocks , "No `run:` blocks found — extraction regex broke"
27282761
27292762 violations = []
2730- for i , block in enumerate ( run_blocks ) :
2763+ for label , content in run_contents :
27312764 for pattern in self .FORBIDDEN_EXECUTION_PATTERNS :
2732- if pattern in block :
2765+ if pattern in content :
27332766 snippet = next (
2734- (line for line in block .splitlines () if pattern in line ),
2735- "" ,
2767+ (
2768+ line
2769+ for line in content .splitlines ()
2770+ if pattern in line
2771+ ),
2772+ content .strip ()[:120 ],
27362773 ).strip ()
27372774 violations .append (
2738- f"run-block # { i } : forbidden pattern { pattern !r} in: { snippet } "
2775+ f"{ label } : forbidden pattern { pattern !r} in: { snippet } "
27392776 )
27402777 assert not violations , (
27412778 "CodeQL #14 dismissal invalidated by forbidden execution "
2742- "patterns in workflow run blocks :\n " + "\n " .join (violations )
2779+ "patterns in workflow ` run:` content :\n " + "\n " .join (violations )
27432780 + "\n See `.github/workflows/ai_pr_review.yml` comment block "
27442781 "above the resolve-pr step for context."
27452782 )
27462783
2784+ def test_workflow_no_python_file_execution_against_workspace (
2785+ self , workflow_text
2786+ ):
2787+ """`python3 <path>.py` invocations against workspace-relative
2788+ paths execute PR-head Python file bytes — invalidating the
2789+ dismissal. Inline scripts (`python3 -c '...'`) and module
2790+ invocations (`python3 -m foo`) don't capture .py tokens, so
2791+ they're naturally excluded. /tmp/-prefixed paths are SAFE
2792+ because they're staged from BASE_SHA via `git show`."""
2793+ import re
2794+
2795+ run_contents = self ._extract_all_run_content (workflow_text )
2796+ assert run_contents , "No `run:` content extracted"
2797+
2798+ # Match `python` or `python3` followed by whitespace then a
2799+ # token ending in `.py`. Captures the full path token. We
2800+ # only flag captures that don't start with `/tmp/`.
2801+ py_exec_re = re .compile (r"\bpython3?\s+(\S+\.py)\b" )
2802+
2803+ violations = []
2804+ for label , content in run_contents :
2805+ for path in py_exec_re .findall (content ):
2806+ if path .startswith ("/tmp/" ):
2807+ continue
2808+ # Find the line for the snippet
2809+ snippet = next (
2810+ (
2811+ line
2812+ for line in content .splitlines ()
2813+ if path in line and "python" in line
2814+ ),
2815+ content .strip ()[:120 ],
2816+ ).strip ()
2817+ violations .append (
2818+ f"{ label } : workspace-relative python file execution "
2819+ f"{ path !r} in: { snippet } "
2820+ )
2821+ assert not violations , (
2822+ "CodeQL #14 dismissal invalidated by workspace-relative "
2823+ "python file execution. These would execute PR-head bytes; "
2824+ "use /tmp/-prefixed paths (BASE-staged via `git show`) "
2825+ "instead.\n " + "\n " .join (violations )
2826+ )
2827+
27472828 def test_workflow_dismissal_comment_block_present (self , workflow_text ):
27482829 """The comment block that documents the #14 dismissal must stay
27492830 attached to the workflow file. If a future edit removes it, the
@@ -2761,6 +2842,58 @@ def test_workflow_dismissal_comment_block_present(self, workflow_text):
27612842 "future maintainers can find it."
27622843 )
27632844
2845+ # ──────────────────────────────────────────────────────────────────
2846+ # Dismissal-invariant pins. The comment block above the resolve-pr
2847+ # step claims four invariants hold; the guard test above only pins
2848+ # invariant #1's "no execution" half. The tests below pin the
2849+ # remaining structural invariants. If any of these tests fails, the
2850+ # CodeQL #14 dismissal is invalid for the same reason a forbidden
2851+ # execution pattern would invalidate it.
2852+ # ──────────────────────────────────────────────────────────────────
2853+
2854+ def test_workflow_codex_step_uses_read_only_sandbox (self , workflow_text ):
2855+ """Invariant #1 (other half): Codex action runs sandbox: read-only.
2856+ If a future edit relaxes this to workspace-write or
2857+ danger-full-access, Codex could write or execute PR-head bytes
2858+ — the dismissal premise breaks."""
2859+ assert "sandbox: read-only" in workflow_text , (
2860+ "Codex step must use `sandbox: read-only` per the dismissal "
2861+ "rationale (comment block above resolve-pr step). Without "
2862+ "read-only sandbox, Codex can write or execute PR-head "
2863+ "content and the CodeQL #14 dismissal is invalid."
2864+ )
2865+
2866+ def test_workflow_resolve_pr_sets_head_sha_from_api (self , workflow_text ):
2867+ """Invariant #4: head_sha is API-pinned in the resolve-pr step.
2868+ If a future edit reads head_sha from the event payload (which
2869+ is mutable for issue_comment events) instead of the API, the
2870+ TOCTOU window grows."""
2871+ assert 'core.setOutput("head_sha", pr.data.head.sha)' in workflow_text , (
2872+ "resolve-pr step must pin `head_sha` from the API "
2873+ "(`pr.data.head.sha`), not from the event payload. See "
2874+ "dismissal rationale invariant #4 in the workflow comment "
2875+ "block above the resolve-pr step."
2876+ )
2877+
2878+ def test_workflow_comment_triggers_require_author_association (
2879+ self , workflow_text
2880+ ):
2881+ """Invariant #3: comment-triggered events (issue_comment,
2882+ pull_request_review_comment) require author_association in
2883+ OWNER/MEMBER/COLLABORATOR. If a future edit drops or weakens
2884+ this gate, random commenters could trigger the workflow."""
2885+ assert "github.event.comment.author_association == 'OWNER'" in workflow_text
2886+ assert "github.event.comment.author_association == 'MEMBER'" in workflow_text
2887+ assert (
2888+ "github.event.comment.author_association == 'COLLABORATOR'"
2889+ in workflow_text
2890+ ), (
2891+ "Comment-triggered events must check author_association is "
2892+ "OWNER/MEMBER/COLLABORATOR per dismissal invariant #3. "
2893+ "Without this gate, any GitHub user commenting on a PR "
2894+ "could trigger the workflow with secrets in scope."
2895+ )
2896+
27642897
27652898class TestExtractResponseText :
27662899 def test_prefers_output_text_field (self , review_mod ):
0 commit comments