@@ -2717,6 +2717,29 @@ class TestWorkflowDoesNotExecutePRHeadCode:
27172717 "/tmp/notebook_md_extract.py" : "tools/notebook_md_extract.py" ,
27182718 }
27192719
2720+ # Literal allowlist of single-line `python3 -c <body>` payloads.
2721+ # Any single-line `python3 -c '<body>'` whose body is NOT in this
2722+ # set is classified as `python_c_unsafe` and fails the
2723+ # python-file-execution test.
2724+ #
2725+ # R9 fix for PR #436: prior versions blanket-exempted all
2726+ # `python3 -c` invocations. A future edit could write
2727+ # python3 -c 'exec(open("diff_diff/evil.py").read())'
2728+ # which would NOT have been classified as a python_exec
2729+ # (because `-c` disabled script-mode classification), silently
2730+ # bypassing the dismissal.
2731+ #
2732+ # Initially empty because the workflow's existing `-c` bodies
2733+ # (PR_TITLE / PR_BODY / PREV_REVIEW / SANITIZED_PROSE
2734+ # sanitization at lines 248-283, 403-408, 466-471 of
2735+ # ai_pr_review.yml) are MULTI-LINE strings — shlex can't
2736+ # tokenize them from a single physical line, so they're
2737+ # invisible to the line-by-line classifier and exempt by
2738+ # virtue of not being detected. Any FUTURE single-line `-c`
2739+ # body would be detected and must be added here with explicit
2740+ # review of why the body is safe.
2741+ ALLOWED_PYTHON_C_PAYLOADS = ()
2742+
27202743 @pytest .fixture
27212744 def workflow_text (self ):
27222745 assert _SCRIPT_PATH is not None
@@ -2939,6 +2962,10 @@ def _classify_shell_line(line):
29392962 LEADING_KEYWORDS = {
29402963 "if" , "then" , "else" , "elif" , "do" , "while" , "for" ,
29412964 "done" , "fi" , "until" , "case" , "esac" ,
2965+ # R9 fix for #436: shell negation `!` in command position.
2966+ # `if ! python3 evil.py; then ... fi` would otherwise have
2967+ # cmd=`!`, evading every classifier branch.
2968+ "!" ,
29422969 }
29432970 # R8 fix for #436: shell group-delimiter tokens. shlex with
29442971 # `punctuation_chars=True` tokenizes `(` and `)` as separate
@@ -3018,9 +3045,22 @@ def _classify_shell_line(line):
30183045 # PR #436. Without this, `bash -c "python3
30193046 # diff_diff/evil.py"` would classify only as `bash`
30203047 # (no python_exec), bypassing the allowlist.
3048+ #
3049+ # R9 fix: also handle COMPOUND short flags that contain
3050+ # `c` — `-lc`, `-ec`, `-exc`, etc. are valid bash
3051+ # shorthand for "set various options AND -c". So any
3052+ # short-flag bundle (single `-` not `--`) containing `c`
3053+ # in the chars after `-` triggers the inline-script
3054+ # recursion.
30213055 if cmd in ("bash" , "sh" ):
30223056 for i in range (1 , len (tokens )):
3023- if tokens [i ] == "-c" and i + 1 < len (tokens ):
3057+ t = tokens [i ]
3058+ is_c_flag = (
3059+ t .startswith ("-" )
3060+ and not t .startswith ("--" )
3061+ and "c" in t [1 :]
3062+ )
3063+ if is_c_flag and i + 1 < len (tokens ):
30243064 inner = tokens [i + 1 ]
30253065 seg_actions .extend (
30263066 TestWorkflowDoesNotExecutePRHeadCode ._classify_shell_line (inner )
@@ -3031,27 +3071,27 @@ def _classify_shell_line(line):
30313071 # positional regardless of extension — `python3 /tmp/foo.py.bak`
30323072 # IS a python execution of /tmp/foo.py.bak (allowlist
30333073 # check then differentiates allowed from forbidden).
3034- # If `-c` or `-m` is seen, the script-execution path is
3035- # bypassed (inline string / module mode); subsequent
3036- # positionals are args to that mode, not a script path.
3037- #
3038- # NOTE: `python3 -c '<inline-payload>'` is NOT inspected
3039- # for what the payload itself does (e.g.,
3040- # `python3 -c 'exec(open("diff_diff/evil.py").read())'`).
3041- # Catching that would require Python AST inspection or a
3042- # literal allowlist of every existing `-c` body in the
3043- # workflow — both brittle. The current workflow's `-c`
3044- # payloads operate on sanitized env vars (PR_TITLE etc.)
3045- # set from base-controlled values; the dismissal accepts
3046- # the residual that future `-c` payloads could embed
3047- # malicious shell. Surface to user before iterating
3048- # further on this if it re-fires.
3074+ # `-c <body>` is captured as a `python_c_payload` action
3075+ # for the literal-allowlist test; `-m` skips both flag
3076+ # and module-name (no python_exec).
30493077 elif cmd in ("python" , "python3" , "python2" ):
30503078 script_mode_disabled = False
30513079 i = 1
30523080 while i < len (tokens ):
30533081 t = tokens [i ]
3054- if t in FLAGS_WITH_ARG :
3082+ if t == "-c" and i + 1 < len (tokens ):
3083+ # R9 fix for #436: capture -c body for the
3084+ # literal-allowlist check. Without this, a
3085+ # future single-line
3086+ # python3 -c 'exec(open("evil.py").read())'
3087+ # would be silently accepted.
3088+ seg_actions .append (
3089+ ("python_c_payload" , tokens [i + 1 ])
3090+ )
3091+ script_mode_disabled = True
3092+ i += 2
3093+ continue
3094+ if t == "-m" :
30553095 script_mode_disabled = True
30563096 i += 2
30573097 continue
@@ -3138,21 +3178,35 @@ def test_workflow_python_file_execution_uses_only_allowlisted_paths(
31383178 joined = self ._join_shell_continuations (content .splitlines ())
31393179 for _start_idx , joined_line in joined :
31403180 for action , target in self ._classify_shell_line (joined_line ):
3141- if action != "python_exec" :
3142- continue
3143- if target in self .ALLOWED_TMP_PYTHON_EXECUTIONS :
3144- continue
3145- violations .append (
3146- f"{ label } : non-allowlisted python file "
3147- f"execution { target !r} in: { joined_line .strip ()[:120 ]} "
3148- )
3181+ if action == "python_exec" :
3182+ if target in self .ALLOWED_TMP_PYTHON_EXECUTIONS :
3183+ continue
3184+ violations .append (
3185+ f"{ label } : non-allowlisted python file "
3186+ f"execution { target !r} in: { joined_line .strip ()[:120 ]} "
3187+ )
3188+ elif action == "python_c_payload" :
3189+ # R9 fix: literal allowlist of -c bodies.
3190+ # Existing multi-line bodies aren't tokenized
3191+ # by shlex so they're invisible (exempt).
3192+ # New single-line bodies must be added to
3193+ # ALLOWED_PYTHON_C_PAYLOADS with explicit
3194+ # safety review.
3195+ if target in self .ALLOWED_PYTHON_C_PAYLOADS :
3196+ continue
3197+ violations .append (
3198+ f"{ label } : non-allowlisted python -c "
3199+ f"payload { target [:80 ]!r} in: "
3200+ f"{ joined_line .strip ()[:120 ]} "
3201+ )
31493202 assert not violations , (
3150- "CodeQL #14 dismissal invalidated by python file execution "
3151- "of non-allowlisted paths. Either use a path in "
3152- "ALLOWED_TMP_PYTHON_EXECUTIONS (after staging it from "
3153- "BASE_SHA via `git show`), refactor to `python3 -c '...'` "
3154- "with sanitized env vars, or add the new path to the "
3155- "allowlist explicitly with a BASE_SHA staging command.\n "
3203+ "CodeQL #14 dismissal invalidated by python execution. "
3204+ "Either: (a) use a path in ALLOWED_TMP_PYTHON_EXECUTIONS "
3205+ "after staging from BASE_SHA; (b) for `-c` payloads, add "
3206+ "to ALLOWED_PYTHON_C_PAYLOADS with explicit review of "
3207+ "why the payload is safe (no exec/eval/open of "
3208+ "non-/tmp paths); (c) refactor to a base-staged `/tmp` "
3209+ "script.\n "
31563210 + "\n " .join (violations )
31573211 )
31583212
@@ -3584,6 +3638,60 @@ def cp_targets(line):
35843638 'bash -c "( python3 diff_diff/evil.py )"'
35853639 ) == ["diff_diff/evil.py" ]
35863640
3641+ def test_classify_handles_bash_compound_flags_and_shell_negation (self ):
3642+ """R9 regression: bash/sh `-c`-containing compound flags
3643+ (`-lc`, `-ec`, `-exc`) recurse like bare `-c`. Shell
3644+ negation `!` in command position is stripped so the
3645+ following command is classified."""
3646+ cls = self ._classify_shell_line
3647+
3648+ def py_targets (line ):
3649+ return [t for a , t in cls (line ) if a == "python_exec" ]
3650+
3651+ # bash compound flags containing `c`
3652+ assert py_targets ('bash -lc "python3 diff_diff/evil.py"' ) == ["diff_diff/evil.py" ]
3653+ assert py_targets ('bash -ec "python3 diff_diff/evil.py"' ) == ["diff_diff/evil.py" ]
3654+ assert py_targets ('bash -exc "python3 diff_diff/evil.py"' ) == ["diff_diff/evil.py" ]
3655+ assert py_targets ('sh -lc "python3 diff_diff/evil.py"' ) == ["diff_diff/evil.py" ]
3656+ # sh -c without bundle
3657+ assert py_targets ('sh -c "python3 diff_diff/evil.py"' ) == ["diff_diff/evil.py" ]
3658+ # bash flags without `c` should NOT trigger recursion
3659+ # (no -c means no inline-script body)
3660+ assert py_targets ("bash -l" ) == []
3661+ assert py_targets ("bash -i" ) == []
3662+
3663+ # Shell negation `!` in command position
3664+ assert py_targets ("if ! python3 diff_diff/evil.py; then echo ok; fi" ) == ["diff_diff/evil.py" ]
3665+ assert py_targets ("! python3 diff_diff/evil.py" ) == ["diff_diff/evil.py" ]
3666+
3667+ def test_classify_python_c_payload_against_allowlist (self ):
3668+ """R9 regression: `python3 -c <body>` is captured as a
3669+ `python_c_payload` action with the body as target. The
3670+ python-file-execution test then rejects any body not in
3671+ ALLOWED_PYTHON_C_PAYLOADS.
3672+
3673+ Without this, `python3 -c 'exec(open("evil.py").read())'`
3674+ would be silently exempted (script_mode_disabled prevented
3675+ any python_exec action from being recorded)."""
3676+ cls = self ._classify_shell_line
3677+
3678+ def c_payloads (line ):
3679+ return [t for a , t in cls (line ) if a == "python_c_payload" ]
3680+
3681+ # Single-line -c body captured
3682+ assert c_payloads ('python3 -c \' exec(open("diff_diff/evil.py").read())\' ' ) == [
3683+ 'exec(open("diff_diff/evil.py").read())'
3684+ ]
3685+ assert c_payloads ("python3 -c 'print(1)'" ) == ["print(1)" ]
3686+ # -m doesn't capture
3687+ assert c_payloads ("python3 -m unittest discover" ) == []
3688+ # No -c at all
3689+ assert c_payloads ("python3 /tmp/foo.py" ) == []
3690+ # -c inside bash -c recursion
3691+ assert c_payloads (
3692+ 'bash -c "python3 -c \' exec(open(\\ "evil.py\\ ").read())\' "'
3693+ ) == ['exec(open("evil.py").read())' ]
3694+
35873695 def test_classify_git_show_redirect (self ):
35883696 """The BASE_SHA staging command must produce a
35893697 git_show_redirect action with (source, dest) tuple, matched
0 commit comments