PR #464 R2 polish: thread df_name through agent_workflow (codex P1)

igerber · claude · igerber · commit 3968f0c232c9 · 2026-05-18T19:55:23.000-04:00
The CI codex review on the rebased SHA flagged a newly identified P1:
agent_workflow() hardcoded the dataframe symbol `df` into every emitted
call. A caller who does `panel = pd.read_parquet(...)` and then calls
diff_diff.agent_workflow(panel, ...) gets back a script that references
`df` and raises NameError on first execution.

Fix: add `df_name: str = "df"` parameter, threaded into:
- profile_call (`diff_diff.profile_panel({df_name}, ...)`)
- both Step 3 fit example branches (CallawaySantAnna + DifferenceInDifferences)

The default `df_name="df"` preserves prior behavior verbatim. Callers
can bind their dataframe to any identifier and pass that name as
`df_name`; the emitted script then runs in their namespace without
renaming.

Tests added:
- `test_df_name_templates_into_script`: default vs `df_name="panel"`,
  static AST scan confirms no bare `df` Name node remains.
- `test_df_name_panel_script_executes_in_panel_namespace`: stubs
  `diff_diff` with MagicMock, exec's the panel-named script in a
  namespace where `panel` exists and `df` does not — NameError on
  the dataframe symbol would fail the test.

Total tests: 21 → 23 in test_agent_workflow.py.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/agent_workflow.py b/diff_diff/agent_workflow.py
@@ -69,6 +69,7 @@ def agent_workflow(
     treatment: str,
     outcome: str,
     first_treat: Optional[str] = None,
+    df_name: str = "df",
     verbose: bool = True,
 ) -> Dict[str, Any]:
     """Print the recommended diff-diff workflow with your column names wired in.
@@ -99,6 +100,18 @@ def agent_workflow(
         example, matching the actual fit signatures (passing
         ``treatment=`` to CallawaySantAnna's ``.fit()`` would raise
         TypeError).
+    df_name : str, default ``"df"``
+        Identifier under which the caller's dataframe is bound in
+        their namespace. Templated verbatim into the emitted script
+        as the first positional argument of every call
+        (``profile_panel({df_name}, ...)``,
+        ``<Estimator>().fit({df_name}, ...)``) so the script is
+        directly executable when the caller's local variable matches.
+        If the caller has ``panel = pd.read_parquet(...)``, passing
+        ``df_name="panel"`` produces a script that references
+        ``panel`` instead of ``df``. Must be a valid Python identifier
+        (not enforced; non-identifier values produce a script that
+        won't parse).
     verbose : bool, default True
         If True, print the script to stdout. The dict is always
         returned regardless.
@@ -140,7 +153,7 @@ def agent_workflow(
     del df  # intentionally unused: orchestrator templates from column names only
 
     profile_call = (
-        f"diff_diff.profile_panel(df, "
+        f"diff_diff.profile_panel({df_name}, "
         f"{_join_kwargs(unit=unit, time=time, treatment=treatment, outcome=outcome)})"
     )
     guide_call = 'diff_diff.get_llm_guide("autonomous")'
@@ -163,7 +176,7 @@ def agent_workflow(
         fit_example_kwargs = _join_kwargs(
             outcome=outcome, unit=unit, time=time, first_treat=first_treat
         )
-        fit_example_call = f"diff_diff.CallawaySantAnna().fit(df, {fit_example_kwargs})"
+        fit_example_call = f"diff_diff.CallawaySantAnna().fit({df_name}, {fit_example_kwargs})"
         step3_label_lines = [
             "Step 3 - Fit. Your data has `first_treat` -> staggered structure.",
             "`first_treat` alone does NOT identify a single estimator; pick by",
@@ -178,7 +191,9 @@ def agent_workflow(
         fit_example_kwargs = _join_kwargs(
             outcome=outcome, unit=unit, time=time, treatment=treatment
         )
-        fit_example_call = f"diff_diff.DifferenceInDifferences().fit(df, {fit_example_kwargs})"
+        fit_example_call = (
+            f"diff_diff.DifferenceInDifferences().fit({df_name}, {fit_example_kwargs})"
+        )
         step3_label_lines = [
             "Step 3 - Fit. Pick a candidate from Step 2's patterns based on your",
             "treatment/time shape. The example below shows the simple 2x2 case",
diff --git a/tests/test_agent_workflow.py b/tests/test_agent_workflow.py
@@ -242,6 +242,75 @@ def test_emitted_script_prints_report(df):
     assert ".full_report())" in script
 
 
+def test_df_name_templates_into_script(df):
+    """Caller can rename the dataframe symbol in the emitted script.
+
+    Default (df_name="df"): script references `df`.
+    Custom (df_name="panel"): every emitted call uses `panel` and no
+    bare `df` identifier appears in the runnable code paths.
+    """
+    import ast
+
+    out_default = diff_diff.agent_workflow(
+        df,
+        unit="firm_id",
+        time="year",
+        treatment="treated",
+        outcome="logwage",
+        verbose=False,
+    )
+    out_panel = diff_diff.agent_workflow(
+        df,
+        unit="firm_id",
+        time="year",
+        treatment="treated",
+        outcome="logwage",
+        df_name="panel",
+        verbose=False,
+    )
+    # Default behavior preserved.
+    assert "profile_panel(df," in out_default["script"]
+    # Custom name flows through profile_call AND fit_example_call.
+    assert "profile_panel(panel," in out_panel["script"]
+    assert ".fit(panel," in out_panel["script"]
+    # Static reference scan: parse the panel-script and confirm no `df`
+    # Name node exists — catches template drift where a `df` reference
+    # slips in outside the templated points.
+    tree = ast.parse(out_panel["script"])
+    names = {n.id for n in ast.walk(tree) if isinstance(n, ast.Name)}
+    assert "df" not in names, (
+        f"emitted script with df_name='panel' still references `df`: "
+        f"identifier names found = {sorted(names)}"
+    )
+    assert "panel" in names
+
+
+def test_df_name_panel_script_executes_in_panel_namespace(df):
+    """The emitted script must resolve all names in a namespace where
+    `panel` exists and `df` does not. We stub out `diff_diff` with a
+    MagicMock so calls don't actually fit; the test is purely about
+    symbol resolution, not numerical correctness — if the script still
+    referenced `df` anywhere in runnable code, exec() would NameError.
+    """
+    import unittest.mock
+
+    out = diff_diff.agent_workflow(
+        df,
+        unit="firm_id",
+        time="year",
+        treatment="treated",
+        outcome="logwage",
+        df_name="panel",
+        verbose=False,
+    )
+    ns = {
+        "diff_diff": unittest.mock.MagicMock(),
+        "panel": "sentinel_df_object",
+        # Deliberately no `df` key — script must not reference it.
+    }
+    exec(compile(out["script"], "<test_df_name>", "exec"), ns)
+
+
 def test_does_not_inspect_df():
     # Pure orchestrator: a structurally-empty DataFrame must still produce
     # the templated script (no df inspection happens).