|
| 1 | +"""Contract test for the agent-discoverability surface (issue #461). |
| 2 | +
|
| 3 | +This is a static snapshot test of the four contract surfaces that PR |
| 4 | +#464 introduced for LLM-agent discovery: |
| 5 | +
|
| 6 | +1. ``__all__`` membership of agent-facing primitives |
| 7 | +2. ``dir(diff_diff)`` head-first ordering (via the ``_OrderedName`` trick) |
| 8 | +3. Top-level ``__doc__`` content (first paragraph names the recommended |
| 9 | + call; the 5-step workflow primitives all appear) |
| 10 | +4. ``agent_workflow()`` output references the canonical downstream |
| 11 | + primitives by name |
| 12 | +
|
| 13 | +It also locks the ``__dir__()`` invariants (head matches |
| 14 | +``_AGENT_FACING_ORDER``, tail is alphabetic by ``str``, module dunders |
| 15 | +are preserved, ``inspect.getmembers`` parity). |
| 16 | +
|
| 17 | +Closes the ``__dir__`` contract-test deferral row from PR #464's |
| 18 | +``TODO.md``. |
| 19 | +
|
| 20 | +No live API calls, no subprocess, no live agents — purely string/identity |
| 21 | +assertions runnable in the default ``pytest`` suite. |
| 22 | +""" |
| 23 | + |
| 24 | +from __future__ import annotations |
| 25 | + |
| 26 | +import inspect |
| 27 | + |
| 28 | +import pandas as pd |
| 29 | +import pytest |
| 30 | + |
| 31 | +import diff_diff |
| 32 | +from diff_diff import _AGENT_FACING_ORDER |
| 33 | + |
| 34 | +# --------------------------------------------------------------------------- |
| 35 | +# __all__ membership |
| 36 | +# --------------------------------------------------------------------------- |
| 37 | + |
| 38 | + |
def test_agent_facing_names_in_all():
    """Guard the agent-facing primitives' membership in ``__all__``.

    ``from diff_diff import *`` only exposes what ``__all__`` lists, so an
    export cleanup that drops one of these names would silently remove it
    from the public surface. This test makes that removal loud.
    """
    expected_names = {
        "agent_workflow",
        "profile_panel",
        "get_llm_guide",
        "practitioner_next_steps",
        "BusinessReport",
    }
    exported = set(diff_diff.__all__)
    # Subset check; the message lists exactly the names that went missing.
    assert expected_names.issubset(
        exported
    ), f"missing from __all__: {expected_names - exported}"
| 55 | + |
| 56 | + |
def test_estimator_class_names_importable():
    """Pin the documented estimator class names as importable.

    The import itself is the assertion: if any of these identifiers is
    renamed or removed, the ``from diff_diff import ...`` below raises
    and the test fails. Per the original contract notes, the
    orchestrator's Step 3 examples and the ``llms-autonomous.txt``
    routing matrix refer to the estimators by these literal names, so a
    rename would break agent recognition without any runtime error.
    """
    # Names are intentionally unused; only their importability matters.
    from diff_diff import (  # noqa: F401
        CallawaySantAnna,
        ChaisemartinDHaultfoeuille,
        ContinuousDiD,
        DifferenceInDifferences,
        HeterogeneousAdoptionDiD,
        HonestDiD,
        ImputationDiD,
        PreTrendsPower,
        SunAbraham,
        TwoWayFixedEffects,
        WooldridgeDiD,
    )
| 78 | + |
| 79 | + |
| 80 | +# --------------------------------------------------------------------------- |
| 81 | +# __dir__() head-first ordering + _OrderedName invariants |
| 82 | +# --------------------------------------------------------------------------- |
| 83 | + |
| 84 | + |
def test_dir_head_matches_agent_facing_order():
    """The leading entries of ``dir(diff_diff)`` must equal
    ``_AGENT_FACING_ORDER``, element for element and in that order.

    The slice length is derived from the curated tuple itself rather
    than hard-coded, so growing or shrinking the tuple does not require
    touching this test. A failure here points at a dropped or
    mis-ordered ``__dir__()`` override, or at a broken ``_OrderedName``
    comparison.
    """
    expected_head = list(_AGENT_FACING_ORDER)
    actual_head = dir(diff_diff)[: len(_AGENT_FACING_ORDER)]
    assert actual_head == expected_head, (
        f"dir() head does not match _AGENT_FACING_ORDER. "
        f"Got: {actual_head!r}. "
        f"Expected: {expected_head!r}."
    )
| 102 | + |
| 103 | + |
def test_dir_tail_alphabetic_by_str():
    """Everything after the curated head of ``dir()`` must already be in
    the order that sorting by ``str`` would produce.

    Head entries are ordered by the ``_OrderedName`` comparison; the
    remainder is expected to be ordinary alphabetical order. Sorting a
    copy with ``key=str`` and comparing against the original verifies
    that without depending on any custom comparison.
    """
    tail = dir(diff_diff)[len(_AGENT_FACING_ORDER) :]
    resorted = list(tail)
    resorted.sort(key=str)
    assert tail == resorted
| 116 | + |
| 117 | + |
def test_dir_returns_full_module_namespace():
    """``dir(diff_diff)`` must still list the standard module dunders.

    If the ``__dir__`` override were narrowed to ``__all__`` only,
    entries such as ``__doc__``, ``__name__`` and ``__file__`` would
    vanish and ``inspect.getmembers`` consumers would break. Checking a
    handful of dunders is enough to detect that narrowing.
    """
    listing = dir(diff_diff)
    required_dunders = ("__doc__", "__name__", "__file__", "__all__")
    for dunder in required_dunders:
        assert dunder in listing, f"{dunder!r} missing from dir() output"
| 129 | + |
| 130 | + |
def test_getmembers_parity_with_default_module_dir():
    """``inspect.getmembers(diff_diff)`` and ``dir(diff_diff)`` must
    report the same set of names, and the module docstring must be
    present and mention ``agent_workflow``.

    Guards against ``__dir__`` being cut down to ``__all__`` only.
    """
    from_dir = set(dir(diff_diff))
    from_getmembers = {member[0] for member in inspect.getmembers(diff_diff)}
    # On failure, show at most five of the disagreeing names.
    assert from_dir == from_getmembers, (
        f"dir() and inspect.getmembers() disagree by "
        f"{sorted(from_dir.symmetric_difference(from_getmembers))[:5]}"
    )

    # The steering surface (module docstring) has to be reachable too.
    module_doc = diff_diff.__doc__
    assert module_doc is not None
    assert "agent_workflow" in module_doc.lower()
| 145 | + |
| 146 | + |
| 147 | +# --------------------------------------------------------------------------- |
| 148 | +# _OrderedName subclass invariants |
| 149 | +# --------------------------------------------------------------------------- |
| 150 | + |
| 151 | + |
def test_ordered_name_isinstance_str():
    """Each element returned by ``dir(diff_diff)`` must be a ``str`` (or
    a subclass of it), so callers that type-check entries keep working.
    """
    for entry in dir(diff_diff):
        assert isinstance(entry, str), (
            f"dir() element {entry!r} is type {type(entry).__name__}, "
            f"not a str subclass"
        )
| 160 | + |
| 161 | + |
def test_ordered_name_str_methods_work():
    """Head entries of ``dir(diff_diff)`` must behave like ordinary
    strings for the operations tooling commonly uses: a str method
    (``upper``), equality, hashing as a dict key, list membership, and
    f-string interpolation.
    """
    head_count = len(_AGENT_FACING_ORDER)
    for entry in dir(diff_diff)[:head_count]:
        plain = str(entry)
        assert entry.upper() == plain.upper()
        assert entry == plain
        assert {entry: 1}.get(entry) == 1
        assert entry in [plain]
        assert f"{entry}" == plain
| 174 | + |
| 175 | + |
| 176 | +# --------------------------------------------------------------------------- |
| 177 | +# __doc__ first-paragraph contract |
| 178 | +# --------------------------------------------------------------------------- |
| 179 | + |
| 180 | + |
def test_doc_first_paragraph_names_agent_workflow():
    """The first paragraph of ``diff_diff.__doc__`` must name
    ``agent_workflow``.

    ``help(diff_diff)`` opens with the module docstring, so this catches
    a docstring rewrite that drops the recommended-call hint from the
    top-of-help surface.

    The paragraph boundary is any blank line, including one that holds
    only spaces or tabs. A plain ``split("\\n\\n")`` would not see such a
    line as a separator and would fold later paragraphs into the "first"
    one, letting the test pass when the hint has moved further down.
    """
    import re  # local import keeps this test self-contained

    doc = diff_diff.__doc__
    assert doc is not None
    # maxsplit=1: only the text before the first blank line is needed.
    first_block = re.split(r"\n\s*\n", doc.strip(), maxsplit=1)[0]
    assert "agent_workflow" in first_block.lower()
| 192 | + |
| 193 | + |
def test_doc_names_canonical_workflow_helpers():
    """The module docstring must keep mentioning each downstream
    workflow helper by name.

    Matching is case-insensitive: the docstring is lower-cased and the
    expected names are written in lower case. A docstring trim that
    removes any of these references fails here.
    """
    module_doc = diff_diff.__doc__
    assert module_doc is not None
    lowered = module_doc.lower()
    helper_names = (
        "profile_panel",
        "get_llm_guide",
        "practitioner_next_steps",
        "businessreport",
    )
    for helper in helper_names:
        assert helper in lowered, f"{helper!r} missing from __doc__"
| 210 | + |
| 211 | + |
| 212 | +# --------------------------------------------------------------------------- |
| 213 | +# agent_workflow() output references the canonical primitives |
| 214 | +# --------------------------------------------------------------------------- |
| 215 | + |
| 216 | + |
def test_agent_workflow_output_names_canonical_helpers():
    """The ``"script"`` entry returned by ``agent_workflow()`` must
    mention each of the four downstream primitives by name.

    Uses a minimal one-row frame; the test only inspects the generated
    script text, guarding against the orchestrator's content drifting
    away from the helpers it advertises.
    """
    panel = pd.DataFrame({"u": [1], "t": [0], "tr": [0], "y": [0.0]})
    column_roles = {
        "unit": "u",
        "time": "t",
        "treatment": "tr",
        "outcome": "y",
    }
    result = diff_diff.agent_workflow(panel, verbose=False, **column_roles)
    script = result["script"]
    expected_helpers = (
        "profile_panel",
        "get_llm_guide",
        "practitioner_next_steps",
        "BusinessReport",
    )
    for helper in expected_helpers:
        assert helper in script, f"{helper!r} missing from agent_workflow script"
| 238 | + |
| 239 | + |
def test_agent_workflow_fit_candidates_resolve_on_diff_diff():
    """Each name in the ``"fit_candidates"`` entry returned by
    ``agent_workflow()`` must exist as an attribute of ``diff_diff``.

    Per the original notes this mirrors a check in
    ``test_agent_workflow.py``; it is repeated here so that a rename
    missed by that suite is still caught as part of the discoverability
    contract.
    """
    panel = pd.DataFrame({"u": [1], "t": [0], "tr": [0], "y": [0.0]})
    column_roles = {
        "unit": "u",
        "time": "t",
        "treatment": "tr",
        "outcome": "y",
    }
    result = diff_diff.agent_workflow(panel, verbose=False, **column_roles)

    # Collect every advertised candidate that cannot be resolved.
    unresolved = []
    for candidate in result["fit_candidates"]:
        if not hasattr(diff_diff, candidate):
            unresolved.append(candidate)
    assert not unresolved, f"fit_candidates not on diff_diff namespace: {unresolved}"
| 260 | + |
| 261 | + |
| 262 | +# --------------------------------------------------------------------------- |
| 263 | +# Cross-surface sanity (all five agent-facing entrypoints callable) |
| 264 | +# --------------------------------------------------------------------------- |
| 265 | + |
| 266 | + |
@pytest.mark.parametrize(
    "name",
    sorted(
        (
            "agent_workflow",
            "profile_panel",
            "get_llm_guide",
            "practitioner_next_steps",
            "BusinessReport",
        )
    ),
)
def test_agent_facing_entrypoint_callable(name):
    """The attribute ``name`` on the top-level package must be callable.

    Runs once per agent-facing primitive. It fails if one of these names
    is ever rebound to a non-callable such as a module or constant,
    which would break an agent's ``help(name)`` follow-up.
    """
    target = getattr(diff_diff, name)
    assert callable(target), f"{name!r} is not callable on the diff_diff namespace"
0 commit comments