From e258ecdd7b3c2b7e46f2ade8f1e3e1e30b939327 Mon Sep 17 00:00:00 2001 From: igerber Date: Thu, 14 May 2026 18:37:12 -0400 Subject: [PATCH 1/2] Fix #406 holistic audit residuals: alias-doc completeness + test helper bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Holistic re-audit of merged #406 (inference-field aliases on staggered result classes) + #428 (post-merge cleanup adding TripleDifferenceResults to alias mapping examples). Per-PR CI on #428 couldn't see the combined post-PR holistic state. Local agentic codex review surfaced residuals across 4 rounds (R1-R4); a 5th round flagged Sphinx autosummary regen which is auto-handled on next docs build (not addressed here). **Test helper (R2)** — `_required_init_kwargs()` in `tests/test_result_aliases.py` had two bugs that are masked today but brittle as result dataclasses evolve: - Default-factory not honored: the `f.default is not f.default_factory and f.default is not MISSING` check returns False for factory-only fields (where default is MISSING and default_factory is a real callable), so the helper pre-filled those fields with a sentinel and the factory never ran. Replaced with explicit `_dc.MISSING` checks on both default and default_factory. - Dispatch order: `"float"` matched before `"Tuple"`, so `Tuple[float, float]` annotations were classified as scalar and got `0.0` instead of `(0.0, 0.0)`. Reordered the synthetic-value dispatch so container annotations (Tuple/List/Dict/DataFrame/ndarray) are checked before the scalar fallback. **Read-only assertion (R1)** — `test_aliases_are_read_only` checked `setattr`-fails on the flat aliases but not on the new `ContinuousDiDResults.overall_*` aliases. Extended the Continuous-specific branch. **Bundled guide documentation (R3)** — REGISTRY and CHANGELOG made the flat alias contract official, but bundled `practitioner.py`, `llms-practitioner.txt`, and `llms-full.txt` documented only the canonical `overall_*` / `overall_att_*` / `avg_*` names. Added a single "Flat-alias compatibility note" under the `## Results Objects` header in `llms-full.txt` (avoids per-class table bloat); extended the result-class snippet in `llms-practitioner.txt`; clarified the Step-4 covariate-comparison snippet in `practitioner.py`. **Registry scope correction (R4)** — the top-level REGISTRY note said "every scalar treatment-effect result class" exposes flat aliases, but flat-native classes (`DiDResults`, `SyntheticDiDResults`, `TROPResults`, `TripleDifferenceResults`, `HeterogeneousAdoptionDiDResults`) already carry these as native dataclass fields — they're unchanged by the alias contract. Narrowed the note to scope only the prefixed families. **Typo fix (R4)** — the R3 `llms-practitioner.txt` note named a nonexistent `overall_att_att` field. Replaced with the actual canonical field names (`overall_att` is the point estimate; `overall_att_se` etc. are the inference fields). 5 files, +51/-13. No behavior change; all edits are documentation alignment and test helper / test coverage hardening on the surface #406 + #428 already established. --- diff_diff/guides/llms-full.txt | 12 +++++++++ diff_diff/guides/llms-practitioner.txt | 10 +++++++ diff_diff/practitioner.py | 3 ++- docs/methodology/REGISTRY.md | 2 +- tests/test_result_aliases.py | 37 ++++++++++++++++++-------- 5 files changed, 51 insertions(+), 13 deletions(-) diff --git a/diff_diff/guides/llms-full.txt b/diff_diff/guides/llms-full.txt index 6c4adbc7..6b703651 100644 --- a/diff_diff/guides/llms-full.txt +++ b/diff_diff/guides/llms-full.txt @@ -967,6 +967,18 @@ results = bacon_decompose(data, outcome='y', unit='id', time='t', first_treat='f ## Results Objects +**Flat-alias compatibility note.** Every staggered result class in this +section (those with canonical `overall_*` / `overall_att_*` / `avg_*` +prefixed inference fields) ALSO exposes the unprefixed flat names +`att` / `se` / `conf_int` / `p_value` / `t_stat` as read-only `@property` +aliases over the canonical fields. The canonical prefixed fields remain +the documented and computed surface; the flat aliases are pure +read-throughs for compatibility with external adapters that +`getattr(res, "se", None)`-style query the inference surface (e.g. +`balance.interop.diff_diff.as_balance_diagnostic()`). Tables below list +the canonical names; assume the flat aliases are present on every +staggered class unless explicitly noted otherwise. + ### DiDResults Returned by `DifferenceInDifferences.fit()` and `TwoWayFixedEffects.fit()`. diff --git a/diff_diff/guides/llms-practitioner.txt b/diff_diff/guides/llms-practitioner.txt index e691a28d..3e5b8084 100644 --- a/diff_diff/guides/llms-practitioner.txt +++ b/diff_diff/guides/llms-practitioner.txt @@ -70,6 +70,16 @@ results.cohort_effects # Per-cohort effects (via to_dataframe(level='coho # Other staggered (ImputationDiD, TwoStageDiD, etc.): results.overall_att # Overall ATT results.overall_se # Standard error + +# Flat-alias compatibility: every staggered class above (canonical +# `overall_*` / `overall_att_*` / `avg_*` prefixed) also exposes the +# unprefixed flat names `att` / `se` / `conf_int` / `p_value` / +# `t_stat` as read-only `@property` aliases over the canonical fields. +# The canonical prefixed names remain the documented and computed +# surface; the flat aliases are pure read-throughs for compatibility +# with adapters that `getattr(res, "se", None)`-style query inference. +results.att # alias of overall_att / avg_att (or overall_att for ContinuousDiD ATT side) +results.se # alias of overall_se / avg_se (or overall_att_se for ContinuousDiD ATT side) ``` --- diff --git a/diff_diff/practitioner.py b/diff_diff/practitioner.py index 11f26ce6..128f378a 100644 --- a/diff_diff/practitioner.py +++ b/diff_diff/practitioner.py @@ -271,7 +271,8 @@ def _covariates_step() -> Dict[str, Any]: "# Re-estimate without covariates and compare:\n" "result_no_cov = estimator.fit(data, ..., covariates=None)\n" "# Compare ATT with and without covariates.\n" - "# Use .att (basic DiD) or .overall_att (staggered estimators)." + "# Use .att (basic DiD; also a read-only flat-alias on staggered\n" + "# classes) or .overall_att (canonical name on staggered results)." ), priority="medium", step_name="robustness", diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index af10e544..2470afde 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -2,7 +2,7 @@ This document provides the academic foundations and key implementation requirements for each estimator in diff-diff. It serves as a reference for contributors and users who want to understand the theoretical basis of the methods. -**Result-class field naming.** Headline scalar inference fields appear under one of four native naming patterns: flat `att` / `se` / `conf_int` / `p_value` / `t_stat` (`DiDResults`, `SyntheticDiDResults`, `TROPResults`, `TripleDifferenceResults`, `HeterogeneousAdoptionDiDResults`); `overall_*` (`CallawaySantAnnaResults` and the rest of the staggered family); `overall_att_*` (`ContinuousDiDResults`, where `att` and `acrt` are parallel response curves); and `avg_*` (`MultiPeriodDiDResults`). Every scalar treatment-effect result class covered by this naming contract additionally exposes the flat `att` / `se` / `conf_int` / `p_value` / `t_stat` names as read-only `@property` aliases for adapter / external-consumer compatibility (see PR for v3.3.3, motivated by `balance.interop.diff_diff`); `ContinuousDiDResults` further exposes `overall_*` aliases pointing at the ATT side. The native field is canonical for documentation, semantics, and computation — aliases are pure read-throughs and inherit the `safe_inference()` joint-NaN consistency contract automatically. Because aliases are `@property` descriptors (not dataclass fields), they do NOT appear in `dataclasses.fields()` or `dataclasses.asdict()` output, and assignment to an alias raises `AttributeError`; serializers and field-walkers continue to see only the canonical field set. +**Result-class field naming.** Headline scalar inference fields appear under one of four native naming patterns: flat `att` / `se` / `conf_int` / `p_value` / `t_stat` (`DiDResults`, `SyntheticDiDResults`, `TROPResults`, `TripleDifferenceResults`, `HeterogeneousAdoptionDiDResults`); `overall_*` (`CallawaySantAnnaResults` and the rest of the staggered family); `overall_att_*` (`ContinuousDiDResults`, where `att` and `acrt` are parallel response curves); and `avg_*` (`MultiPeriodDiDResults`). Result classes in the prefixed `overall_*` / `overall_att_*` / `avg_*` families additionally expose the flat `att` / `se` / `conf_int` / `p_value` / `t_stat` names as read-only `@property` aliases over their canonical fields, for adapter / external-consumer compatibility (see PR for v3.3.3, motivated by `balance.interop.diff_diff`). The flat-native classes (`DiDResults`, `SyntheticDiDResults`, `TROPResults`, `TripleDifferenceResults`, `HeterogeneousAdoptionDiDResults`) already carry these names as native dataclass fields and are unchanged by this contract. `ContinuousDiDResults` further exposes `overall_*` aliases pointing at the ATT side (so `result.overall_se` reads `result.overall_att_se`, etc.). The native field is canonical for documentation, semantics, and computation — aliases are pure read-throughs and inherit the `safe_inference()` joint-NaN consistency contract automatically. Because aliases are `@property` descriptors (not dataclass fields), they do NOT appear in `dataclasses.fields()` or `dataclasses.asdict()` output, and assignment to an alias raises `AttributeError`; serializers and field-walkers continue to see only the canonical field set. ## Table of Contents diff --git a/tests/test_result_aliases.py b/tests/test_result_aliases.py index 0f033458..bfbe8465 100644 --- a/tests/test_result_aliases.py +++ b/tests/test_result_aliases.py @@ -60,24 +60,26 @@ def _required_init_kwargs(cls, overrides): Lets us build a minimal result instance for alias-mechanic tests without having to enumerate every estimator-specific field. Sentinel values for untouched fields are deliberately uninteresting (empty containers, zeros) - -- they are not exercised by these tests.""" + -- they are not exercised by these tests. + """ + import dataclasses as _dc + kwargs = {} for f in fields(cls): if f.name in overrides: continue - # Skip fields with defaults; we only need to fill required positionals. - if f.default is not f.default_factory and f.default is not getattr( - __import__("dataclasses"), "MISSING", None - ): - # Field has a default value; let the dataclass apply it. + # A field is REQUIRED iff both default and default_factory are MISSING. + # When default_factory is set (e.g. list/dict factory), the dataclass + # will apply it; we must NOT pre-fill the field with a sentinel or we + # block the factory. + if f.default is not _dc.MISSING or f.default_factory is not _dc.MISSING: continue # Required field — supply a type-compatible sentinel. + # Order container annotations BEFORE the scalar `"float"` / `"int"` + # branches so that ``Tuple[float, float]`` is not mis-classified as + # scalar (``"float" in "Tuple[float, float]"`` is True). ann = str(f.type) - if "float" in ann: - kwargs[f.name] = 0.0 - elif "int" in ann: - kwargs[f.name] = 0 - elif "Tuple" in ann or "tuple" in ann: + if "Tuple" in ann or "tuple" in ann: kwargs[f.name] = (0.0, 0.0) elif "List" in ann or "list" in ann: kwargs[f.name] = [] @@ -87,6 +89,10 @@ def _required_init_kwargs(cls, overrides): kwargs[f.name] = pd.DataFrame() elif "ndarray" in ann or "np.ndarray" in ann: kwargs[f.name] = np.array([]) + elif "float" in ann: + kwargs[f.name] = 0.0 + elif "int" in ann: + kwargs[f.name] = 0 else: kwargs[f.name] = None kwargs.update(overrides) @@ -304,6 +310,15 @@ def test_aliases_are_read_only(cls, ovr): for name in ("att", "se", "conf_int", "p_value", "t_stat"): with pytest.raises(AttributeError): setattr(res, name, object()) + # ContinuousDiDResults also exposes overall_se / overall_conf_int / + # overall_p_value / overall_t_stat as read-only aliases over the + # ATT-side canonical fields (no parallel `overall_att` alias is needed + # because `overall_att_att` would be confusing; the flat `att` covers + # that one). These must also reject assignment. + if cls.__name__ == "ContinuousDiDResults": + for name in ("overall_se", "overall_conf_int", "overall_p_value", "overall_t_stat"): + with pytest.raises(AttributeError): + setattr(res, name, object()) @pytest.mark.parametrize( From 106c1a0c7390756cc9d5d78f74c7784839b4ca2c Mon Sep 17 00:00:00 2001 From: igerber Date: Thu, 14 May 2026 18:45:43 -0400 Subject: [PATCH 2/2] R6 (CI): add direct regression test for _required_init_kwargs helper CI AI review on #437 noted that the R2 helper fix is only exercised indirectly via the alias-construction tests. Because the affected result classes use list/dict default factories and the tuple-valued CI fields are overridden in test setup, a future regression in default_factory handling or tuple-vs-float dispatch could slip through. Added test_required_init_kwargs_handles_default_factory_and_tuple_dispatch that uses a tiny local dataclass with both a required Tuple[float, float] field and a default_factory=list field. Asserts: - the Tuple field gets a tuple sentinel, NOT a scalar 0.0 (locks the dispatch-order fix: Tuple/List/Dict/DataFrame/ndarray before float) - the default_factory field is OMITTED from kwargs so the factory runs at construction time (locks the MISSING-on-default_factory fix) - the instance constructs successfully and the factory produced an empty list (round-trip proof) --- tests/test_result_aliases.py | 40 ++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/test_result_aliases.py b/tests/test_result_aliases.py index bfbe8465..9f91a9f6 100644 --- a/tests/test_result_aliases.py +++ b/tests/test_result_aliases.py @@ -99,6 +99,46 @@ def _required_init_kwargs(cls, overrides): return kwargs +def test_required_init_kwargs_handles_default_factory_and_tuple_dispatch(): + """Pin the two `_required_init_kwargs()` fixes from PR #437 R2 directly. + + The helper had two latent bugs masked by the specific fields exercised + by the alias tests: factory-only required fields were pre-filled (so + the factory never ran), and the type-dispatch checked ``"float"`` + before ``"Tuple"`` (so ``Tuple[float, float]`` annotations matched the + scalar branch). Both fixes are exercised here against a small local + dataclass so the contract stays pinned independent of production + result-dataclass shape. + """ + from dataclasses import dataclass, field + from typing import Tuple + + @dataclass + class _Probe: + # Required, must be filled with a tuple sentinel — NOT 0.0 — even + # though "float" appears in the annotation as a substring. + ci: Tuple[float, float] + # default_factory-backed: must be OMITTED from kwargs so the + # factory runs at construction time. + items: list = field(default_factory=list) + # default-valued: must also be omitted from kwargs. + x: float = 1.5 + + kwargs = _required_init_kwargs(_Probe, overrides={}) + assert kwargs == {"ci": (0.0, 0.0)}, ( + f"_required_init_kwargs() must (a) supply a tuple sentinel for " + f"Tuple[float, float] required fields (not a scalar 0.0), and " + f"(b) omit default_factory / default fields so the dataclass " + f"applies them at construction time. Got: {kwargs!r}" + ) + # Round-trip: instance construction must succeed and the factory + # must have produced an empty list (not been displaced by a sentinel). + inst = _Probe(**kwargs) + assert inst.ci == (0.0, 0.0) + assert inst.items == [] + assert inst.x == 1.5 + + def _assert_pattern_b_aliases(res, *, att, se, t_stat, p_value, conf_int): """Pattern B: 5 flat aliases mapping to the overall_* canonical fields.""" assert _alias_equal(res.att, att), f"att alias != overall_att ({res.att} vs {att})"