|
25 | 25 | diff_diff.honest_did - Sensitivity analysis for parallel trends violations |
26 | 26 | """ |
27 | 27 |
|
| 28 | +import warnings |
28 | 29 | from dataclasses import dataclass, field |
29 | 30 | from typing import Any, Dict, List, Literal, Optional, Tuple, Union |
30 | 31 |
|
@@ -88,6 +89,89 @@ def _compute_nis_acceptance_prob( |
88 | 89 | return float(np.clip(accept_prob, 0.0, 1.0)) |
89 | 90 |
|
90 | 91 |
|
def _relative_offset_as_float(delta: Any) -> float:
    """
    Convert a single ``period - reference_period`` difference to a float.

    Parameters
    ----------
    delta : Any
        Result of subtracting the reference label from a pre-period label
        (a pandas offset, a ``Timedelta`` / ``datetime.timedelta``, or a
        ``numpy.timedelta64``).

    Returns
    -------
    float
        Number of periods (offset-like) or number of days (timedelta-like).

    Raises
    ------
    TypeError
        If ``delta`` exposes none of the supported interfaces.
    """
    # Offsets produced by Period - Period carry the signed period count in `.n`.
    period_count = getattr(delta, "n", None)
    if period_count is not None:
        return float(period_count)

    # Timedelta-likes: use total_seconds() rather than `.days`, because
    # `.days` floors toward -inf (a -12h difference has days == -1) and
    # would collapse distinct sub-daily labels onto the same offset.
    total_seconds = getattr(delta, "total_seconds", None)
    if callable(total_seconds):
        return float(total_seconds()) / 86400.0

    # Objects that only expose a whole-day count.
    whole_days = getattr(delta, "days", None)
    if whole_days is not None:
        return float(whole_days)

    # Bare numpy.timedelta64: express as a (possibly fractional) day count.
    try:
        return float(delta / np.timedelta64(1, "D"))
    except (TypeError, ValueError) as err:
        raise TypeError(
            f"cannot coerce difference {delta!r} of type {type(delta).__name__} "
            "to float days/periods"
        ) from err


def _coerce_relative_times_from_reference(
    estimated_pre_periods: List[Any],
    reference_period: Any,
) -> Optional[np.ndarray]:
    """
    Convert ``estimated_pre_periods`` to Roth-style relative-time offsets
    from a numeric / Period / datetime ``reference_period``.

    Supported regimes, tried in order:

    - Anything ``float()`` accepts (``int`` / ``float`` / ``np.int64``):
      the offset is ``float(p) - float(reference_period)``.
    - Labels supporting subtraction (``pandas.Period``,
      ``pandas.Timestamp``, ``datetime``, ``np.datetime64``): each
      ``p - reference_period`` is converted to a float via ``.n`` for
      offset results (units of the Period frequency) or to days for
      timedelta results. Sub-day differences are kept as fractional
      days rather than floored.
    - Anything else (labels that can neither be coerced with ``float()``
      nor subtracted): a ``UserWarning`` is emitted and ``None`` is
      returned.

    Parameters
    ----------
    estimated_pre_periods : list
        Period labels of the estimated pre-treatment coefficients.
    reference_period : Any
        Label of the reference period the offsets are measured from.

    Returns
    -------
    np.ndarray or None
        Float array of relative times, one per entry of
        ``estimated_pre_periods``, or ``None`` when no conversion applies.
        The warning text tells the user that the legacy count-based
        direction will be used in that case and that the reported MDV is
        then not in Roth (2022) γ units.
    """
    # Path 1: direct float coercion (numeric scalars).
    try:
        ref_float = float(reference_period)
        return np.asarray(
            [float(p) - ref_float for p in estimated_pre_periods],
            dtype=float,
        )
    except (TypeError, ValueError):
        pass

    # Path 2: pandas.Period / pandas.Timestamp / datetime64 — try
    # subtraction-based offset arithmetic. Any label that cannot be
    # subtracted or converted raises and drops through to Path 3.
    try:
        return np.asarray(
            [_relative_offset_as_float(p - reference_period) for p in estimated_pre_periods],
            dtype=float,
        )
    except (TypeError, ValueError):
        pass

    # Path 3: genuinely non-numeric labels — warn and fall back to legacy.
    warnings.warn(
        f"PreTrendsPower: reference_period {reference_period!r} (type "
        f"{type(reference_period).__name__}) is not numeric or datetime-like, "
        "so per-period relative times cannot be derived. Linear-violation "
        "weights will use the legacy count-based [n_pre-1, ..., 0]/||·||_2 "
        "direction; the reported MDV is NOT in Roth (2022) γ units. Re-fit "
        "with numeric period labels (int year, pandas.Period, datetime) to "
        "obtain γ-unit MDV.",
        UserWarning,
        stacklevel=3,
    )
    return None
| 173 | + |
| 174 | + |
91 | 175 | def _extract_event_study_vcov_subblock( |
92 | 176 | results: Any, |
93 | 177 | pre_periods: List[int], |
@@ -914,27 +998,27 @@ def _extract_pre_period_params( |
914 | 998 | # For MultiPeriodDiDResults, period identifiers are generic |
915 | 999 | # (often calendar years, sometimes pre-shifted relative times). |
916 | 1000 | # Roth's δ_t = γ·t convention needs RELATIVE offsets from the |
917 | | - # treatment / reference period. Derive them from |
918 | | - # `results.reference_period` when numeric: |
919 | | - # relative_times = estimated_pre_periods - reference_period |
920 | | - # If `reference_period` is None or non-numeric (string, categorical), |
921 | | - # return None so `_get_violation_weights('linear')` falls back to |
922 | | - # the legacy count-based [n_pre-1, ..., 0] / ||·||_2 direction |
923 | | - # (the pre-PR-B shipped behavior; preserves backwards-compat for |
924 | | - # MPD callers that don't expose a numeric reference period). |
| 1001 | + # treatment / reference period. Three label-type regimes: |
| 1002 | + # |
| 1003 | + # 1. Numeric (int / float / np.int64) — direct float() coercion |
| 1004 | + # gives the correct relative offset. |
| 1005 | + # 2. pandas.Period — ``p - ref`` yields a pandas offset whose |
| 1006 | + # ``n`` attribute is the signed number of periods between |
| 1007 | + # the two labels, in units of the Period frequency. |
| 1008 | + # Datetime-like labels (Timestamp, np.datetime64) go through |
| 1009 | + # the same subtraction path, but the resulting timedelta is |
| 1010 | + # converted to days. |
| 1011 | + # 3. Genuinely non-numeric / unordered labels (string period |
| 1012 | + # IDs, categoricals without a ranking) — emit an explicit |
| 1013 | + # UserWarning and fall back to the legacy count-based |
| 1014 | + # [n_pre-1, ..., 0] / ||·||_2 normalized direction. The |
| 1015 | + # reported MDV under this fallback is NOT in Roth's γ |
| 1016 | + # units; users on non-numeric labels who need γ-unit MDV |
| 1017 | + # should re-fit with numeric period labels. |
925 | 1018 | ref = getattr(results, "reference_period", None) |
926 | 1019 | relative_times: Optional[np.ndarray] = None |
927 | 1020 | if ref is not None: |
928 | | - try: |
929 | | - ref_float = float(ref) |
930 | | - relative_times = np.asarray( |
931 | | - [float(p) - ref_float for p in estimated_pre_periods], |
932 | | - dtype=float, |
933 | | - ) |
934 | | - except (TypeError, ValueError): |
935 | | - # Non-numeric labels (string period IDs, etc.) — fall |
936 | | - # back to legacy normalized linear direction. |
937 | | - relative_times = None |
| 1021 | + relative_times = _coerce_relative_times_from_reference(estimated_pre_periods, ref) |
938 | 1022 | return effects, ses, vcov, n_pre, relative_times, covariance_source |
939 | 1023 |
|
940 | 1024 | # Try CallawaySantAnnaResults |
|
0 commit comments