@@ -1854,6 +1854,48 @@ def _significance_phrase(p: Optional[float], alpha: float) -> str:
18541854 return "the confidence interval includes zero; the data are consistent with no effect"
18551855
18561856
1857+ def _smallest_failing_grid_m (sens : Dict [str , Any ]) -> Optional [float ]:
1858+ """If the smallest evaluated M on the HonestDiD sensitivity grid
1859+ already has the robust CI including zero, return that M. Returns
1860+ ``None`` when the grid is missing or when the smallest evaluated
1861+ point is still robust — in the latter case ``breakdown_M`` is an
1862+ interpolated threshold between grid points, not a statement about
1863+ the smallest grid point itself.
1864+
1865+ Matches the twin helper in ``diagnostic_report.py``; keep the two
1866+ in sync for cross-surface parity.
1867+ """
1868+ grid_points = sens .get ("grid" ) or []
1869+ sorted_grid = sorted (
1870+ (p for p in grid_points if isinstance (p .get ("M" ), (int , float ))),
1871+ key = lambda p : p ["M" ],
1872+ )
1873+ if not sorted_grid :
1874+ return None
1875+ smallest = sorted_grid [0 ]
1876+ if not smallest .get ("robust_to_zero" , True ):
1877+ return float (smallest ["M" ])
1878+ return None
1879+
1880+
1881+ def _sentence_first_upper (text : str ) -> str :
1882+ """Uppercase only the first character of ``text``, preserving all
1883+ other casing. Unlike ``str.capitalize()``, which lowercases every
1884+ character after the first, this keeps user-supplied abbreviations
1885+ and proper nouns intact.
1886+
1887+ Examples
1888+ --------
1889+ >>> _sentence_first_upper("the NJ minimum-wage increase")
1890+ 'The NJ minimum-wage increase'
1891+ >>> _sentence_first_upper("Castle Doctrine law adoption")
1892+ 'Castle Doctrine law adoption'
1893+ """
1894+ if not text :
1895+ return text
1896+ return text [0 ].upper () + text [1 :]
1897+
1898+
18571899def _direction_verb (effect : float , outcome_direction : Optional [str ]) -> str :
18581900 """Return a direction-aware verb for the headline sentence.
18591901
@@ -1929,7 +1971,16 @@ def _render_headline_sentence(schema: Dict[str, Any]) -> str:
19291971 # is not actually available.
19301972 ci_str = " (inference unavailable: confidence interval is undefined for this fit)"
19311973 by_clause = f" by { magnitude } " if effect != 0 else ""
1932- return f"{ treatment .capitalize ()} { verb } { outcome } { by_clause } { ci_str } ."
1974+ # Round-1 BR/DR canonical-validation (2026-04-19): Python's
1975+ # ``str.capitalize()`` lowercases everything except the first
1976+ # character, so ``"the NJ minimum-wage increase".capitalize()``
1977+ # returns ``"The nj minimum-wage increase"`` — flattening the
1978+ # ``NJ`` abbreviation. Real canonical datasets (Card-Krueger,
1979+ # Castle Doctrine) carry proper-noun / acronym tokens in the
1980+ # user-supplied ``treatment_label``, so preserve user casing and
1981+ # only ensure the first character is uppercase.
1982+ treatment_sentence = _sentence_first_upper (treatment )
1983+ return f"{ treatment_sentence } { verb } { outcome } { by_clause } { ci_str } ."
19331984
19341985
19351986def _render_summary (schema : Dict [str , Any ]) -> str :
@@ -2088,11 +2139,33 @@ def _render_summary(schema: Dict[str, Any]) -> str:
20882139 f"pre-period variation."
20892140 )
20902141 elif isinstance (bkd , (int , float )):
2091- sentences .append (
2092- f"HonestDiD: the result is fragile — the confidence interval "
2093- f"includes zero once violations reach { bkd :.2g} x the "
2094- f"pre-period variation."
2095- )
2142+ # Round-1 BR/DR canonical-validation (2026-04-19) then
2143+ # tightened per CI review on PR #341 R1:
2144+ # ``breakdown_M`` is the smallest M at which the robust
2145+ # CI includes zero (interpolated between grid points) —
2146+ # not a claim about any specific grid point. Earlier fix
2147+ # keyed off ``bkd <= 0.05`` which incorrectly asserted
2148+ # "smallest grid point fails" even for grids that start
2149+ # at M=0 where the smallest evaluated point is still
2150+ # robust (e.g., grid=[0, 0.25, ...] with bkd=0.03). The
2151+ # "smallest grid point" wording is only accurate when
2152+ # the smallest evaluated M on the grid itself fails
2153+ # (``robust_to_zero == False``); otherwise fall through
2154+ # to the numeric multiplier.
2155+ smallest_failed_m = _smallest_failing_grid_m (sens )
2156+ if smallest_failed_m is not None :
2157+ sentences .append (
2158+ "HonestDiD: the result is fragile — the confidence "
2159+ "interval includes zero even at the smallest M "
2160+ f"evaluated on the sensitivity grid (M = "
2161+ f"{ smallest_failed_m :.2g} )."
2162+ )
2163+ else :
2164+ sentences .append (
2165+ f"HonestDiD: the result is fragile — the confidence "
2166+ f"interval includes zero once violations reach { bkd :.2g} x "
2167+ f"the pre-period variation."
2168+ )
20962169
20972170 # Sample sentence. For fits with a dynamic comparison set (CS /
20982171 # ContinuousDiD / StaggeredTripleDiff / EfficientDiD /
0 commit comments