Skip to content

Commit 335029a

Browse files
committed
Address code review feedback for PR #68
- Remove unused imports (hashlib, os, Optional) from datasets.py
- Clear notebook outputs for cleaner version control
- Improve seed comments for better clarity on reproducibility
1 parent e9b297e commit 335029a

2 files changed

Lines changed: 42 additions & 10 deletions

File tree

diff_diff/datasets.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,9 @@
88
for subsequent use.
99
"""
1010

11-
import hashlib
12-
import os
1311
from io import StringIO
1412
from pathlib import Path
15-
from typing import Dict, Optional
13+
from typing import Dict
1614
from urllib.error import HTTPError, URLError
1715
from urllib.request import urlopen
1816

@@ -162,7 +160,7 @@ def _construct_card_krueger_data() -> pd.DataFrame:
162160
Uses aggregated data that preserves the key DiD estimates.
163161
"""
164162
# Representative sample based on published summary statistics
165-
np.random.seed(1994) # Year of publication
163+
np.random.seed(1994) # Card-Krueger publication year, for reproducibility
166164

167165
stores = []
168166
store_id = 1
@@ -307,7 +305,7 @@ def _construct_castle_doctrine_data() -> pd.DataFrame:
307305
308306
This is a fallback when the online source is unavailable.
309307
"""
310-
np.random.seed(2013) # Year of Cheng-Hoekstra publication
308+
np.random.seed(2013) # Cheng-Hoekstra publication year, for reproducibility
311309

312310
# States and their Castle Doctrine adoption years
313311
# 0 = never adopted during the study period
@@ -456,7 +454,7 @@ def _construct_divorce_laws_data() -> pd.DataFrame:
456454
457455
This is a fallback when the online source is unavailable.
458456
"""
459-
np.random.seed(2006) # Year of Stevenson-Wolfers
457+
np.random.seed(2006) # Stevenson-Wolfers publication year, for reproducibility
460458

461459
# State adoption years for unilateral divorce (from Wolfers 2006)
462460
# 0 = never adopted or adopted before 1968
@@ -603,7 +601,7 @@ def _construct_mpdta_data() -> pd.DataFrame:
603601
604602
This replicates the simulated dataset used in Callaway-Sant'Anna tutorials.
605603
"""
606-
np.random.seed(2021) # Year of CS publication
604+
np.random.seed(2021) # Callaway-Sant'Anna publication year, for reproducibility
607605

608606
n_counties = 500
609607
years = [2003, 2004, 2005, 2006, 2007]

docs/tutorials/09_real_world_examples.ipynb

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,24 @@
464464
"id": "cell-22",
465465
"metadata": {},
466466
"outputs": [],
467-
"source": "# Callaway-Sant'Anna estimation\ncs = CallawaySantAnna(\n control_group='never_treated',\n n_bootstrap=199,\n seed=42\n)\n\nresults_cs = cs.fit(\n castle,\n outcome='homicide_rate',\n unit='state',\n time='year',\n first_treat='first_treat'\n)\n\nprint(results_cs.summary())"
467+
"source": [
468+
"# Callaway-Sant'Anna estimation\n",
469+
"cs = CallawaySantAnna(\n",
470+
" control_group='never_treated',\n",
471+
" n_bootstrap=199,\n",
472+
" seed=42\n",
473+
")\n",
474+
"\n",
475+
"results_cs = cs.fit(\n",
476+
" castle,\n",
477+
" outcome='homicide_rate',\n",
478+
" unit='state',\n",
479+
" time='year',\n",
480+
" first_treat='first_treat'\n",
481+
")\n",
482+
"\n",
483+
"print(results_cs.summary())"
484+
]
468485
},
469486
{
470487
"cell_type": "code",
@@ -647,7 +664,24 @@
647664
"id": "cell-32",
648665
"metadata": {},
649666
"outputs": [],
650-
"source": "# Callaway-Sant'Anna estimation\ncs_divorce = CallawaySantAnna(\n control_group='never_treated',\n n_bootstrap=199,\n seed=42\n)\n\nresults_divorce = cs_divorce.fit(\n divorce,\n outcome='divorce_rate',\n unit='state',\n time='year',\n first_treat='first_treat'\n)\n\nprint(results_divorce.summary())"
667+
"source": [
668+
"# Callaway-Sant'Anna estimation\n",
669+
"cs_divorce = CallawaySantAnna(\n",
670+
" control_group='never_treated',\n",
671+
" n_bootstrap=199,\n",
672+
" seed=42\n",
673+
")\n",
674+
"\n",
675+
"results_divorce = cs_divorce.fit(\n",
676+
" divorce,\n",
677+
" outcome='divorce_rate',\n",
678+
" unit='state',\n",
679+
" time='year',\n",
680+
" first_treat='first_treat'\n",
681+
")\n",
682+
"\n",
683+
"print(results_divorce.summary())"
684+
]
651685
},
652686
{
653687
"cell_type": "code",
@@ -791,4 +825,4 @@
791825
},
792826
"nbformat": 4,
793827
"nbformat_minor": 5
794-
}
828+
}

0 commit comments

Comments
 (0)