161161 "execution_count": null,
162162 "metadata": {},
163163 "outputs": [],
164- "source": "# Fit TROP with automatic tuning via LOOCV\ntrop_est = TROP(\n lambda_time_grid=[0.0, 1.0], # Reduced time decay grid\n lambda_unit_grid=[0.0, 1.0], # Reduced unit distance grid \n lambda_nn_grid=[0.0, 0.1], # Reduced nuclear norm grid\n n_bootstrap=50, # Reduced bootstrap replications for SE\n seed=42\n)\n\n# Note: TROP infers treatment periods from the treatment indicator column.\n# The 'treated' column should be an absorbing state (D=1 for all periods\n# during and after treatment starts).\n\n# For SDID comparison later, we keep post_periods for SDID\npost_periods = list(range(n_pre, n_pre + n_post))\n\nresults = trop_est.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n)\n\nprint(results.summary())"
164+ "source": [
165+ "# Fit TROP with automatic tuning via LOOCV\n",
166+ "trop_est = TROP(\n",
167+ " lambda_time_grid=[0.0, 1.0], # Reduced time decay grid\n",
168+ " lambda_unit_grid=[0.0, 1.0], # Reduced unit distance grid \n",
169+ " lambda_nn_grid=[0.0, 0.1], # Reduced nuclear norm grid\n",
170+ " n_bootstrap=50, # Reduced bootstrap replications for SE\n",
171+ " seed=42\n",
172+ ")\n",
173+ "\n",
174+ "# Note: TROP infers treatment periods from the treatment indicator column.\n",
175+ "# The 'treated' column should be an absorbing state (D=1 for all periods\n",
176+ "# during and after treatment starts).\n",
177+ "\n",
178+ "# For SDID comparison later, we keep post_periods for SDID\n",
179+ "post_periods = list(range(n_pre, n_pre + n_post))\n",
180+ "\n",
181+ "results = trop_est.fit(\n",
182+ " df,\n",
183+ " outcome='outcome',\n",
184+ " treatment='treated',\n",
185+ " unit='unit',\n",
186+ " time='period'\n",
187+ ")\n",
188+ "\n",
189+ "print(results.summary())"
190+ ]
165191 },
166192 {
167193 "cell_type": "code",
217243 "execution_count": null,
218244 "metadata": {},
219245 "outputs": [],
220- "source": "# Effect of different nuclear norm regularization levels\nprint(\"Effect of nuclear norm regularization (λ_nn):\")\nprint(\"=\"*65)\nprint(f\"{'λ_nn':>10} {'ATT':>12} {'Bias':>12} {'Eff. Rank':>15}\")\nprint(\"-\"*65)\n\nfor lambda_nn in [0.0, 0.1, 1.0]: # Reduced grid\n trop_fixed = TROP(\n lambda_time_grid=[1.0], # Fixed\n lambda_unit_grid=[1.0], # Fixed\n lambda_nn_grid=[lambda_nn], # Vary this\n n_bootstrap=20, # Reduced for faster execution\n seed=42\n )\n \n res = trop_fixed.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n \n bias = res.att - true_att\n print(f\"{lambda_nn:>10.1f} {res.att:>12.4f} {bias:>12.4f} {res.effective_rank:>15.2f}\")"
246+ "source": [
247+ "# Effect of different nuclear norm regularization levels\n",
248+ "print(\"Effect of nuclear norm regularization (λ_nn):\")\n",
249+ "print(\"=\"*65)\n",
250+ "print(f\"{'λ_nn':>10} {'ATT':>12} {'Bias':>12} {'Eff. Rank':>15}\")\n",
251+ "print(\"-\"*65)\n",
252+ "\n",
253+ "for lambda_nn in [0.0, 0.1, 1.0]: # Reduced grid\n",
254+ " trop_fixed = TROP(\n",
255+ " lambda_time_grid=[1.0], # Fixed\n",
256+ " lambda_unit_grid=[1.0], # Fixed\n",
257+ " lambda_nn_grid=[lambda_nn], # Vary this\n",
258+ " n_bootstrap=20, # Reduced for faster execution\n",
259+ " seed=42\n",
260+ " )\n",
261+ " \n",
262+ " res = trop_fixed.fit(\n",
263+ " df,\n",
264+ " outcome='outcome',\n",
265+ " treatment='treated',\n",
266+ " unit='unit',\n",
267+ " time='period'\n",
268+ " )\n",
269+ " \n",
270+ " bias = res.att - true_att\n",
271+ " print(f\"{lambda_nn:>10.1f} {res.att:>12.4f} {bias:>12.4f} {res.effective_rank:>15.2f}\")"
272+ ]
221273 },
222274 {
223275 "cell_type": "markdown",
353405 "execution_count": null,
354406 "metadata": {},
355407 "outputs": [],
356- "source": "# SDID (no factor adjustment)\n# Note: SDID uses 'treat' (unit-level ever-treated indicator)\nsdid = SyntheticDiD(\n n_bootstrap=50, # Reduced for faster execution\n seed=42\n)\n\n# SDID still uses post_periods parameter\nsdid_results = sdid.fit(\n df,\n outcome='outcome',\n treatment='treat', # Unit-level ever-treated indicator\n unit='unit',\n time='period',\n post_periods=post_periods\n)\n\n# TROP (with factor adjustment)\n# Note: TROP uses 'treated' (observation-level treatment indicator)\n# and infers treatment periods automatically\ntrop_est2 = TROP(\n lambda_nn_grid=[0.0, 0.1], # Reduced grid for faster execution\n n_bootstrap=50, # Reduced for faster execution\n seed=42\n)\n\ntrop_results = trop_est2.fit(\n df,\n outcome='outcome',\n treatment='treated', # Observation-level indicator\n unit='unit',\n time='period'\n)\n\nprint(\"Comparison: SDID vs TROP\")\nprint(\"=\"*60)\nprint(f\"True ATT: {true_att:.4f}\")\nprint()\nprint(f\"Synthetic DiD (no factor adjustment):\")\nprint(f\" ATT: {sdid_results.att:.4f}\")\nprint(f\" SE: {sdid_results.se:.4f}\")\nprint(f\" Bias: {sdid_results.att - true_att:.4f}\")\nprint()\nprint(f\"TROP (with factor adjustment):\")\nprint(f\" ATT: {trop_results.att:.4f}\")\nprint(f\" SE: {trop_results.se:.4f}\")\nprint(f\" Bias: {trop_results.att - true_att:.4f}\")\nprint(f\" Effective rank: {trop_results.effective_rank:.2f}\")"
408+ "source": [
409+ "# SDID (no factor adjustment)\n",
410+ "# Note: SDID uses 'treat' (unit-level ever-treated indicator)\n",
411+ "sdid = SyntheticDiD(\n",
412+ " n_bootstrap=50, # Reduced for faster execution\n",
413+ " seed=42\n",
414+ ")\n",
415+ "\n",
416+ "# SDID still uses post_periods parameter\n",
417+ "sdid_results = sdid.fit(\n",
418+ " df,\n",
419+ " outcome='outcome',\n",
420+ " treatment='treat', # Unit-level ever-treated indicator\n",
421+ " unit='unit',\n",
422+ " time='period',\n",
423+ " post_periods=post_periods\n",
424+ ")\n",
425+ "\n",
426+ "# TROP (with factor adjustment)\n",
427+ "# Note: TROP uses 'treated' (observation-level treatment indicator)\n",
428+ "# and infers treatment periods automatically\n",
429+ "trop_est2 = TROP(\n",
430+ " lambda_nn_grid=[0.0, 0.1], # Reduced grid for faster execution\n",
431+ " n_bootstrap=50, # Reduced for faster execution\n",
432+ " seed=42\n",
433+ ")\n",
434+ "\n",
435+ "trop_results = trop_est2.fit(\n",
436+ " df,\n",
437+ " outcome='outcome',\n",
438+ " treatment='treated', # Observation-level indicator\n",
439+ " unit='unit',\n",
440+ " time='period'\n",
441+ ")\n",
442+ "\n",
443+ "print(\"Comparison: SDID vs TROP\")\n",
444+ "print(\"=\"*60)\n",
445+ "print(f\"True ATT: {true_att:.4f}\")\n",
446+ "print()\n",
447+ "print(f\"Synthetic DiD (no factor adjustment):\")\n",
448+ "print(f\" ATT: {sdid_results.att:.4f}\")\n",
449+ "print(f\" SE: {sdid_results.se:.4f}\")\n",
450+ "print(f\" Bias: {sdid_results.att - true_att:.4f}\")\n",
451+ "print()\n",
452+ "print(f\"TROP (with factor adjustment):\")\n",
453+ "print(f\" ATT: {trop_results.att:.4f}\")\n",
454+ "print(f\" SE: {trop_results.se:.4f}\")\n",
455+ "print(f\" Bias: {trop_results.att - true_att:.4f}\")\n",
456+ "print(f\" Effective rank: {trop_results.effective_rank:.2f}\")"
457+ ]
357458 },
358459 {
359460 "cell_type": "markdown",
369470 "execution_count": null,
370471 "metadata": {},
371472 "outputs": [],
372- "source": "# Monte Carlo comparison (reduced for faster tutorial execution)\nn_sims = 5 # Reduced from 20 for faster validation\ntrop_estimates = []\nsdid_estimates = []\n\nprint(f\"Running {n_sims} simulations...\")\n\nfor sim in range(n_sims):\n # Generate new data using the library function\n # (includes both 'treated' and 'treat' columns)\n sim_data = generate_factor_data(\n n_units=50,\n n_pre=10,\n n_post=5,\n n_treated=10,\n n_factors=2,\n treatment_effect=2.0,\n factor_strength=1.5,\n noise_sd=0.5,\n seed=100 + sim\n )\n \n # TROP (uses observation-level 'treated')\n # Note: TROP infers treatment periods from the treatment indicator\n try:\n trop_m = TROP(\n lambda_time_grid=[1.0],\n lambda_unit_grid=[1.0],\n lambda_nn_grid=[0.1],\n n_bootstrap=10, \n seed=42 + sim\n )\n trop_res = trop_m.fit(\n sim_data,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n trop_estimates.append(trop_res.att)\n except Exception as e:\n print(f\"TROP failed on sim {sim}: {e}\")\n \n # SDID (uses unit-level 'treat')\n # Note: SDID still uses post_periods parameter\n try:\n sdid_m = SyntheticDiD(n_bootstrap=10, seed=42 + sim)\n sdid_res = sdid_m.fit(\n sim_data,\n outcome='outcome',\n treatment='treat', # Unit-level ever-treated indicator\n unit='unit',\n time='period',\n post_periods=list(range(10, 15))\n )\n sdid_estimates.append(sdid_res.att)\n except Exception as e:\n print(f\"SDID failed on sim {sim}: {e}\")\n\nprint(f\"\\nMonte Carlo Results (True ATT = {true_att})\")\nprint(\"=\"*60)\nprint(f\"{'Estimator':<15} {'Mean':>12} {'Bias':>12} {'RMSE':>12}\")\nprint(\"-\"*60)\n\nif trop_estimates:\n trop_mean = np.mean(trop_estimates)\n trop_bias = trop_mean - true_att\n trop_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in trop_estimates]))\n print(f\"{'TROP':<15} {trop_mean:>12.4f} {trop_bias:>12.4f} {trop_rmse:>12.4f}\")\n\nif sdid_estimates:\n sdid_mean = np.mean(sdid_estimates)\n sdid_bias = sdid_mean - true_att\n sdid_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in sdid_estimates]))\n print(f\"{'SDID':<15} {sdid_mean:>12.4f} {sdid_bias:>12.4f} {sdid_rmse:>12.4f}\")"
473+ "source": [
474+ "# Monte Carlo comparison (reduced for faster tutorial execution)\n",
475+ "n_sims = 5 # Reduced from 20 for faster validation\n",
476+ "trop_estimates = []\n",
477+ "sdid_estimates = []\n",
478+ "\n",
479+ "print(f\"Running {n_sims} simulations...\")\n",
480+ "\n",
481+ "for sim in range(n_sims):\n",
482+ " # Generate new data using the library function\n",
483+ " # (includes both 'treated' and 'treat' columns)\n",
484+ " sim_data = generate_factor_data(\n",
485+ " n_units=50,\n",
486+ " n_pre=10,\n",
487+ " n_post=5,\n",
488+ " n_treated=10,\n",
489+ " n_factors=2,\n",
490+ " treatment_effect=2.0,\n",
491+ " factor_strength=1.5,\n",
492+ " noise_sd=0.5,\n",
493+ " seed=100 + sim\n",
494+ " )\n",
495+ " \n",
496+ " # TROP (uses observation-level 'treated')\n",
497+ " # Note: TROP infers treatment periods from the treatment indicator\n",
498+ " try:\n",
499+ " trop_m = TROP(\n",
500+ " lambda_time_grid=[1.0],\n",
501+ " lambda_unit_grid=[1.0],\n",
502+ " lambda_nn_grid=[0.1],\n",
503+ " n_bootstrap=10, \n",
504+ " seed=42 + sim\n",
505+ " )\n",
506+ " trop_res = trop_m.fit(\n",
507+ " sim_data,\n",
508+ " outcome='outcome',\n",
509+ " treatment='treated',\n",
510+ " unit='unit',\n",
511+ " time='period'\n",
512+ " )\n",
513+ " trop_estimates.append(trop_res.att)\n",
514+ " except Exception as e:\n",
515+ " print(f\"TROP failed on sim {sim}: {e}\")\n",
516+ " \n",
517+ " # SDID (uses unit-level 'treat')\n",
518+ " # Note: SDID still uses post_periods parameter\n",
519+ " try:\n",
520+ " sdid_m = SyntheticDiD(n_bootstrap=10, seed=42 + sim)\n",
521+ " sdid_res = sdid_m.fit(\n",
522+ " sim_data,\n",
523+ " outcome='outcome',\n",
524+ " treatment='treat', # Unit-level ever-treated indicator\n",
525+ " unit='unit',\n",
526+ " time='period',\n",
527+ " post_periods=list(range(10, 15))\n",
528+ " )\n",
529+ " sdid_estimates.append(sdid_res.att)\n",
530+ " except Exception as e:\n",
531+ " print(f\"SDID failed on sim {sim}: {e}\")\n",
532+ "\n",
533+ "print(f\"\\nMonte Carlo Results (True ATT = {true_att})\")\n",
534+ "print(\"=\"*60)\n",
535+ "print(f\"{'Estimator':<15} {'Mean':>12} {'Bias':>12} {'RMSE':>12}\")\n",
536+ "print(\"-\"*60)\n",
537+ "\n",
538+ "if trop_estimates:\n",
539+ " trop_mean = np.mean(trop_estimates)\n",
540+ " trop_bias = trop_mean - true_att\n",
541+ " trop_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in trop_estimates]))\n",
542+ " print(f\"{'TROP':<15} {trop_mean:>12.4f} {trop_bias:>12.4f} {trop_rmse:>12.4f}\")\n",
543+ "\n",
544+ "if sdid_estimates:\n",
545+ " sdid_mean = np.mean(sdid_estimates)\n",
546+ " sdid_bias = sdid_mean - true_att\n",
547+ " sdid_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in sdid_estimates]))\n",
548+ " print(f\"{'SDID':<15} {sdid_mean:>12.4f} {sdid_bias:>12.4f} {sdid_rmse:>12.4f}\")"
549+ ]
373550 },
374551 {
375552 "cell_type": "code",
407584 "execution_count": null,
408585 "metadata": {},
409586 "outputs": [],
410- "source": "# One-liner estimation with default tuning grid\n# Note: TROP infers treatment periods from the treatment indicator\nquick_results = trop(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period',\n n_bootstrap=20, # Reduced for faster execution\n seed=42\n)\n\nprint(f\"Quick estimation:\")\nprint(f\" ATT: {quick_results.att:.4f}\")\nprint(f\" SE: {quick_results.se:.4f}\")\nprint(f\" λ_time: {quick_results.lambda_time:.2f}\")\nprint(f\" λ_unit: {quick_results.lambda_unit:.2f}\")\nprint(f\" λ_nn: {quick_results.lambda_nn:.2f}\")\nprint(f\" Effective rank: {quick_results.effective_rank:.2f}\")"
587+ "source": [
588+ "# One-liner estimation with default tuning grid\n",
589+ "# Note: TROP infers treatment periods from the treatment indicator\n",
590+ "quick_results = trop(\n",
591+ " df,\n",
592+ " outcome='outcome',\n",
593+ " treatment='treated',\n",
594+ " unit='unit',\n",
595+ " time='period',\n",
596+ " n_bootstrap=20, # Reduced for faster execution\n",
597+ " seed=42\n",
598+ ")\n",
599+ "\n",
600+ "print(f\"Quick estimation:\")\n",
601+ "print(f\" ATT: {quick_results.att:.4f}\")\n",
602+ "print(f\" SE: {quick_results.se:.4f}\")\n",
603+ "print(f\" λ_time: {quick_results.lambda_time:.2f}\")\n",
604+ "print(f\" λ_unit: {quick_results.lambda_unit:.2f}\")\n",
605+ "print(f\" λ_nn: {quick_results.lambda_nn:.2f}\")\n",
606+ "print(f\" Effective rank: {quick_results.effective_rank:.2f}\")"
607+ ]
411608 },
412609 {
413610 "cell_type": "markdown",
425622 "execution_count": null,
426623 "metadata": {},
427624 "outputs": [],
428- "source": "# Compare variance estimation methods\nprint(\"Variance estimation comparison:\")\nprint(\"=\"*50)\n\nfor method in ['bootstrap', 'jackknife']:\n trop_var = TROP(\n lambda_time_grid=[1.0],\n lambda_unit_grid=[1.0], \n lambda_nn_grid=[0.1],\n variance_method=method,\n n_bootstrap=30, # Reduced for faster execution\n seed=42\n )\n \n res = trop_var.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n \n print(f\"\\n{method.capitalize()}:\")\n print(f\" ATT: {res.att:.4f}\")\n print(f\" SE: {res.se:.4f}\")\n print(f\" 95% CI: [{res.conf_int[0]:.4f}, {res.conf_int[1]:.4f}]\")"
625+ "source": [
626+ "# Compare variance estimation methods\n",
627+ "print(\"Variance estimation comparison:\")\n",
628+ "print(\"=\"*50)\n",
629+ "\n",
630+ "for method in ['bootstrap', 'jackknife']:\n",
631+ " trop_var = TROP(\n",
632+ " lambda_time_grid=[1.0],\n",
633+ " lambda_unit_grid=[1.0], \n",
634+ " lambda_nn_grid=[0.1],\n",
635+ " variance_method=method,\n",
636+ " n_bootstrap=30, # Reduced for faster execution\n",
637+ " seed=42\n",
638+ " )\n",
639+ " \n",
640+ " res = trop_var.fit(\n",
641+ " df,\n",
642+ " outcome='outcome',\n",
643+ " treatment='treated',\n",
644+ " unit='unit',\n",
645+ " time='period'\n",
646+ " )\n",
647+ " \n",
648+ " print(f\"\\n{method.capitalize()}:\")\n",
649+ " print(f\" ATT: {res.att:.4f}\")\n",
650+ " print(f\" SE: {res.se:.4f}\")\n",
651+ " print(f\" 95% CI: [{res.conf_int[0]:.4f}, {res.conf_int[1]:.4f}]\")"
652+ ]
429653 },
430654 {
431655 "cell_type": "markdown",
508732 },
509733 "nbformat": 4,
510734 "nbformat_minor": 4
511- }
735+ }
0 commit comments