diff --git a/notebooks/AdultIncome_LongRun_Alpha_Tracking.ipynb b/notebooks/AdultIncome_LongRun_Alpha_Tracking.ipynb index f63a90e..90e8dc7 100644 --- a/notebooks/AdultIncome_LongRun_Alpha_Tracking.ipynb +++ b/notebooks/AdultIncome_LongRun_Alpha_Tracking.ipynb @@ -25,9 +25,15 @@ { "cell_type": "code", "source": [ - "from pathlib import Path\n\n", + "from pathlib import Path\n", + "import os\n", + "import platform\n", + "\n", "RANDOM_STATE = 42\n", "\n", + "CPU_THREADS = max(1, (os.cpu_count() or 1) - 1)\n", + "IS_APPLE_SILICON = platform.system() == 'Darwin' and platform.machine() == 'arm64'\n", + "\n", "params = {\n", " 'max_depth': 4,\n", " 'eta': 0.05,\n", @@ -39,14 +45,18 @@ " 'gamma': 1.0,\n", " 'objective': 'binary:logistic',\n", " 'eval_metric': 'logloss',\n", + " # Apple Silicon (M1/M2/M3) benefits from CPU histogram + multithreading.\n", + " # XGBoost GPU backends are not used on macOS ARM, so tune CPU threads instead.\n", " 'tree_method': 'hist',\n", + " 'nthread': CPU_THREADS,\n", " 'seed': RANDOM_STATE,\n", "}\n", "\n", "TOTAL_ROUNDS = 1200\n", - "CHUNK_SIZE = 50\n", + "CHUNK_SIZE = 25\n", "N_STEPS = TOTAL_ROUNDS // CHUNK_SIZE\n", - "CHECKPOINT_EVERY_STEPS = 2\n", + "CHECKPOINT_EVERY_STEPS = 1\n", + "RESUME_FROM_CHECKPOINT = True\n", "\n", "in_colab = False\n", "try:\n", @@ -74,6 +84,8 @@ "\n", "print(f'TOTAL_ROUNDS={TOTAL_ROUNDS}, CHUNK_SIZE={CHUNK_SIZE}, N_STEPS={N_STEPS}')\n", "print(f'CHECKPOINT_EVERY_STEPS={CHECKPOINT_EVERY_STEPS}')\n", + "print(f'RESUME_FROM_CHECKPOINT={RESUME_FROM_CHECKPOINT}')\n", + "print(f'CPU_THREADS={CPU_THREADS} | IS_APPLE_SILICON={IS_APPLE_SILICON}')\n", "print(f'Results CSV path: {results_path_csv}')\n" ], "metadata": { @@ -992,51 +1004,95 @@ " num_boost_round=current_round,\n", " multiclass='error',\n", " )\n", - " return layer\n\n", + " return layer\n", + "\n", "def compute_alpha_from_layer(layer):\n", " watcher = ww.WeightWatcher(model=layer)\n", " df = watcher.analyze(randomize=True, detX=True)\n", - " return float(df['alpha'].iloc[0]), df\n\n", + " return float(df['alpha'].iloc[0]), df\n", + "\n", "rows = []\n", "bst = None\n", - "W_LIST = ['W1', 'W2', 'W7', 'W8', 'W9', 'W10']\n\n", - "for step in range(1, N_STEPS + 1):\n", + "start_step = 1\n", + "W_LIST = ['W1', 'W2', 'W7', 'W8', 'W9', 'W10']\n", + "\n", + "if RESUME_FROM_CHECKPOINT and results_path_csv.exists() and latest_model_path.exists():\n", + " prior_df = pd.read_csv(results_path_csv)\n", + " if not prior_df.empty:\n", + " last_round = int(prior_df['boosting_round'].max())\n", + " if (last_round % CHUNK_SIZE) != 0:\n", + " raise ValueError(\n", + " f'Cannot resume: last_round={last_round} is not divisible by CHUNK_SIZE={CHUNK_SIZE}.'\n", + " )\n", + " if last_round >= TOTAL_ROUNDS:\n", + " print(\n", + " f'[RESUME] Existing checkpoint already reached TOTAL_ROUNDS ({last_round}/{TOTAL_ROUNDS}). '\n", + " 'No additional training needed.'\n", + " )\n", + " results_df = prior_df.sort_values('boosting_round').reset_index(drop=True)\n", + " rows = results_df.to_dict('records')\n", + " start_step = N_STEPS + 1\n", + " else:\n", + " bst = xgb.Booster()\n", + " bst.load_model(str(latest_model_path))\n", + " results_df = prior_df.sort_values('boosting_round').reset_index(drop=True)\n", + " rows = results_df.to_dict('records')\n", + " start_step = (last_round // CHUNK_SIZE) + 1\n", + " print(\n", + " f'[RESUME] Loaded checkpoint at round {last_round}. '\n", + " f'Resuming from round {start_step * CHUNK_SIZE}.'\n", + " )\n", + "\n", + "for step in range(start_step, N_STEPS + 1):\n", " bst = xgb.train(\n", " params=params,\n", " dtrain=dtrain,\n", " num_boost_round=CHUNK_SIZE,\n", " xgb_model=bst,\n", " verbose_eval=False,\n", - " )\n\n", - " current_round = step * CHUNK_SIZE\n\n", + " )\n", + "\n", + " current_round = step * CHUNK_SIZE\n", + "\n", " y_prob = bst.predict(dtest)\n", " y_pred = (y_prob >= 0.5).astype(int)\n", - " test_acc = accuracy_score(y_test_np, y_pred)\n\n", + " test_acc = accuracy_score(y_test_np, y_pred)\n", + "\n", " row = {\n", " 'boosting_round': current_round,\n", " 'test_accuracy': test_acc,\n", - " }\n\n", + " }\n", + "\n", " alpha_msg = []\n", " for W_name in W_LIST:\n", " layer = build_layer_for_W(bst, W_name, current_round)\n", " alpha_value, _ = compute_alpha_from_layer(layer)\n", " row[f'alpha_{W_name}'] = alpha_value\n", - " alpha_msg.append(f\"alpha_{W_name}={alpha_value:.3f}\")\n\n", + " alpha_msg.append(f\"alpha_{W_name}={alpha_value:.3f}\")\n", + "\n", " rows.append(row)\n", - " results_df = pd.DataFrame(rows)\n\n", + " results_df = pd.DataFrame(rows)\n", + "\n", " should_checkpoint = (step % CHECKPOINT_EVERY_STEPS == 0) or (step == N_STEPS)\n", " if should_checkpoint:\n", " results_df.to_csv(results_path_csv, index=False)\n", " try:\n", " results_df.to_feather(results_path_feather)\n", " except Exception as e:\n", - " print(f'[WARN] Feather save skipped: {e}')\n\n", + " print(f'[WARN] Feather save skipped: {e}')\n", + "\n", " model_path = models_dir / f'adult_income_longrun_round_{current_round}.json'\n", " bst.save_model(str(model_path))\n", - " bst.save_model(str(latest_model_path))\n\n", + " bst.save_model(str(latest_model_path))\n", + "\n", " print(f'[CHECKPOINT] saved metrics -> {results_path_csv}')\n", - " print(f'[CHECKPOINT] saved model -> {model_path}')\n\n", - " print(f\"Round {current_round:4d} | acc={test_acc:.4f} | \" + ', '.join(alpha_msg))\n\n", + " print(f'[CHECKPOINT] saved model -> {model_path}')\n", + "\n", + " print(f\"Round {current_round:4d} | acc={test_acc:.4f} | \" + ', '.join(alpha_msg))\n", + "\n", + "if 'results_df' not in locals():\n", + " results_df = pd.DataFrame(rows)\n", + "\n", "results_df.tail()\n" ], "id": "wDfxKYkYQ7Aa"