|
153 | 153 | "### Load the config.yml file, which contains settings that are used across this pipeline" |
154 | 154 | ] |
155 | 155 | }, |
| 156 | + { |
| 157 | + "cell_type": "code", |
| 158 | + "execution_count": null, |
| 159 | + "metadata": { |
| 160 | + "tags": [] |
| 161 | + }, |
| 162 | + "outputs": [], |
| 163 | + "source": [] |
| 164 | + }, |
| 165 | + { |
| 166 | + "cell_type": "code", |
| 167 | + "execution_count": null, |
| 168 | + "metadata": { |
| 169 | + "tags": [] |
| 170 | + }, |
| 171 | + "outputs": [], |
| 172 | + "source": [ |
| 173 | + "from glob import glob\n", |
| 174 | + "from utils import load_config\n", |
| 175 | + "\n", |
| 176 | + "s3 = boto3.client('s3')\n", |
| 177 | + "\n", |
| 178 | + "config = load_config('config.yml')\n", |
| 179 | + "\n", |
| 180 | + "source_dir = config['scripts']['source_dir']\n", |
| 181 | + "bucket = config['aws']['s3_bucket']\n", |
| 182 | + "prefix = config['aws']['s3_prefix']\n", |
| 183 | + "\n", |
| 184 | + "files = glob(os.path.join(source_dir, \"*.py\")) + glob(os.path.join(source_dir, \"*.txt\"))\n", |
| 185 | + "\n", |
| 186 | + "for file in files:\n", |
| 187 | + " s3.upload_file(file, bucket, f\"{prefix}/{file}\")\n", |
| 188 | + " print(file, bucket, f\"{prefix}/{file}\")" |
| 189 | + ] |
| 190 | + }, |
156 | 191 | { |
157 | 192 | "cell_type": "code", |
158 | 193 | "execution_count": null, |
|
176 | 211 | "## initialize the SageMaker session, region, role, bucket and pipeline session\n", |
177 | 212 | "session = sagemaker.session.Session()\n", |
178 | 213 | "region = session.boto_region_name\n", |
179 | | - "pipeline_session = PipelineSession()\n", |
| 214 | + "pipeline_session = PipelineSession(default_bucket=config['aws']['s3_bucket'])\n", |
180 | 215 | "ci = boto3.client('sts').get_caller_identity()\n", |
181 | 216 | "\n", |
182 | 217 | "role_name = config['aws']['sagemaker_execution_role_name']\n", |
|
229 | 264 | "# queries for the training job (true and non-true), written to query_training.py\n", |
230 | 265 | "fpath: str = os.path.join(config['scripts']['source_dir'], config['scripts']['query'])\n", |
231 | 266 | "logger.info(f\"writing training query to {fpath}\")\n", |
232 | | - "Path(fpath).write_text(f\"TRAINING_DATA_QUERY=\\\"\\\"\\\"{config['training_step']['query']}\\\"\\\"\\\"\")\n", |
| 267 | + "\n", |
| 268 | + "q = f\"\"\"\n", |
| 269 | + "TRAINING_TRUE_QUERY=\\\"\\\"\\\"{config['training_step']['query_true']}\\\"\\\"\\\"\n", |
| 270 | + "\\n\n", |
| 271 | + "TRAINING_NON_TRUE_QUERY=\\\"\\\"\\\"{config['training_step']['query_non_true']}\\\"\\\"\\\"\n", |
| 272 | + "\"\"\"\n", |
| 273 | + "\n", |
| 274 | + "Path(fpath).write_text(q)\n", |
| 275 | + "\n", |
233 | 276 | "\n", |
234 | 277 | "# approval status for trained model\n", |
235 | 278 | "model_approval_status = ParameterString(\n", |
|
312 | 355 | "# A managed processor comes with a preconfigured container, so only specifying version is required.\n", |
313 | 356 | "est_cls = sagemaker.sklearn.estimator.SKLearn\n", |
314 | 357 | "\n", |
| 358 | + "nw_cfg = config['aws']['network_config']\n", |
| 359 | + "\n", |
| 360 | + "network_config = sagemaker.network.NetworkConfig(\n", |
| 361 | + " enable_network_isolation=nw_cfg['enable_network_isolation'],\n", |
| 362 | + " security_group_ids=nw_cfg['security_group_ids'], \n", |
| 363 | + " subnets=nw_cfg['subnets']\n", |
| 364 | + ")\n", |
| 365 | + "\n", |
315 | 366 | "sklearn_processor = FrameworkProcessor(\n", |
316 | 367 | " estimator_cls=est_cls,\n", |
317 | 368 | " framework_version=config['training_step']['sklearn_framework_version'],\n", |
|
320 | 371 | " instance_count=config['data_processing_step']['instance_count'],\n", |
321 | 372 | " tags=config['data_processing_step']['tags'], \n", |
322 | 373 | " sagemaker_session=pipeline_session,\n", |
323 | | - " base_job_name=config['pipeline']['base_job_name'], )\n", |
| 374 | + " base_job_name=config['pipeline']['base_job_name'], \n", |
| 375 | + " network_config=network_config\n", |
| 376 | + ")\n", |
324 | 377 | "\n", |
325 | 378 | "outputs_preprocessor = [\n", |
326 | 379 | " ProcessingOutput(\n", |
|
435 | 488 | " \"features\": config['training_step']['training_features'],\n", |
436 | 489 | " \"target\": config['training_step']['training_target'],\n", |
437 | 490 | " },\n", |
438 | | - " tags=config['training_step']['tags']\n", |
| 491 | + " tags=config['training_step']['tags'],\n", |
| 492 | + " output_path=f\"s3://{bucket}/{prefix}\",\n", |
439 | 493 | ")\n", |
440 | 494 | "\n", |
441 | 495 | "# Create Hyperparameter tuner object. Ranges from https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost-tuning.html\n", |
|
538 | 592 | " )\n", |
539 | 593 | " )\n", |
540 | 594 | " ],\n", |
541 | | - " code = config['scripts']['evaluation'],\n", |
| 595 | + " code = f\"s3://{bucket}/{prefix}/{config['scripts']['evaluation']}\",\n", |
542 | 596 | " property_files=[evaluation_report],\n", |
543 | 597 | " job_arguments=[\n", |
544 | 598 | " \"--target\", target_parameter,\n", |
|
559 | 613 | "The model is registered with the Model Registry with its approval status set to PendingManualApproval. This means the model cannot be deployed on a SageMaker Endpoint unless its status in the registry is changed to Approved, either manually via the SageMaker console, programmatically, or through a Lambda function." |
560 | 614 | ] |
561 | 615 | }, |
| 616 | + { |
| 617 | + "cell_type": "code", |
| 618 | + "execution_count": null, |
| 619 | + "metadata": {}, |
| 620 | + "outputs": [], |
| 621 | + "source": [] |
| 622 | + }, |
562 | 623 | { |
563 | 624 | "cell_type": "code", |
564 | 625 | "execution_count": null, |
|
704 | 765 | " step_preprocess_data, \n", |
705 | 766 | " step_tuning, \n", |
706 | 767 | " step_evaluate_model, \n", |
707 | | - " step_cond],\n", |
| 768 | + " step_cond\n", |
| 769 | + " ],\n", |
708 | 770 | ")" |
709 | 771 | ] |
710 | 772 | }, |
|
1408 | 1470 | ], |
1409 | 1471 | "instance_type": "ml.t3.medium", |
1410 | 1472 | "kernelspec": { |
1411 | | - "display_name": "Python 3", |
| 1473 | + "display_name": "Python 3 (Data Science 3.0)", |
1412 | 1474 | "language": "python", |
1413 | | - "name": "python3" |
| 1475 | + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-310-v1" |
1414 | 1476 | }, |
1415 | 1477 | "language_info": { |
1416 | 1478 | "codemirror_mode": { |
|
1422 | 1484 | "name": "python", |
1423 | 1485 | "nbconvert_exporter": "python", |
1424 | 1486 | "pygments_lexer": "ipython3", |
1425 | | - "version": "3.11.5" |
| 1487 | + "version": "3.10.6" |
1426 | 1488 | } |
1427 | 1489 | }, |
1428 | 1490 | "nbformat": 4, |
|
0 commit comments