From ba7c9e42d8d33e15e0b527aef34a1c39d32ba60d Mon Sep 17 00:00:00 2001
From: Kyle Lesinger <kdl0040@uah.edu>
Date: Mon, 16 Mar 2026 16:07:35 -0500
Subject: [PATCH] add workflow for changing event name and adding to metadata

---
 Jupyterhub/extract_eventname_metadata.ipynb | 950 ++++++++++++++++++++
 _quarto.yml                                 |   2 +
 2 files changed, 952 insertions(+)
 create mode 100644 Jupyterhub/extract_eventname_metadata.ipynb
diff --git a/Jupyterhub/extract_eventname_metadata.ipynb b/Jupyterhub/extract_eventname_metadata.ipynb
new file mode 100644
index 0000000..50afc26
--- /dev/null
+++ b/Jupyterhub/extract_eventname_metadata.ipynb
@@ -0,0 +1,950 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "title: S3 GeoTIFF Event Metadata Extractor\n",
+    "description: This notebook processes GeoTIFF files from S3 that contain activation events in their filenames and extracts event metadata.\n",
+    "author: \n",
+    "  - NASA Disasters Team\n",
+    "date: March 16, 2026\n",
+    "execute:\n",
+    "   freeze: true\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook processes GeoTIFF files from S3 that contain activation events in their filenames.\n",
+    "\n",
+    "**Workflow:**\n",
+    "\n",
+    "1. Read files from AWS S3\n",
+    "2. Detect activation event pattern: `YYYYMM_<EVENT>_<HAZARD>_*.tif`\n",
+    "3. Add event metadata to GeoTIFF attributes\n",
+    "4. Move file to new S3 location\n",
+    "5. Optionally delete from original location"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Configuration\n",
+    "\n",
+    "Set your S3 bucket names and paths here."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# S3 Configuration\n",
+    "SOURCE_BUCKET = \"nasa-disasters\"\n",
+    "SOURCE_PREFIX = \"drcs_activations_new/Landsat/burnRatio\"  # Path within source bucket\n",
+    "\n",
+    "DESTINATION_BUCKET = \"nasa-disasters-dev\"\n",
+    "DESTINATION_PREFIX = \"ProgramData/Landsat/burnRatio/\"  # Path within destination bucket (must have / at the very end of it!!!)\n",
+    "\n",
+    "# AWS Region\n",
+    "AWS_REGION = \"us-west-2\"\n",
+    "\n",
+    "# Processing options\n",
+    "DELETE_AFTER_MOVE = False  # Set to True to delete original files after successful processing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import Libraries"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Required Libraries\n",
+    "\n",
+    "Before running this notebook, install the required Python libraries:\n",
+    "\n",
+    "```bash\n",
+    "# Install core dependencies\n",
+    "pip install boto3 rasterio\n",
+    "\n",
+    "# Install GDAL (required for COG operations)\n",
+    "# On macOS with Homebrew:\n",
+    "brew install gdal\n",
+    "pip install gdal\n",
+    "\n",
+    "# On Ubuntu/Debian:\n",
+    "sudo apt-get install gdal-bin libgdal-dev\n",
+    "pip install gdal==$(gdal-config --version)\n",
+    "\n",
+    "# On conda:\n",
+    "conda install -c conda-forge gdal\n",
+    "\n",
+    "# Install rio-cogeo for COG validation\n",
+    "pip install rio-cogeo\n",
+    "```\n",
+    "\n",
+    "**Note:** GDAL can be tricky to install. If you encounter issues, use conda instead of pip for all geospatial libraries:\n",
+    "\n",
+    "```bash\n",
+    "conda install -c conda-forge boto3 rasterio gdal rio-cogeo\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Libraries imported successfully\n"
+     ]
+    }
+   ],
+   "source": [
+    "import boto3\n",
+    "import re\n",
+    "import io\n",
+    "from typing import Dict, Optional, Tuple, List\n",
+    "import rasterio\n",
+    "from rasterio.io import MemoryFile\n",
+    "from datetime import datetime\n",
+    "from osgeo import gdal\n",
+    "from rio_cogeo.cogeo import cog_validate, cog_translate\n",
+    "from rio_cogeo.profiles import cog_profiles\n",
+    "\n",
+    "# Enable GDAL exceptions\n",
+    "gdal.UseExceptions()\n",
+    "\n",
+    "print(\"Libraries imported successfully\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## AWS Session and Role Assumption\n",
+    "\n",
+    "Assume the current role that's already active in the account."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Current AWS Identity:\n",
+      "  Account: 515966502221\n",
+      "  ARN: arn:aws:sts::515966502221:assumed-role/disasters-prod/botocore-session-1773693036\n",
+      "  User ID: AROAXQIQAAVGWA2LHXCPW:botocore-session-1773693036\n",
+      "\n",
+      "S3 client created successfully\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create boto3 session using current credentials\n",
+    "session = boto3.Session(region_name=AWS_REGION)\n",
+    "\n",
+    "# Get STS client to verify current identity\n",
+    "sts_client = session.client('sts')\n",
+    "identity = sts_client.get_caller_identity()\n",
+    "\n",
+    "print(f\"Current AWS Identity:\")\n",
+    "print(f\"  Account: {identity['Account']}\")\n",
+    "print(f\"  ARN: {identity['Arn']}\")\n",
+    "print(f\"  User ID: {identity['UserId']}\")\n",
+    "\n",
+    "# Create S3 client\n",
+    "s3_client = session.client('s3')\n",
+    "\n",
+    "print(\"\\nS3 client created successfully\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Helper Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Helper functions defined successfully\n"
+     ]
+    }
+   ],
+   "source": [
+    "def detect_activation_event(filename: str) -> Optional[Dict[str, str]]:\n",
+    "    \"\"\"\n",
+    "    Detect activation event pattern in filename.\n",
+    "    \n",
+    "    Pattern: YYYYMM_<EVENT>_<HAZARD>_*.tif\n",
+    "    Example: 202401_EMSR123_FLOOD_extent.tif\n",
+    "    \n",
+    "    Args:\n",
+    "        filename: The filename to check\n",
+    "        \n",
+    "    Returns:\n",
+    "        Dictionary with event metadata if pattern matches, None otherwise\n",
+    "    \"\"\"\n",
+    "    # Pattern: YYYYMM_<HAZARD>_<EVENT>_*.tif\n",
+    "    pattern = r'^(\\d{6})_([A-Za-z0-9]+)_([A-Za-z0-9]+)_(.*)\\.tif$'\n",
+    "    \n",
+    "    match = re.match(pattern, filename)\n",
+    "    \n",
+    "    if match:\n",
+    "        year_month, hazard, location, remainder = match.groups()\n",
+    "        \n",
+    "        return {\n",
+    "            'year_month': year_month,\n",
+    "            'hazard': hazard,\n",
+    "            'location': location,\n",
+    "            'remainder': remainder,\n",
+    "            'original_filename': filename\n",
+    "        }\n",
+    "    \n",
+    "    return None\n",
+    "\n",
+    "\n",
+    "def list_s3_files(bucket: str, prefix: str, pattern: str = '.tif') -> List[str]:\n",
+    "    \"\"\"\n",
+    "    List all files in S3 bucket matching the pattern.\n",
+    "    \n",
+    "    Args:\n",
+    "        bucket: S3 bucket name\n",
+    "        prefix: Prefix (folder path) within bucket\n",
+    "        pattern: File extension or pattern to match\n",
+    "        \n",
+    "    Returns:\n",
+    "        List of S3 keys\n",
+    "    \"\"\"\n",
+    "    files = []\n",
+    "    paginator = s3_client.get_paginator('list_objects_v2')\n",
+    "    \n",
+    "    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):\n",
+    "        if 'Contents' in page:\n",
+    "            for obj in page['Contents']:\n",
+    "                key = obj['Key']\n",
+    "                if pattern in key:\n",
+    "                    files.append(key)\n",
+    "    \n",
+    "    return files\n",
+    "\n",
+    "\n",
+    "def read_geotiff_from_s3(bucket: str, key: str) -> Tuple[bytes, dict]:\n",
+    "    \"\"\"\n",
+    "    Read GeoTIFF file from S3 into memory.\n",
+    "    \n",
+    "    Args:\n",
+    "        bucket: S3 bucket name\n",
+    "        key: S3 object key\n",
+    "        \n",
+    "    Returns:\n",
+    "        Tuple of (file bytes, rasterio metadata)\n",
+    "    \"\"\"\n",
+    "    # Download file to memory\n",
+    "    obj = s3_client.get_object(Bucket=bucket, Key=key)\n",
+    "    file_bytes = obj['Body'].read()\n",
+    "    \n",
+    "    # Read metadata with rasterio\n",
+    "    with MemoryFile(file_bytes) as memfile:\n",
+    "        with memfile.open() as dataset:\n",
+    "            metadata = dataset.meta.copy()\n",
+    "            tags = dataset.tags()\n",
+    "            \n",
+    "    return file_bytes, metadata, tags\n",
+    "\n",
+    "\n",
+    "def validate_cog(file_bytes: bytes, filename: str = \"temp.tif\") -> Tuple[bool, dict]:\n",
+    "    \"\"\"\n",
+    "    Validate if a GeoTIFF is a valid Cloud-Optimized GeoTIFF (COG).\n",
+    "    \n",
+    "    Args:\n",
+    "        file_bytes: GeoTIFF file bytes\n",
+    "        filename: Optional filename for display purposes\n",
+    "        \n",
+    "    Returns:\n",
+    "        Tuple of (is_valid: bool, validation_info: dict)\n",
+    "    \"\"\"\n",
+    "    vsimem_path = f'/vsimem/{filename}'\n",
+    "    \n",
+    "    try:\n",
+    "        # Write to virtual memory\n",
+    "        gdal.FileFromMemBuffer(vsimem_path, file_bytes)\n",
+    "        \n",
+    "        # Validate COG structure\n",
+    "        is_valid, errors, warnings = cog_validate(vsimem_path)\n",
+    "        \n",
+    "        # Get detailed info\n",
+    "        ds = gdal.Open(vsimem_path)\n",
+    "        if ds:\n",
+    "            metadata = {\n",
+    "                'is_cog': is_valid,\n",
+    "                'errors': errors,\n",
+    "                'warnings': warnings,\n",
+    "                'width': ds.RasterXSize,\n",
+    "                'height': ds.RasterYSize,\n",
+    "                'bands': ds.RasterCount,\n",
+    "                'compression': ds.GetMetadataItem('COMPRESSION', 'IMAGE_STRUCTURE') or 'None',\n",
+    "                'tiled': ds.GetMetadataItem('INTERLEAVE', 'IMAGE_STRUCTURE') == 'PIXEL',\n",
+    "                'blocksize': (ds.GetRasterBand(1).GetBlockSize() if ds.RasterCount > 0 else (None, None)),\n",
+    "                'overviews': ds.GetRasterBand(1).GetOverviewCount() if ds.RasterCount > 0 else 0\n",
+    "            }\n",
+    "            ds = None\n",
+    "        else:\n",
+    "            metadata = {'is_cog': False, 'errors': ['Could not open file'], 'warnings': []}\n",
+    "        \n",
+    "        # Clean up\n",
+    "        gdal.Unlink(vsimem_path)\n",
+    "        \n",
+    "        return is_valid, metadata\n",
+    "        \n",
+    "    except Exception as e:\n",
+    "        gdal.Unlink(vsimem_path)\n",
+    "        return False, {'is_cog': False, 'errors': [str(e)], 'warnings': []}\n",
+    "\n",
+    "\n",
+    "def add_metadata_and_create_cog(file_bytes: bytes, event_metadata: Dict[str, str]) -> bytes:\n",
+    "    \"\"\"\n",
+    "    Rebuild GeoTIFF as a valid COG with metadata using rio-cogeo library.\n",
+    "    \n",
+    "    Preserves all original compression settings (compression type, level, predictor)\n",
+    "    from the source file and only adds activation event metadata.\n",
+    "    \n",
+    "    Args:\n",
+    "        file_bytes: Original GeoTIFF file bytes\n",
+    "        event_metadata: Dictionary with event information\n",
+    "        \n",
+    "    Returns:\n",
+    "        Modified GeoTIFF file bytes as a valid COG with original compression settings\n",
+    "    \"\"\"\n",
+    "    input_path = '/vsimem/input.tif'\n",
+    "    output_path = '/vsimem/output_cog.tif'\n",
+    "    \n",
+    "    try:\n",
+    "        # Write input to virtual memory\n",
+    "        gdal.FileFromMemBuffer(input_path, file_bytes)\n",
+    "        \n",
+    "        # Read original file settings with rasterio\n",
+    "        print(\"    Reading original file settings...\")\n",
+    "        with MemoryFile(file_bytes) as memfile:\n",
+    "            with memfile.open() as src:\n",
+    "                original_profile = src.profile.copy()\n",
+    "        \n",
+    "        # Extract compression settings from original file\n",
+    "        compress = original_profile.get('compress', 'ZSTD')\n",
+    "        predictor = original_profile.get('predictor', None)\n",
+    "        \n",
+    "        # Get compression level if available\n",
+    "        compress_level = None\n",
+    "        if 'zstd_level' in original_profile:\n",
+    "            compress_level = original_profile['zstd_level']\n",
+    "        elif 'zlevel' in original_profile:  # For DEFLATE compression\n",
+    "            compress_level = original_profile['zlevel']\n",
+    "        \n",
+    "        print(f\"    Original settings: compress={compress}, predictor={predictor}, level={compress_level}\")\n",
+    "        print(\"    Rebuilding as COG (preserving original compression settings)...\")\n",
+    "        \n",
+    "        # Prepare metadata tags\n",
+    "        tags = {\n",
+    "            'ACTIVATION_EVENT': f\"{event_metadata['year_month']}_{event_metadata['hazard']}_{event_metadata['location']}\",\n",
+    "            'YEAR_MONTH': event_metadata['year_month'],\n",
+    "            'HAZARD': event_metadata['hazard'],\n",
+    "            'LOCATION': event_metadata['location'],\n",
+    "            'PROCESSING_DATE': datetime.utcnow().isoformat(),\n",
+    "            'PROCESSOR': 'S3 GeoTIFF COG Processor v1.0'\n",
+    "        }\n",
+    "        \n",
+    "        # Create COG profile using original compression settings\n",
+    "        cog_profile = {\n",
+    "            'driver': 'GTiff',\n",
+    "            'interleave': 'pixel',\n",
+    "            'tiled': True,\n",
+    "            'blockxsize': 512,\n",
+    "            'blockysize': 512,\n",
+    "            'compress': compress\n",
+    "        }\n",
+    "        \n",
+    "        # Add compression level if present\n",
+    "        if compress_level is not None:\n",
+    "            if compress.upper() == 'ZSTD':\n",
+    "                cog_profile['zstd_level'] = compress_level\n",
+    "            elif compress.upper() in ['DEFLATE', 'LZW']:\n",
+    "                cog_profile['zlevel'] = compress_level\n",
+    "        \n",
+    "        # Add predictor if present in original\n",
+    "        if predictor is not None:\n",
+    "            cog_profile['predictor'] = predictor\n",
+    "            print(f\"    Using original predictor: {predictor}\")\n",
+    "        \n",
+    "        # Use cog_translate to create proper COG\n",
+    "        cog_translate(\n",
+    "            source=input_path,\n",
+    "            dst_path=output_path,\n",
+    "            dst_kwargs=cog_profile,\n",
+    "            add_mask=False,\n",
+    "            overview_level=4,\n",
+    "            overview_resampling='average',\n",
+    "            web_optimized=True,\n",
+    "            additional_cog_metadata=tags,\n",
+    "            quiet=False\n",
+    "        )\n",
+    "        \n",
+    "        print(\"    COG creation complete with embedded metadata\")\n",
+    "        \n",
+    "        # Read output from virtual memory\n",
+    "        vsi_file = gdal.VSIFOpenL(output_path, 'rb')\n",
+    "        if not vsi_file:\n",
+    "            raise Exception(\"Could not read output COG from virtual memory\")\n",
+    "        \n",
+    "        gdal.VSIFSeekL(vsi_file, 0, 2)  # Seek to end\n",
+    "        file_size = gdal.VSIFTellL(vsi_file)\n",
+    "        gdal.VSIFSeekL(vsi_file, 0, 0)  # Seek to start\n",
+    "        \n",
+    "        output_bytes = gdal.VSIFReadL(1, file_size, vsi_file)\n",
+    "        gdal.VSIFCloseL(vsi_file)\n",
+    "        \n",
+    "        # Clean up virtual files\n",
+    "        gdal.Unlink(input_path)\n",
+    "        gdal.Unlink(output_path)\n",
+    "        \n",
+    "        print(f\"    Output size: {len(output_bytes):,} bytes\")\n",
+    "        \n",
+    "        return output_bytes\n",
+    "        \n",
+    "    except Exception as e:\n",
+    "        # Print full traceback for debugging\n",
+    "        import traceback\n",
+    "        print(f\"\\n    ERROR DETAILS:\")\n",
+    "        traceback.print_exc()\n",
+    "        \n",
+    "        # Clean up on error\n",
+    "        try:\n",
+    "            gdal.Unlink(input_path)\n",
+    "        except:\n",
+    "            pass\n",
+    "        try:\n",
+    "            gdal.Unlink(output_path)\n",
+    "        except:\n",
+    "            pass\n",
+    "        \n",
+    "        raise Exception(f\"COG creation failed: {str(e)}\")\n",
+    "\n",
+    "\n",
+    "def upload_to_s3(file_bytes: bytes, bucket: str, key: str) -> bool:\n",
+    "    \"\"\"\n",
+    "    Upload file bytes to S3.\n",
+    "    \n",
+    "    Args:\n",
+    "        file_bytes: File content as bytes\n",
+    "        bucket: Destination S3 bucket\n",
+    "        key: Destination S3 key\n",
+    "        \n",
+    "    Returns:\n",
+    "        True if successful\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        s3_client.put_object(\n",
+    "            Bucket=bucket,\n",
+    "            Key=key,\n",
+    "            Body=file_bytes,\n",
+    "            ContentType='image/tiff'\n",
+    "        )\n",
+    "        return True\n",
+    "    except Exception as e:\n",
+    "        print(f\"Error uploading to S3: {e}\")\n",
+    "        return False\n",
+    "\n",
+    "\n",
+    "def delete_from_s3(bucket: str, key: str) -> bool:\n",
+    "    \"\"\"\n",
+    "    Delete file from S3.\n",
+    "    \n",
+    "    Args:\n",
+    "        bucket: S3 bucket name\n",
+    "        key: S3 object key\n",
+    "        \n",
+    "    Returns:\n",
+    "        True if successful\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        s3_client.delete_object(Bucket=bucket, Key=key)\n",
+    "        return True\n",
+    "    except Exception as e:\n",
+    "        print(f\"Error deleting from S3: {e}\")\n",
+    "        return False\n",
+    "\n",
+    "\n",
+    "print(\"Helper functions defined successfully\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Test Pattern Detection\n",
+    "\n",
+    "Test the filename pattern detection with some examples."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Testing pattern detection:\n",
+      "\n",
+      "✓ 202302_Earthquake_Turkiye_Turkey_UNW_2022-04-06_to_2023-02-08_day.tif\n",
+      "  Year/Month: 202302\n",
+      "  Location: Turkiye\n",
+      "  Hazard: Earthquake\n",
+      "  Remainder: Turkey_UNW_2022-04-06_to_2023-02-08_day\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Test cases\n",
+    "test_filenames = [\n",
+    "    \"202302_Earthquake_Turkiye_Turkey_UNW_2022-04-06_to_2023-02-08_day.tif\"\n",
+    "]\n",
+    "\n",
+    "print(\"Testing pattern detection:\\n\")\n",
+    "for filename in test_filenames:\n",
+    "    result = detect_activation_event(filename)\n",
+    "    if result:\n",
+    "        print(f\"✓ {filename}\")\n",
+    "        print(f\"  Year/Month: {result['year_month']}\")\n",
+    "        print(f\"  Location: {result['location']}\")\n",
+    "        print(f\"  Hazard: {result['hazard']}\")\n",
+    "        print(f\"  Remainder: {result['remainder']}\")\n",
+    "    else:\n",
+    "        print(f\"✗ {filename} - No match\")\n",
+    "    print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## List Files in Source Bucket"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Listing files in s3://nasa-disasters/drcs_activations_new/Landsat/burnRatio\n",
+      "\n",
+      "Found 1 .tif files:\n",
+      "  ✓ drcs_activations_new/Landsat/burnRatio/202501_Fire_CA_LC09_NBR_182831_041036_2025-01-14_day.tif - Location: CA, Hazard: Fire\n"
+     ]
+    }
+   ],
+   "source": [
+    "# List all .tif files in source bucket\n",
+    "print(f\"Listing files in s3://{SOURCE_BUCKET}/{SOURCE_PREFIX}\\n\")\n",
+    "\n",
+    "tif_files = list_s3_files(SOURCE_BUCKET, SOURCE_PREFIX, '.tif')\n",
+    "\n",
+    "print(f\"Found {len(tif_files)} .tif files:\")\n",
+    "for file_key in tif_files:\n",
+    "    filename = file_key.split('/')[-1]\n",
+    "    event_info = detect_activation_event(filename)\n",
+    "    \n",
+    "    if event_info:\n",
+    "        print(f\"  ✓ {file_key} - Location: {event_info['location']}, Hazard: {event_info['hazard']}\")\n",
+    "    else:\n",
+    "        print(f\"  ✗ {file_key} - No activation event pattern\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Process Files\n",
+    "\n",
+    "Main processing loop: detect activation events, add metadata, move to destination."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "================================================================================\n",
+      "Starting batch processing\n",
+      "================================================================================\n",
+      "\n",
+      "Processing: 202501_Fire_CA_LC09_NBR_182831_041036_2025-01-14_day.tif\n",
+      "  Location: CA, Hazard: Fire\n",
+      "  Reading from S3...\n",
+      "    Size: 158,238,568 bytes\n",
+      "    Dimensions: 8554x7185\n",
+      "    CRS: EPSG:4326\n",
+      "  Validating input COG structure...\n",
+      "    ✓ Input is valid COG\n",
+      "      Compression: ZSTD\n",
+      "      Tile size: [512, 512]\n",
+      "      Overviews: 4 levels\n",
+      "  Creating COG with metadata and ZSTD compression...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_1272/2684847193.py:178: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).\n",
+      "  'PROCESSING_DATE': datetime.utcnow().isoformat(),\n",
+      "Reading input: /vsimem/input.tif\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "    Reading original file settings...\n",
+      "    Original settings: compress=zstd, predictor=None, level=None\n",
+      "    Rebuilding as COG (preserving original compression settings)...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Adding overviews...\n",
+      "Updating dataset tags...\n",
+      "Writing output to: /vsimem/output_cog.tif\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "    COG creation complete with embedded metadata\n",
+      "    Output size: 150,638,215 bytes\n",
+      "    Output size: 150,638,215 bytes\n",
+      "    Size change: -4.8%\n",
+      "  Validating output COG structure...\n",
+      "    ✓ Output is valid COG\n",
+      "      Compression: ZSTD\n",
+      "      Tile size: [512, 512]\n",
+      "      Overviews: 4 levels\n",
+      "  Uploading to s3://nasa-disasters-dev/ProgramData/Landsat/burnRatio/202501_Fire_CA_LC09_NBR_182831_041036_2025-01-14_day.tif...\n",
+      "    ✓ Upload successful\n",
+      "\n",
+      "================================================================================\n",
+      "Processing Complete\n",
+      "================================================================================\n"
+     ]
+    }
+   ],
+   "source": [
+    "def process_file(source_key: str) -> Dict[str, any]:\n",
+    "    \"\"\"\n",
+    "    Process a single GeoTIFF file.\n",
+    "    \n",
+    "    Returns:\n",
+    "        Dictionary with processing results\n",
+    "    \"\"\"\n",
+    "    filename = source_key.split('/')[-1]\n",
+    "    result = {\n",
+    "        'filename': filename,\n",
+    "        'source_key': source_key,\n",
+    "        'success': False,\n",
+    "        'message': ''\n",
+    "    }\n",
+    "    \n",
+    "    # Step 1: Detect activation event pattern\n",
+    "    event_metadata = detect_activation_event(filename)\n",
+    "    \n",
+    "    if not event_metadata:\n",
+    "        result['message'] = 'No activation event pattern detected - skipping'\n",
+    "        return result\n",
+    "    \n",
+    "    print(f\"\\nProcessing: {filename}\")\n",
+    "    print(f\"  Location: {event_metadata['location']}, Hazard: {event_metadata['hazard']}\")\n",
+    "    \n",
+    "    try:\n",
+    "        # Step 2: Read file from S3\n",
+    "        print(\"  Reading from S3...\")\n",
+    "        file_bytes, metadata, tags = read_geotiff_from_s3(SOURCE_BUCKET, source_key)\n",
+    "        print(f\"    Size: {len(file_bytes):,} bytes\")\n",
+    "        print(f\"    Dimensions: {metadata['width']}x{metadata['height']}\")\n",
+    "        print(f\"    CRS: {metadata.get('crs', 'Unknown')}\")\n",
+    "        \n",
+    "        # Step 2.5: Validate input COG status\n",
+    "        print(\"  Validating input COG structure...\")\n",
+    "        input_is_cog, input_cog_info = validate_cog(file_bytes, f\"input_{filename}\")\n",
+    "        \n",
+    "        if input_is_cog:\n",
+    "            print(f\"    ✓ Input is valid COG\")\n",
+    "            print(f\"      Compression: {input_cog_info.get('compression', 'Unknown')}\")\n",
+    "            print(f\"      Tile size: {input_cog_info.get('blocksize', (0, 0))}\")\n",
+    "            print(f\"      Overviews: {input_cog_info.get('overviews', 0)} levels\")\n",
+    "        else:\n",
+    "            print(f\"    ⚠ Input is NOT a valid COG - will convert\")\n",
+    "            if input_cog_info.get('errors'):\n",
+    "                for error in input_cog_info['errors'][:3]:  # Show first 3 errors\n",
+    "                    print(f\"      - {error}\")\n",
+    "        \n",
+    "        # Step 3: Add metadata and create/optimize COG\n",
+    "        print(\"  Creating COG with metadata and ZSTD compression...\")\n",
+    "        modified_bytes = add_metadata_and_create_cog(file_bytes, event_metadata)\n",
+    "        print(f\"    Output size: {len(modified_bytes):,} bytes\")\n",
+    "        \n",
+    "        size_change = ((len(modified_bytes) - len(file_bytes)) / len(file_bytes)) * 100\n",
+    "        print(f\"    Size change: {size_change:+.1f}%\")\n",
+    "        \n",
+    "        # Step 3.5: Validate output COG status\n",
+    "        print(\"  Validating output COG structure...\")\n",
+    "        output_is_cog, output_cog_info = validate_cog(modified_bytes, f\"output_{filename}\")\n",
+    "        \n",
+    "        if output_is_cog:\n",
+    "            print(f\"    ✓ Output is valid COG\")\n",
+    "            print(f\"      Compression: {output_cog_info.get('compression', 'Unknown')}\")\n",
+    "            print(f\"      Tile size: {output_cog_info.get('blocksize', (0, 0))}\")\n",
+    "            print(f\"      Overviews: {output_cog_info.get('overviews', 0)} levels\")\n",
+    "            result['cog_valid'] = True\n",
+    "        else:\n",
+    "            print(f\"    ✗ WARNING: Output is NOT a valid COG!\")\n",
+    "            if output_cog_info.get('errors'):\n",
+    "                for error in output_cog_info['errors'][:3]:\n",
+    "                    print(f\"      - {error}\")\n",
+    "            result['cog_valid'] = False\n",
+    "        \n",
+    "        # Step 4: Upload to destination\n",
+    "        dest_key = DESTINATION_PREFIX + filename\n",
+    "        print(f\"  Uploading to s3://{DESTINATION_BUCKET}/{dest_key}...\")\n",
+    "        \n",
+    "        if upload_to_s3(modified_bytes, DESTINATION_BUCKET, dest_key):\n",
+    "            print(\"    ✓ Upload successful\")\n",
+    "            result['destination_key'] = dest_key\n",
+    "            result['success'] = True\n",
+    "            result['message'] = 'Processed successfully'\n",
+    "            result['input_was_cog'] = input_is_cog\n",
+    "            result['output_is_cog'] = output_is_cog\n",
+    "            \n",
+    "            # Step 5: Optionally delete from source\n",
+    "            if DELETE_AFTER_MOVE:\n",
+    "                print(f\"  Deleting from source...\")\n",
+    "                if delete_from_s3(SOURCE_BUCKET, source_key):\n",
+    "                    print(\"    ✓ Deleted from source\")\n",
+    "                    result['deleted_from_source'] = True\n",
+    "                else:\n",
+    "                    print(\"    ✗ Failed to delete from source\")\n",
+    "                    result['deleted_from_source'] = False\n",
+    "        else:\n",
+    "            result['message'] = 'Upload to destination failed'\n",
+    "            \n",
+    "    except Exception as e:\n",
+    "        result['message'] = f'Error: {str(e)}'\n",
+    "        print(f\"  ✗ Error: {e}\")\n",
+    "    \n",
+    "    return result\n",
+    "\n",
+    "\n",
+    "# Process all files\n",
+    "print(f\"\\n{'='*80}\")\n",
+    "print(\"Starting batch processing\")\n",
+    "print(f\"{'='*80}\")\n",
+    "\n",
+    "results = []\n",
+    "for file_key in tif_files:\n",
+    "    result = process_file(file_key)\n",
+    "    results.append(result)\n",
+    "\n",
+    "print(f\"\\n{'='*80}\")\n",
+    "print(\"Processing Complete\")\n",
+    "print(f\"{'='*80}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summary Report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Summary statistics\n",
+    "total_files = len(results)\n",
+    "successful = sum(1 for r in results if r['success'])\n",
+    "skipped = sum(1 for r in results if 'skipping' in r['message'])\n",
+    "failed = total_files - successful - skipped\n",
+    "\n",
+    "# COG statistics\n",
+    "input_cogs = sum(1 for r in results if r.get('input_was_cog', False))\n",
+    "output_cogs = sum(1 for r in results if r.get('output_is_cog', False))\n",
+    "converted_to_cog = sum(1 for r in results if r.get('success') and not r.get('input_was_cog', False) and r.get('output_is_cog', False))\n",
+    "\n",
+    "print(\"\\nProcessing Summary:\")\n",
+    "print(f\"  Total files: {total_files}\")\n",
+    "print(f\"  ✓ Successful: {successful}\")\n",
+    "print(f\"  ○ Skipped (no pattern): {skipped}\")\n",
+    "print(f\"  ✗ Failed: {failed}\")\n",
+    "\n",
+    "print(\"\\nCOG Status:\")\n",
+    "print(f\"  Input COGs: {input_cogs}/{total_files}\")\n",
+    "print(f\"  Output COGs: {output_cogs}/{total_files}\")\n",
+    "print(f\"  Converted to COG: {converted_to_cog}\")\n",
+    "\n",
+    "if DELETE_AFTER_MOVE:\n",
+    "    deleted = sum(1 for r in results if r.get('deleted_from_source', False))\n",
+    "    print(f\"\\n  🗑 Deleted from source: {deleted}\")\n",
+    "\n",
+    "print(\"\\nDetailed Results:\")\n",
+    "for result in results:\n",
+    "    status = '✓' if result['success'] else ('○' if 'skipping' in result['message'] else '✗')\n",
+    "    cog_status = ''\n",
+    "    if result.get('success'):\n",
+    "        if result.get('output_is_cog'):\n",
+    "            cog_status = ' [COG ✓]'\n",
+    "        else:\n",
+    "            cog_status = ' [COG ✗]'\n",
+    "    print(f\"  {status} {result['filename']}: {result['message']}{cog_status}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Verify Metadata in Processed Files\n",
+    "\n",
+    "Read a processed file and verify the metadata was added correctly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get first successful result\n",
+    "successful_results = [r for r in results if r['success']]\n",
+    "\n",
+    "if successful_results:\n",
+    "    sample_result = successful_results[0]\n",
+    "    dest_key = sample_result['destination_key']\n",
+    "    \n",
+    "    print(f\"Verifying metadata in: s3://{DESTINATION_BUCKET}/{dest_key}\\n\")\n",
+    "    \n",
+    "    # Read processed file\n",
+    "    file_bytes, metadata, tags = read_geotiff_from_s3(DESTINATION_BUCKET, dest_key)\n",
+    "    \n",
+    "    print(\"=\" * 60)\n",
+    "    print(\"GeoTIFF Metadata Tags:\")\n",
+    "    print(\"=\" * 60)\n",
+    "    for key, value in tags.items():\n",
+    "        if 'ACTIVATION' in key or 'PROCESSING' in key or 'ORIGINAL' in key or 'PROCESSOR' in key or 'COG' in key:\n",
+    "            print(f\"  {key}: {value}\")\n",
+    "    \n",
+    "    print(\"\\n\" + \"=\" * 60)\n",
+    "    print(\"COG Validation Results:\")\n",
+    "    print(\"=\" * 60)\n",
+    "    \n",
+    "    # Validate COG structure\n",
+    "    is_cog, cog_info = validate_cog(file_bytes, dest_key.split('/')[-1])\n",
+    "    \n",
+    "    if is_cog:\n",
+    "        print(\"✓ File is a VALID Cloud-Optimized GeoTIFF (COG)\\n\")\n",
+    "        \n",
+    "        print(\"Structure Details:\")\n",
+    "        print(f\"  Dimensions: {cog_info.get('width')}x{cog_info.get('height')}\")\n",
+    "        print(f\"  Bands: {cog_info.get('bands')}\")\n",
+    "        print(f\"  Compression: {cog_info.get('compression')}\")\n",
+    "        print(f\"  Tiled: {cog_info.get('tiled')}\")\n",
+    "        print(f\"  Block/Tile Size: {cog_info.get('blocksize')}\")\n",
+    "        print(f\"  Overview Levels: {cog_info.get('overviews')}\")\n",
+    "        \n",
+    "        if cog_info.get('warnings'):\n",
+    "            print(\"\\nWarnings:\")\n",
+    "            for warning in cog_info['warnings']:\n",
+    "                print(f\"  ⚠ {warning}\")\n",
+    "    else:\n",
+    "        print(\"✗ File is NOT a valid COG\\n\")\n",
+    "        \n",
+    "        if cog_info.get('errors'):\n",
+    "            print(\"Errors:\")\n",
+    "            for error in cog_info['errors']:\n",
+    "                print(f\"  ✗ {error}\")\n",
+    "        \n",
+    "        if cog_info.get('warnings'):\n",
+    "            print(\"\\nWarnings:\")\n",
+    "            for warning in cog_info['warnings']:\n",
+    "                print(f\"  ⚠ {warning}\")\n",
+    "    \n",
+    "    print(\"\\n\" + \"=\" * 60)\n",
+    "    \n",
+    "else:\n",
+    "    print(\"No successfully processed files to verify\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/_quarto.yml b/_quarto.yml
index 649bb8b..9a5ad53 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -60,6 +60,8 @@ website:
           - Jupyterhub/clone_conversion_repo.ipynb
           - Jupyterhub/sentinel2_workflow-testUpdates.ipynb
           - Jupyterhub/CSDA-demo.ipynb
+          - text: "GeoTIFF Event Processor"
+            href: Jupyterhub/extract_eventname_metadata.ipynb
       - section: workflow.qmd
         text: Data Workflow Diagrams
         contents: