From 06b39f5b98820c62958cc8de91c79e26cf05b9f6 Mon Sep 17 00:00:00 2001 From: hrodmn Date: Thu, 16 Oct 2025 10:46:01 -0500 Subject: [PATCH 1/7] feat: add variable stats to /compatibility output for xarray datasets --- titiler/cmr/compatibility.py | 58 ++++++++++++++++++++++++++++++++---- titiler/cmr/factory.py | 13 ++++++-- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/titiler/cmr/compatibility.py b/titiler/cmr/compatibility.py index 37fee73..986a938 100644 --- a/titiler/cmr/compatibility.py +++ b/titiler/cmr/compatibility.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Literal, Optional +import numpy as np from fastapi import HTTPException from pydantic import BaseModel from rio_tiler.constants import WEB_MERCATOR_TMS @@ -18,15 +19,38 @@ from titiler.xarray.io import Reader as XarrayReader +class VariableInfo(BaseModel): + """Metadata for a single xarray variable""" + + shape: List[int] + dtype: str + min: Optional[float] = None + max: Optional[float] = None + mean: Optional[float] = None + p01: Optional[float] = None + p05: Optional[float] = None + p95: Optional[float] = None + p99: Optional[float] = None + + +class CoordinateInfo(BaseModel): + """Metadata for a single xarray coordinate""" + + size: int + dtype: str + min: Optional[float] = None + max: Optional[float] = None + + class CompatibilityResponse(BaseModel): """Compatibility endpoint response model""" concept_id: ConceptID backend: Literal["rasterio", "xarray"] datetime: List[Dict[str, Any]] - variables: Optional[Dict[str, Dict[str, Any]]] = None + variables: Optional[Dict[str, VariableInfo]] = None dimensions: Optional[Dict[str, int]] = None - coordinates: Optional[Dict[str, Dict[str, Any]]] = None + coordinates: Optional[Dict[str, CoordinateInfo]] = None example_assets: Optional[Dict[str, str] | str] = None sample_asset_raster_info: Optional[Info] = None @@ -40,13 +64,35 @@ def extract_xarray_metadata(ds: Any) -> Dict[str, Any]: Returns: Dictionary containing variables, dimensions, and coordinates metadata """ - variables = { - var: { + variables = {} + for var in ds.data_vars: + var_info: Dict[str, Any] = { "shape": list(ds[var].shape), "dtype": str(ds[var].dtype), } - for var in ds.data_vars - } + + # Add statistics for numeric variables + if ds[var].dtype.kind in ["i", "f", "u"]: + try: + # Load data into memory once for efficiency + var_data = ds[var] + values = var_data.values + + var_info["min"] = float(np.nanmin(values)) + var_info["max"] = float(np.nanmax(values)) + var_info["mean"] = float(np.nanmean(values)) + + # Calculate multiple percentiles in a single pass, filtering out NaNs + p01, p05, p95, p99 = np.nanpercentile(values, [1, 5, 95, 99]) + var_info["p01"] = float(p01) + var_info["p05"] = float(p05) + var_info["p95"] = float(p95) + var_info["p99"] = float(p99) + except Exception: + # Skip statistics if computation fails (e.g., too large, all NaN values) + pass + + variables[var] = var_info coordinates = {} for coord, coord_data in ds.coords.items(): diff --git a/titiler/cmr/factory.py b/titiler/cmr/factory.py index ef1e8eb..a39f56e 100644 --- a/titiler/cmr/factory.py +++ b/titiler/cmr/factory.py @@ -482,15 +482,22 @@ def concept_metadata_endpoint( "**Returns:**\n" "- `backend`: Which reader to use ('xarray' or 'rasterio')\n" "- `datetime`: Temporal extent(s) of the dataset\n" - "- `variables`: Available data variables with shape and dtype (xarray only)\n" + "- `variables`: Available data variables with shape, dtype, and statistics (xarray only)\n" + " - For numeric variables, includes: 
min, max, mean, and percentiles (p01, p05, p95, p99)\n" + " - These statistics are computed from the sample asset to characterize the data distribution\n" "- `dimensions`: Dimension names and sizes (xarray only)\n" - "- `coordinates`: Coordinate information with ranges (xarray only)\n" - "- `example_assets`: Sample data URL from the collection\n\n" + "- `coordinates`: Coordinate information with value ranges (xarray only)\n" + "- `example_assets`: Sample data URL from the collection\n" + "- `sample_asset_raster_info`: Rasterio metadata for sample asset (rasterio only)\n\n" "**Use this information to:**\n" "- Set `backend` parameter for /tiles, /bbox, and /feature endpoints\n" "- Choose valid `variable` names for xarray datasets\n" "- Identify available dimensions for selection/interpolation\n" "- Determine temporal coverage for time-based queries\n" + "- **Parameterize visualization requests** using variable statistics:\n" + " - Set `rescale` parameter based on min/max or percentile ranges (e.g., `rescale=p01,p99` for contrast stretching)\n" + " - Choose appropriate colormaps based on data range and distribution\n" + " - Avoid clipping by understanding the actual data value range before rendering\n" "- Understand data structure before making tile/image requests" ), response_model_exclude_none=True, From ef85bc61160ae99de17fde46214197af80b6f030 Mon Sep 17 00:00:00 2001 From: hrodmn Date: Fri, 17 Oct 2025 05:17:18 -0500 Subject: [PATCH 2/7] update tests --- tests/test_compatibility.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_compatibility.py b/tests/test_compatibility.py index 118231d..ba1d2e9 100644 --- a/tests/test_compatibility.py +++ b/tests/test_compatibility.py @@ -2,6 +2,7 @@ from unittest.mock import MagicMock, patch +import numpy as np import pytest from fastapi import HTTPException @@ -24,14 +25,14 @@ def test_extract_basic_metadata(self): # Mock data variables mock_var = MagicMock() mock_var.shape = (365, 1800, 3600) - mock_var.dtype = "float32" + mock_var.dtype = np.dtype("float32") mock_ds.data_vars = ["temperature"] mock_ds.__getitem__ = lambda self, key: mock_var # Mock coordinates mock_coord = MagicMock() mock_coord.size = 365 - mock_coord.dtype.kind = "f" + mock_coord.dtype = np.dtype("float64") mock_coord.min.return_value = 0.0 mock_coord.max.return_value = 364.0 @@ -61,14 +62,14 @@ def test_extract_metadata_with_non_numeric_coord(self): # Mock data variables mock_var = MagicMock() mock_var.shape = (10,) - mock_var.dtype = "float32" + mock_var.dtype = np.dtype("float32") mock_ds.data_vars = ["data"] mock_ds.__getitem__ = lambda self, key: mock_var # Mock string coordinate (no min/max) mock_coord = MagicMock() mock_coord.size = 10 - mock_coord.dtype.kind = "U" # Unicode string + mock_coord.dtype = np.dtype("U10") # Unicode string # Create a proper mock for coords mock_coords = MagicMock() @@ -114,7 +115,7 @@ def test_xarray_success(self, mock_xarray_open, mock_backend): mock_ds = MagicMock() mock_var = MagicMock() mock_var.shape = (10, 20) - mock_var.dtype = "float32" + mock_var.dtype = np.dtype("float32") mock_ds.data_vars = ["temp"] mock_ds.__getitem__ = lambda self, key: mock_var From 0a2f21aa5e7b7fddf13d3773f8cdcc1d91d9d269 Mon Sep 17 00:00:00 2001 From: hrodmn Date: Tue, 21 Oct 2025 14:20:28 -0500 Subject: [PATCH 3/7] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21fb8ea..5aeaf2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format 
is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - interpolate {datetime} in if sel includes {dim}={datetime} ([#78](https://github.com/developmentseed/titiler-cmr/pull/78)) - /compatibility and /concept_metadata endpoints ([#80](https://github.com/developmentseed/titiler-cmr/pull/80)) +- add dataset statistics to /compatibility endpoint output for xarray datasets ([#80](https://github.com/developmentseed/titiler-cmr/pull/82)) ## [v0.2.0] From da8f79999e49dc611b42379730a92f9cad4a8bb2 Mon Sep 17 00:00:00 2001 From: hrodmn Date: Tue, 21 Oct 2025 14:27:38 -0500 Subject: [PATCH 4/7] remove docs deploy preview from docs action --- .github/workflows/docs.yml | 180 ++----------------------------------- 1 file changed, 9 insertions(+), 171 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index d027e2d..3831e43 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -5,12 +5,9 @@ on: - develop tags: - 'v*' - pull_request: - types: [opened, synchronize, reopened, closed] permissions: contents: write - pull-requests: write jobs: deploy-versions: @@ -20,20 +17,24 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 # Important for mike to work with tags - - - uses: astral-sh/setup-uv@v3 + + - uses: astral-sh/setup-uv@v7 with: - version: "0.5.*" + version: "0.9.*" enable-cache: true - name: Sync - run: | + run: | uv sync git restore uv.lock - + - name: Configure Git run: | git config --local user.name "GitHub Actions" git config --local user.email "actions@github.com" + + - name: Fetch gh-pages branch + run: | + git fetch origin gh-pages --depth=1 || true - name: Deploy docs env: @@ -56,166 +57,3 @@ jobs: uv run mike deploy --push --update-aliases dev uv run mike set-default --push dev fi - - # Job for PR previews - preview: - if: github.event_name == 'pull_request' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: astral-sh/setup-uv@v3 - with: - version: "0.5.*" - enable-cache: true - - name: Sync - run: | - uv sync - git diff - - # Only build and deploy if PR is opened/synchronized/reopened - - name: Build docs - if: github.event.action != 'closed' - env: - EARTHDATA_USERNAME: ${{ secrets.earthdata_username }} - EARTHDATA_PASSWORD: ${{ secrets.earthdata_password }} - run: | - echo "machine urs.earthdata.nasa.gov\nlogin ${EARTHDATA_USERNAME}\npassword ${EARTHDATA_PASSWORD}" > ~/.netrc - uv run mkdocs build --site-dir site/pr-${{ github.event.pull_request.number }} - - - name: Deploy preview - id: deploy - if: github.event.action != 'closed' - run: | - git config --global user.name "GitHub Actions" - git config --global user.email "actions@github.com" - - # Checkout gh-pages branch - git restore . - git fetch origin gh-pages --depth=1 - git checkout gh-pages - - # Create preview directory if it doesn't exist - mkdir -p pr-previews - - # Copy new preview - rm -rf pr-previews/pr-${{ github.event.pull_request.number }} - cp -r site/pr-${{ github.event.pull_request.number }} pr-previews/ - - # Check if there are actual changes in git - git add pr-previews - if git diff --staged --quiet; then - echo "No changes in documentation. Skipping deployment." 
- echo "has_changes=false" >> $GITHUB_OUTPUT - exit 0 - fi - - # If we get here, there are changes - git commit -m "Deploy preview for PR #${{ github.event.pull_request.number }}" - git push origin gh-pages - echo "has_changes=true" >> $GITHUB_OUTPUT - - - name: Post/Update PR Comment - if: github.event.action != 'closed' - uses: actions/github-script@v6 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - console.log('Starting PR comment update...'); - const has_changes = '${{ steps.deploy.outputs.has_changes }}' === 'true'; - const preview_url = `https://${context.repo.owner}.github.io/${context.repo.repo}/pr-previews/pr-${context.payload.pull_request.number}/`; - const message = `📚 Documentation preview will be available at: ${preview_url} - - Status: ${has_changes ? '✅ Preview is ready!' : '🔄 No changes in documentation since last update'}`; - - const comments = await github.rest.issues.listComments({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.payload.pull_request.number, - }); - - const docComment = comments.data.find(comment => - comment.user.login === 'github-actions[bot]' && - comment.body.includes('Documentation preview will be available at:') - ); - - if (docComment) { - await github.rest.issues.updateComment({ - owner: context.repo.owner, - repo: context.repo.repo, - comment_id: docComment.id, - body: message - }); - } else { - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.payload.pull_request.number, - body: message - }); - } - - - name: Cleanup preview - if: github.event.action == 'closed' - run: | - git config --global user.name "GitHub Actions" - git config --global user.email "actions@github.com" - - # Checkout gh-pages branch - git restore uv.lock - git fetch origin gh-pages --depth=1 - git checkout gh-pages - - # Remove the preview directory for this PR - rm -rf pr-previews/pr-${{ github.event.pull_request.number }} - - # Commit and push if there are changes - git add pr-previews - git diff --staged --quiet || (git commit -m "Remove preview for PR #${{ github.event.pull_request.number }}" && git push origin gh-pages) - - # Optional job to cleanup old PR previews - cleanup-old-previews: - if: github.event_name == 'push' && github.ref == 'refs/heads/develop' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - ref: gh-pages - - - name: Get closed PRs - id: closed-prs - uses: actions/github-script@v7 - with: - script: | - const prs = await github.rest.pulls.list({ - owner: context.repo.owner, - repo: context.repo.repo, - state: 'closed' - }); - return prs.data.map(pr => pr.number); - result-encoding: string - - - name: Cleanup old previews - run: | - git config --global user.name "GitHub Actions" - git config --global user.email "actions@github.com" - - if [ -d "pr-previews" ]; then - cd pr-previews - for preview in pr-*; do - if [ -d "$preview" ]; then - PR_NUM=$(echo $preview | sed 's/pr-//') - if ! echo "${{ steps.closed-prs.outputs.result }}" | grep -q "$PR_NUM"; then - rm -rf "$preview" - echo "Removed old preview: $preview" - fi - fi - done - - git add . - git diff --staged --quiet || (git commit -m "Cleanup old PR previews" && git push origin gh-pages) - else - echo "No preview directores found. Nothing to clean up." 
fi
          - 


From f2cc7ecdb55450ed7224911f9311bff2b103584a Mon Sep 17 00:00:00 2001
From: hrodmn
Date: Tue, 21 Oct 2025 14:48:38 -0500
Subject: [PATCH 5/7] calculate stats on subsample of large arrays

---
 titiler/cmr/compatibility.py | 42 ++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/titiler/cmr/compatibility.py b/titiler/cmr/compatibility.py
index 986a938..d4e82a8 100644
--- a/titiler/cmr/compatibility.py
+++ b/titiler/cmr/compatibility.py
@@ -55,11 +55,15 @@ class CompatibilityResponse(BaseModel):
     sample_asset_raster_info: Optional[Info] = None
 
 
-def extract_xarray_metadata(ds: Any) -> Dict[str, Any]:
+def extract_xarray_metadata(ds: Any, max_sample_size: int = 100_000) -> Dict[str, Any]:
     """Extract comprehensive metadata from an xarray Dataset.
 
+    For large arrays, uses sampling along each dimension to avoid memory issues.
+
     Args:
         ds: xarray Dataset instance
+        max_sample_size: Maximum number of elements to sample for statistics.
+            Arrays larger than this will be sampled. Default: 100,000
 
     Returns:
         Dictionary containing variables, dimensions, and coordinates metadata
@@ -71,12 +75,42 @@ def extract_xarray_metadata(ds: Any) -> Dict[str, Any]:
             "dtype": str(ds[var].dtype),
         }
 
-        # Add statistics for numeric variables
         if ds[var].dtype.kind in ["i", "f", "u"]:
             try:
-                # Load data into memory once for efficiency
                 var_data = ds[var]
-                values = var_data.values
+                total_size = var_data.size
+
+                # Use sampling for large arrays to avoid memory issues
+                if total_size > max_sample_size:
+                    # Calculate stride needed to get approximately max_sample_size elements
+                    # We'll sample along each dimension proportionally
+                    target_fraction = (max_sample_size / total_size) ** (
+                        1.0 / len(var_data.shape)
+                    )
+
+                    indexers = {}
+                    actual_sample_size = 1
+                    for dim in var_data.dims:
+                        dim_size = var_data.sizes[dim]
+                        sample_size = max(1, int(dim_size * target_fraction))
+                        # Random sample of indices along this dimension
+                        indices = np.sort(
+                            np.random.choice(dim_size, size=sample_size, replace=False)
+                        )
+                        indexers[dim] = indices
+                        actual_sample_size *= sample_size
+
+                    # Sample using integer indexing (efficient with chunked data)
+                    sampled = var_data.isel(indexers)
+                    values = sampled.values
+
+                    logger.info(
+                        f"Sampled {actual_sample_size:,} of {total_size:,} elements "
+                        f"from variable '{var}' for statistics"
+                    )
+                else:
+                    # Load entire array for smaller datasets
+                    values = var_data.values
 
                 var_info["min"] = float(np.nanmin(values))
                 var_info["max"] = float(np.nanmax(values))

From 3d2973065db9641afb5b6082d3461e4b0942b848 Mon Sep 17 00:00:00 2001
From: hrodmn
Date: Tue, 21 Oct 2025 15:00:32 -0500
Subject: [PATCH 6/7] calculate max sample size more carefully

---
 titiler/cmr/compatibility.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/titiler/cmr/compatibility.py b/titiler/cmr/compatibility.py
index d4e82a8..59467ed 100644
--- a/titiler/cmr/compatibility.py
+++ b/titiler/cmr/compatibility.py
@@ -55,7 +55,9 @@ class CompatibilityResponse(BaseModel):
     sample_asset_raster_info: Optional[Info] = None
 
 
-def extract_xarray_metadata(ds: Any, max_sample_size: int = 100_000) -> Dict[str, Any]:
+def extract_xarray_metadata(
+    ds: Any, max_sample_size: float = 100_000.0
+) -> Dict[str, Any]:
     """Extract comprehensive metadata from an xarray Dataset.
 
     For large arrays, uses sampling along each dimension to avoid memory issues.
@@ -82,23 +84,27 @@ def extract_xarray_metadata(ds: Any, max_sample_size: int = 100_000) -> Dict[str # Use sampling for large arrays to avoid memory issues if total_size > max_sample_size: - # Calculate stride needed to get approximately max_sample_size elements - # We'll sample along each dimension proportionally - target_fraction = (max_sample_size / total_size) ** ( - 1.0 / len(var_data.shape) - ) - + # Calculate exact sample size per dimension to stay within budget indexers = {} actual_sample_size = 1 - for dim in var_data.dims: + remaining_budget = max_sample_size + + for i, dim in enumerate(var_data.dims): dim_size = var_data.sizes[dim] - sample_size = max(1, int(dim_size * target_fraction)) + # Distribute budget across remaining dimensions + dims_remaining = len(var_data.dims) - i + samples_per_dim = int( + remaining_budget ** (1.0 / dims_remaining) + ) + sample_size = min(dim_size, max(1, samples_per_dim)) + # Random sample of indices along this dimension indices = np.sort( np.random.choice(dim_size, size=sample_size, replace=False) ) indexers[dim] = indices actual_sample_size *= sample_size + remaining_budget = max_sample_size / actual_sample_size # Sample using integer indexing (efficient with chunked data) sampled = var_data.isel(indexers) From 0ceeca0453ed8d3c2c607f2cb96df4207f2249c0 Mon Sep 17 00:00:00 2001 From: hrodmn Date: Tue, 21 Oct 2025 15:09:23 -0500 Subject: [PATCH 7/7] add /compatibility examples to notebooks --- docs/examples/rasterio_backend_example.ipynb | 42 +++++++---------- docs/examples/xarray_backend_example.ipynb | 49 +++++--------------- 2 files changed, 29 insertions(+), 62 deletions(-) diff --git a/docs/examples/rasterio_backend_example.ipynb b/docs/examples/rasterio_backend_example.ipynb index 226922c..451c1c9 100644 --- a/docs/examples/rasterio_backend_example.ipynb +++ b/docs/examples/rasterio_backend_example.ipynb @@ -27,9 +27,10 @@ }, "outputs": [], "source": [ + "import json\n", + "\n", "import earthaccess\n", "import httpx\n", - "import json\n", "\n", "from folium import Map, TileLayer" ] @@ -76,9 +77,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Examine a granule\n", + "## Explore the collection using the `/compatibility` endpoint\n", "\n", - "Each granule contains the data for a single point in time for an MGRS tile. " + "The `/compatibility` endpoint will display information about the collection and return some details about a sample granule. The output is helpful for understanding the structure of the collection and the granules so that you can craft the right set of parameters for visualization or statistics requests." 
] }, { @@ -87,27 +88,20 @@ "metadata": {}, "outputs": [], "source": [ - "import earthaccess\n", - "import morecantile\n", - "\n", - "tms = morecantile.tms.get(\"WebMercatorQuad\")\n", - "\n", - "bounds = tms.bounds(62, 44, 7)\n", - "xmin, ymin, xmax, ymax = (round(n, 8) for n in bounds)\n", - "concept_id = \"C2021957295-LPCLOUD\"\n", + "compatibility_response = httpx.get(\n", + " f\"{titiler_endpoint}/compatibility\",\n", + " params={\"concept_id\": concept_id},\n", + " timeout=None,\n", + ").json()\n", "\n", - "results = earthaccess.search_data(\n", - " bounding_box=(xmin, ymin, xmax, ymax),\n", - " count=1,\n", - " concept_id=concept_id,\n", - " temporal=(\"2024-02-11\", \"2024-02-13\"),\n", - ")\n", - "print(\"Granules:\")\n", - "print(results)\n", - "print()\n", - "print(\"Example of COGs URL: \")\n", - "for link in results[0].data_links(access=\"direct\"):\n", - " print(link)" + "print(json.dumps(compatibility_response, indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The details from the sample granule show that it has 18 assets (you would need to look more into what each of the assets represents). To properly configure the assets for titiler-cmr we will need to use the `bands_regex` parameter to identify the bands that we want to be available for visualizations. The `datetime` key shows the reported temporal range from CMR which indicates that the dataset has granules from `2015-11-28` to present." ] }, { @@ -138,7 +132,7 @@ " temporal=(\"2024-02-11\", \"2024-02-13\"),\n", " )\n", "\n", - "print(assets[0])" + "print(json.dumps(assets[0], indent=2))" ] }, { diff --git a/docs/examples/xarray_backend_example.ipynb b/docs/examples/xarray_backend_example.ipynb index 8cc3f8a..ebbf816 100644 --- a/docs/examples/xarray_backend_example.ipynb +++ b/docs/examples/xarray_backend_example.ipynb @@ -30,7 +30,6 @@ "\n", "import earthaccess\n", "import httpx\n", - "import xarray as xr\n", "from folium import Map, TileLayer\n", "\n", "# titiler_endpoint = \"http://localhost:8081\" # docker network endpoint\n", @@ -73,9 +72,9 @@ "id": "2a4cffa6-0059-4033-a708-db60d743f0e3", "metadata": {}, "source": [ - "## Examine a granule\n", + "## Explore the collection using the `/compatibility` endpoint\n", "\n", - "Each granule contains a single day record for the entire globe and has a single data file. " + "The `/compatibility` endpoint will display information about the collection and return some details about a sample granule. The output is helpful for understanding the structure of the collection and the granules so that you can craft the right set of parameters for visualization or statistics requests." ] }, { @@ -85,47 +84,21 @@ "metadata": {}, "outputs": [], "source": [ - "results = earthaccess.search_data(\n", - " count=1,\n", - " concept_id=concept_id,\n", - " temporal=(\"2024-10-12\", \"2024-10-13\"),\n", - ")\n", - "print(\"Granules:\")\n", - "print(results)\n", - "print()\n", - "print(\"Example of NetCDF URL: \")\n", - "for link in results[0].data_links(access=\"external\"):\n", - " print(link)" - ] - }, - { - "cell_type": "markdown", - "id": "eaa3f378-95fa-4c5a-9ccb-24b3064fb5a7", - "metadata": {}, - "source": [ - "## Explore the available variables\n", + "compatibility_response = httpx.get(\n", + " f\"{titiler_endpoint}/compatibility\",\n", + " params={\"concept_id\": concept_id},\n", + " timeout=None,\n", + ").json()\n", "\n", - "The NetCDF file can be opened with xarray using the `h5netcdf` engine. 
When running outside of AWS region us-west-2 you will need to access the data using \"external\" `https` links (rather than \"direct\" `s3` links). Those links will require authentication which is handled by `earthaccess` as long as you have your Earthdata credentials stored in the `~/.netrc` file!" + "print(json.dumps(compatibility_response, indent=2))" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "61ec4071-bf37-421f-bf58-ac399f827052", + "cell_type": "markdown", + "id": "04014a32-9c11-4b75-b40a-e5ad4efd686b", "metadata": {}, - "outputs": [], "source": [ - "fs = earthaccess.get_fsspec_https_session()\n", - "\n", - "ds = xr.open_dataset(\n", - " fs.open(results[0].data_links(access=\"external\")[0]),\n", - " engine=\"h5netcdf\",\n", - ")\n", - "print(\"Data Variables:\")\n", - "for var in ds.data_vars:\n", - " print(str(var))\n", - "\n", - "display(ds)" + "The details from the sample granule show that it is a NetCDF file with four variables (`analysed_sst`, `analysis_error`, `mask`, and `sea_ice_fraction`) and each contains an array with a single time coordinate. The `datetime` key shows the reported temporal range from CMR which indicates that the dataset has granules from `2002-05-31` to present. For each variable several summary statistics are available to help you craft min/max values for the `rescale` parameter." ] }, {