From 6e4463a6121d144ec8ccdcf76d7b09b9092ec6b5 Mon Sep 17 00:00:00 2001 From: Sourabh S Joshi Date: Mon, 16 Mar 2026 10:57:43 -0700 Subject: [PATCH] Add serve deploy and quickstart guides. --- serve/DEPLOY.md | 57 +++++++++++ serve/README.md | 138 ++++++++++++++++++++++++++ serve/server/README_object_storage.md | 6 +- 3 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 serve/DEPLOY.md create mode 100644 serve/README.md diff --git a/serve/DEPLOY.md b/serve/DEPLOY.md new file mode 100644 index 000000000..553e6c41e --- /dev/null +++ b/serve/DEPLOY.md @@ -0,0 +1,57 @@ +# Lepton.AI deployment + +## Using the Lepton.AI Dashboard + +We will use the Lepton.AI dashboard to start the inference service. +Please refer to your onboarding instructions to get access to this dashboard. + +The dashboard has an `Endpoints` tab on the top. +This is used to deploy long running services such as inference. + +* Click on the `Endpoints` tab, then click on the `Create Endpoint` button on the right hand side. +* Choose the `Create from Container Image` option. +* Set an appropriate Endpoint name. +* Resource: + * Choose the GPU option. Currently we only support x1 GPU, but this will change in the future. + * Choose any preemption policy. +* Image Configuration: + * Set your custom docker image, or use one of the prebuilt tags as appropriate. + * Set server port to 8000 for the inference container, and 8888 for the jupyter container. + * A registry auth might need to be created to access a private registry. If so, supply it here. + * For the custom command, refer to the [Custom Command](#custom-command) section. +* Access Tokens: + * If required, we can create a new access token for authorization. + * If one is created, then it will need to be supplied while calling the REST APIs using the header + `-H "Authorization: Bearer ${TOKEN}"`. +* Environment variables and secrets can be provided if necessary (e.g. WANDB_API_KEY). 
+* Storage:
+  * The inference container expects a mount for `/outputs`. Set this in the `Mount Path`.
+  * During onboarding, your project is provided with some NFS storage at a certain path.
+    You can provide a sub-directory within this path in the `From path`.
+  * Volume should be `lepton-shared-fs` or `amlfs`.
+* Click `Create` to create this endpoint. Choose 1 replica.
+
+Once the endpoint scales and is ready, you can start sending REST API requests to it.
+
+### Custom Command
+
+The Docker image as built from the default Dockerfile comes preset with the command to run the
+service.
+If the default settings in `serve/server/conf/config.yaml` are fine, then you can leave this
+section below blank.
+If you wish to override certain settings with env vars or have some custom setup of your own,
+then provide those here.
+
+```bash
+#!/bin/bash
+
+```
+
+## Debugging and logs
+
+We can click on the Endpoint -> Replicas to bring up some additional options.
+
+* Clicking on `API` brings up an option to run the various REST APIs.
+  For example, health check, or list inference requests, etc.
+* Clicking on `Terminal` for the specific replica opens a Terminal into the container.
+* Clicking on `Logs` shows a live stream of the current logs (slightly delayed).
diff --git a/serve/README.md b/serve/README.md
new file mode 100644
index 000000000..3241d1a3f
--- /dev/null
+++ b/serve/README.md
@@ -0,0 +1,138 @@
+# Quickstart guide
+
+## Developer quickstart
+
+Developers who have Earth2Studio installed on a GPU-enabled system can easily get started with the
+inference platform as follows.
+For developers who prefer to test using a container with requirements pre-installed,
+please refer to the section [Container Builds](#container-builds) below. 
+ +* Install redis + + ```bash + apt update && apt install redis + ``` + +* Install requirements for the inference server + + ```bash + cd server + pip install -r requirements.txt + ``` + +* The default Dockerfile CMD starts up the inference server. + +* Check health + + ```bash + curl localhost:8000/health + ``` + +### Creating and testing a custom workflow locally + +* Use the Earth2Workflow base class to develop the inference workflows. + Examples are shown in the files: server/example_workflows/deterministic_earth2_workflow.py. + +An example of a locally tested custom_workflow is shown below. + +```python +""" +Deterministic Workflow Custom Pipeline + +This pipeline implements the deterministic workflow from examples/01_deterministic_workflow.py +as a custom pipeline that can be invoked via the REST API. +""" + +from datetime import datetime +from typing import Literal + +from earth2studio import run +from earth2studio.data import GFS +from earth2studio.io import IOBackend +from earth2studio.models.px import DLWP, FCN +from earth2studio.serve.server import Earth2Workflow, workflow_registry + + +@workflow_registry.register +class DeterministicEarth2Workflow(Earth2Workflow): + """ + Deterministic workflow with auto-registration + """ + + name = "deterministic_earth2_workflow" + description = "Deterministic workflow with auto-registration" + + def __init__(self, model_type: Literal["fcn", "dlwp"] = "fcn"): + super().__init__() + + if model_type == "fcn": + package = FCN.load_default_package() + self.model = FCN.load_model(package) + elif model_type == "dlwp": + package = DLWP.load_default_package() + self.model = DLWP.load_model(package) + else: + raise ValueError(f"Unsupported model type: {model_type}") + + self.data = GFS() + + def __call__( + self, + io: IOBackend, + start_time: list[datetime] = [datetime(2024, 1, 1, 0)], + num_steps: int = 20, + ): + """Run the deterministic workflow pipeline""" + + run.deterministic(start_time, num_steps, self.model, 
self.data, io) + +print("initializing ") +model = DeterministicEarth2Workflow() +print("calling model") +from earth2studio.io import ZarrBackend +io = ZarrBackend() +model(io) +``` + +It is run as follows without needing to start redis etc. + +```bash +python serve/server/example_workflows/custom_workflow.py +``` + +* Refer to these READMEs [Earth2Workflow](./server/README_earth2workflows.md), + [Workflow](./server/README_workflows.md) + +## Container builds + +The Earth2Studio parent directory contains Dockerfiles that let you build the inference service +for deployment onto Lepton.AI. + +### Inference Container + +The inference container can be built from the [Dockerfile](./Dockerfile). + +Alternatively, the prebuilt container images can be used from the +[NGC registry][ngc-registry] after onboarding. + + +[ngc-registry]: https://registry.ngc.nvidia.com/orgs/dycvht5ows21/containers/earth2studio-scicomp/tags + +## Lepton.AI onboarding + +Please talk to your NVIDIA contact or TAM to get onboarded onto the Lepton.AI cluster. + +## Lepton.AI deployment + +Please see the [deployment guide](DEPLOY.md) for instructions on how to set up the inference +service on your Lepton.AI endpoint. + +## Using the inference service + +Once you set up your inference endpoint, you may either call the services directly through REST +APIs or you may use the client SDK. + +## Writing custom inference workflows + +You may port more [predefined examples](../examples) or write your own custom workflows using the +[custom workflows](server/README_workflows.md) guide. 
diff --git a/serve/server/README_object_storage.md b/serve/server/README_object_storage.md index b455137d1..1e84d107b 100644 --- a/serve/server/README_object_storage.md +++ b/serve/server/README_object_storage.md @@ -188,7 +188,7 @@ When object storage is enabled, the workflow result metadata includes additional The Python client SDK handles storage type automatically: ```python -from api_client.e2client import RemoteEarth2Workflow +from earth2studio.serve.client.e2client import RemoteEarth2Workflow workflow = RemoteEarth2Workflow(api_url, workflow_name="deterministic_earth2_workflow") @@ -202,7 +202,7 @@ ds = result.as_dataset() # Automatically fetches from S3 if configured The `Earth2StudioClient.download_result()` method handles both storage types: ```python -from api_client.client import Earth2StudioClient, InferenceRequest +from earth2studio.serve.client.client import Earth2StudioClient, InferenceRequest client = Earth2StudioClient(api_url, workflow_name="deterministic_earth2_workflow") request_result = client.run_inference_sync( @@ -243,7 +243,7 @@ The client provides an fsspec mapper for opening Zarr stores directly: ```python import xarray as xr -from api_client.object_storage import create_cloudfront_mapper +from earth2studio.serve.client.fsspec_utils import create_cloudfront_mapper # Create a mapper from the signed URL mapper = create_cloudfront_mapper(request_result.signed_url, zarr_path="results.zarr")