From 6e4463a6121d144ec8ccdcf76d7b09b9092ec6b5 Mon Sep 17 00:00:00 2001 From: Sourabh S Joshi Date: Mon, 16 Mar 2026 10:57:43 -0700 Subject: [PATCH] Add serve deploy and quickstart guides. --- serve/DEPLOY.md | 57 +++++++++++ serve/README.md | 138 ++++++++++++++++++++++++++ serve/server/README_object_storage.md | 6 +- 3 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 serve/DEPLOY.md create mode 100644 serve/README.md diff --git a/serve/DEPLOY.md b/serve/DEPLOY.md new file mode 100644 index 000000000..553e6c41e --- /dev/null +++ b/serve/DEPLOY.md @@ -0,0 +1,57 @@ +# Lepton.AI deployment + +## Using the Lepton.AI Dashboard + +We will use the Lepton.AI dashboard to start the inference service. +Please refer to your onboarding instructions to get access to this dashboard. + +The dashboard has an `Endpoints` tab on the top. +This is used to deploy long running services such as inference. + +* Click on the `Endpoints` tab, then click on the `Create Endpoint` button on the right hand side. +* Choose the `Create from Container Image` option. +* Set an appropriate Endpoint name. +* Resource: + * Choose the GPU option. Currently we only support x1 GPU, but this will change in the future. + * Choose any preemption policy. +* Image Configuration: + * Set your custom docker image, or use one of the prebuilt tags as appropriate. + * Set server port to 8000 for the inference container, and 8888 for the jupyter container. + * A registry auth might need to be created to access a private registry. If so, supply it here. + * For the custom command, refer to the [Custom Command](#custom-command) section. +* Access Tokens: + * If required, we can create a new access token for authorization. + * If one is created, then it will need to be supplied while calling the REST APIs using the header + `-H "Authorization: Bearer ${TOKEN}"`. +* Environment variables and secrets can be provided if necessary (e.g. WANDB_API_KEY). 
+* Storage:
+  * The inference container expects a mount for `/outputs`. Set this in the `Mount Path`.
+  * During onboarding, your project is provided with some NFS storage at a certain path.
+    You can provide a sub-directory within this path in the `From path`.
+  * Volume should be `lepton-shared-fs` or `amlfs`.
+* Click `Create` to create this endpoint. Choose 1 replica.
+
+Once the endpoint scales and is ready, you can start sending REST API requests to it.
+
+### Custom Command
+
+The Docker image as built from the default Dockerfile comes preset with the command to run the
+service.
+If the default settings in `serve/server/conf/config.yaml` are fine, then you can leave this
+section below blank.
+If you wish to override certain settings with env vars or have some custom setup of your own,
+then provide those here.
+
+```bash
+#!/bin/bash
+
+```
+
+## Debugging and logs
+
+We can click on the Endpoint -> Replicas to bring up some additional options.
+
+* Clicking on `API` brings up an option to run the various REST APIs.
+  For example, health check, or list inference requests, etc.
+* Clicking on `Terminal` for the specific replica opens a Terminal into the container.
+* Clicking on `Logs` shows a live stream of the current logs (slightly delayed).
diff --git a/serve/README.md b/serve/README.md
new file mode 100644
index 000000000..3241d1a3f
--- /dev/null
+++ b/serve/README.md
@@ -0,0 +1,138 @@
+# Quickstart guide
+
+## Developer quickstart
+
+Developers who have Earth2Studio installed on a GPU-enabled system can easily get started with the
+inference platform as follows.
+For developers who prefer to test using a container with requirements pre-installed,
+please refer to the section [Container Builds](#container-builds) below. 
+ +* Install redis + + ```bash + apt update && apt install redis + ``` + +* Install requirements for the inference server + + ```bash + cd server + pip install -r requirements.txt + ``` + +* The default Dockerfile CMD starts up the inference server. + +* Check health + + ```bash + curl localhost:8000/health + ``` + +### Creating and testing a custom workflow locally + +* Use the Earth2Workflow base class to develop the inference workflows. + Examples are shown in the files: server/example_workflows/deterministic_earth2_workflow.py. + +An example of a locally tested custom_workflow is shown below. + +```python +""" +Deterministic Workflow Custom Pipeline + +This pipeline implements the deterministic workflow from examples/01_deterministic_workflow.py +as a custom pipeline that can be invoked via the REST API. +""" + +from datetime import datetime +from typing import Literal + +from earth2studio import run +from earth2studio.data import GFS +from earth2studio.io import IOBackend +from earth2studio.models.px import DLWP, FCN +from earth2studio.serve.server import Earth2Workflow, workflow_registry + + +@workflow_registry.register +class DeterministicEarth2Workflow(Earth2Workflow): + """ + Deterministic workflow with auto-registration + """ + + name = "deterministic_earth2_workflow" + description = "Deterministic workflow with auto-registration" + + def __init__(self, model_type: Literal["fcn", "dlwp"] = "fcn"): + super().__init__() + + if model_type == "fcn": + package = FCN.load_default_package() + self.model = FCN.load_model(package) + elif model_type == "dlwp": + package = DLWP.load_default_package() + self.model = DLWP.load_model(package) + else: + raise ValueError(f"Unsupported model type: {model_type}") + + self.data = GFS() + + def __call__( + self, + io: IOBackend, + start_time: list[datetime] = [datetime(2024, 1, 1, 0)], + num_steps: int = 20, + ): + """Run the deterministic workflow pipeline""" + + run.deterministic(start_time, num_steps, self.model, 
self.data, io) + +print("initializing ") +model = DeterministicEarth2Workflow() +print("calling model") +from earth2studio.io import ZarrBackend +io = ZarrBackend() +model(io) +``` + +It is run as follows without needing to start redis etc. + +```bash +python serve/server/example_workflows/custom_workflow.py +``` + +* Refer to these READMEs [Earth2Workflow](./server/README_earth2workflows.md), + [Workflow](./server/README_workflows.md) + +## Container builds + +The Earth2Studio parent directory contains Dockerfiles that let you build the inference service +for deployment onto Lepton.AI. + +### Inference Container + +The inference container can be built from the [Dockerfile](./Dockerfile). + +Alternatively, the prebuilt container images can be used from the +[NGC registry][ngc-registry] after onboarding. + + +[ngc-registry]: https://registry.ngc.nvidia.com/orgs/dycvht5ows21/containers/earth2studio-scicomp/tags + +## Lepton.AI onboarding + +Please talk to your NVIDIA contact or TAM to get onboarded onto the Lepton.AI cluster. + +## Lepton.AI deployment + +Please see the [deployment guide](DEPLOY.md) for instructions on how to set up the inference +service on your Lepton.AI endpoint. + +## Using the inference service + +Once you set up your inference endpoint, you may either call the services directly through REST +APIs or you may use the client SDK. + +## Writing custom inference workflows + +You may port more [predefined examples](../examples) or write your own custom workflows using the +[custom workflows](server/README_workflows.md) guide. 
diff --git a/serve/server/README_object_storage.md b/serve/server/README_object_storage.md index b455137d1..1e84d107b 100644 --- a/serve/server/README_object_storage.md +++ b/serve/server/README_object_storage.md @@ -188,7 +188,7 @@ When object storage is enabled, the workflow result metadata includes additional The Python client SDK handles storage type automatically: ```python -from api_client.e2client import RemoteEarth2Workflow +from earth2studio.serve.client.e2client import RemoteEarth2Workflow workflow = RemoteEarth2Workflow(api_url, workflow_name="deterministic_earth2_workflow") @@ -202,7 +202,7 @@ ds = result.as_dataset() # Automatically fetches from S3 if configured The `Earth2StudioClient.download_result()` method handles both storage types: ```python -from api_client.client import Earth2StudioClient, InferenceRequest +from earth2studio.serve.client.client import Earth2StudioClient, InferenceRequest client = Earth2StudioClient(api_url, workflow_name="deterministic_earth2_workflow") request_result = client.run_inference_sync( @@ -243,7 +243,7 @@ The client provides an fsspec mapper for opening Zarr stores directly: ```python import xarray as xr -from api_client.object_storage import create_cloudfront_mapper +from earth2studio.serve.client.fsspec_utils import create_cloudfront_mapper # Create a mapper from the signed URL mapper = create_cloudfront_mapper(request_result.signed_url, zarr_path="results.zarr")