Skip to content

Commit 0292feb

Browse files
feat: Extract Common Configs + Log Configs on Init + Add test_ to sdk/tests filenames required for pytest (ai-dynamo#434)
Co-authored-by: ishandhanani <[email protected]>
1 parent 0186aa7 commit 0292feb

File tree

13 files changed

+244
-75
lines changed

13 files changed

+244
-75
lines changed

deploy/dynamo/sdk/src/dynamo/sdk/cli/serve.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -217,17 +217,17 @@ def serve(
217217
# Initialize service_configs as empty dict if it's None
218218
# Convert nested YAML structure to flat dict with dot notation
219219
for service, configs in yaml_configs.items():
220+
if service not in service_configs:
221+
service_configs[service] = {}
220222
for key, value in configs.items():
221-
if service not in service_configs:
222-
service_configs[service] = {}
223223
service_configs[service][key] = value
224224

225225
# Process service-specific options
226226
cmdline_overrides: t.Dict[str, t.Any] = _parse_service_args(ctx.args)
227227
for service, configs in cmdline_overrides.items():
228+
if service not in service_configs:
229+
service_configs[service] = {}
228230
for key, value in configs.items():
229-
if service not in service_configs:
230-
service_configs[service] = {}
231231
service_configs[service][key] = value
232232

233233
# Process depends
@@ -243,11 +243,12 @@ def serve(
243243
rich.print(f"DYNAMO_SERVICE_CONFIG={json.dumps(service_configs)}")
244244
sys.exit(0)
245245

246+
configure_server_logging()
246247
# Set environment variable with service configuration
247248
if service_configs:
249+
logger.info(f"Running dynamo serve with service configs {service_configs}")
248250
os.environ["DYNAMO_SERVICE_CONFIG"] = json.dumps(service_configs)
249251

250-
configure_server_logging()
251252
if working_dir is None:
252253
if os.path.isdir(os.path.expanduser(bento)):
253254
working_dir = os.path.expanduser(bento)

deploy/dynamo/sdk/src/dynamo/sdk/lib/config.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@
1414
# limitations under the License.
1515

1616
import json
17+
import logging
1718
import os
1819

20+
logger = logging.getLogger(__name__)
21+
1922

2023
class ServiceConfig(dict):
2124
"""Configuration store that inherits from dict for simpler access patterns"""
@@ -47,14 +50,27 @@ def require(self, service_name, key):
4750
return self[service_name][key]
4851

4952
def as_args(self, service_name, prefix=""):
50-
"""Extract configs as CLI args for a service, with optional prefix filtering"""
53+
"""Extract configs as CLI args for a service, with optional prefix filtering.
54+
55+
Every component will additionally have the args in the `Common` configs
56+
applied if it has subscribed to that config key, i.e. the given key is provided in
57+
the component's `common-configs` setting, and that key has not been overridden by the
58+
component's config.
59+
"""
60+
COMMON_CONFIG_SERVICE = "Common"
61+
COMMON_CONFIG_KEY = "common-configs"
62+
5163
if service_name not in self:
5264
return []
5365

54-
args = []
55-
for key, value in self[service_name].items():
66+
args: list[str] = []
67+
68+
def add_to_args(args: list[str], key: str, value):
5669
if prefix and not key.startswith(prefix):
57-
continue
70+
return
71+
72+
if key.endswith(COMMON_CONFIG_KEY):
73+
return
5874

5975
# Strip prefix if needed
6076
arg_key = key[len(prefix) :] if prefix and key.startswith(prefix) else key
@@ -68,4 +84,16 @@ def as_args(self, service_name, prefix=""):
6884
else:
6985
args.extend([f"--{arg_key}", str(value)])
7086

87+
if (common := self.get(COMMON_CONFIG_SERVICE)) is not None and (
88+
common_config_keys := self[service_name].get(COMMON_CONFIG_KEY)
89+
) is not None:
90+
for key in common_config_keys:
91+
if key in common and key not in self[service_name]:
92+
add_to_args(args, key, common[key])
93+
94+
for key, value in self[service_name].items():
95+
add_to_args(args, key, value)
96+
97+
logger.info(f"Running {service_name} with {args=}")
98+
7199
return args

deploy/dynamo/sdk/src/dynamo/sdk/tests/pipeline.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,15 @@ class Backend2:
8686
backend = depends(Backend)
8787

8888
def __init__(self) -> None:
89-
print("Starting middle2")
89+
print("Starting backend2")
9090

9191
@dynamo_endpoint()
9292
async def generate(self, req: RequestType):
9393
"""Forward requests to backend."""
9494

9595
req_text = req.text
96-
print(f"Middle2 received: {req_text}")
97-
text = f"{req_text}-mid2"
96+
print(f"Backend2 received: {req_text}")
97+
text = f"{req_text}-back2"
9898
next_request = RequestType(text=text).model_dump_json()
9999
print(next_request)
100100

@@ -117,8 +117,17 @@ async def generate(self, req: RequestType):
117117
req_text = req.text
118118
print(f"Middle received: {req_text}")
119119
text = f"{req_text}-mid"
120-
for token in text.split():
121-
yield f"Mid: {token}"
120+
121+
txt = RequestType(text=text)
122+
123+
if self.backend:
124+
async for back_resp in self.backend.generate(txt.model_dump_json()):
125+
print(f"Frontend received back_resp: {back_resp}")
126+
yield f"Frontend: {back_resp}"
127+
else:
128+
async for back_resp in self.backend2.generate(txt.model_dump_json()):
129+
print(f"Frontend received back_resp: {back_resp}")
130+
yield f"Frontend: {back_resp}"
122131

123132

124133
@service(resources={"cpu": "1"}, traffic={"timeout": 60})
@@ -136,11 +145,6 @@ async def generate(self, text):
136145
print(f"Frontend received type: {type(text)}")
137146
txt = RequestType(text=text)
138147
print(f"Frontend sending: {type(txt)}")
139-
if self.backend:
140-
async for back_resp in self.backend.generate(txt.model_dump_json()):
141-
print(f"Frontend received back_resp: {back_resp}")
142-
yield f"Frontend: {back_resp}"
143-
else:
144-
async for mid_resp in self.middle.generate(txt.model_dump_json()):
145-
print(f"Frontend received mid_resp: {mid_resp}")
146-
yield f"Frontend: {mid_resp}"
148+
async for mid_resp in self.middle.generate(txt.model_dump_json()):
149+
print(f"Frontend received mid_resp: {mid_resp}")
150+
yield f"Frontend: {mid_resp}"
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import os
17+
18+
import pytest
19+
20+
from dynamo.sdk.lib.config import ServiceConfig
21+
22+
pytestmark = pytest.mark.pre_merge
23+
24+
25+
def test_service_config_with_common_configs():
26+
# Reset singleton instance
27+
ServiceConfig._instance = None
28+
29+
# Set environment variable with config that includes common-configs
30+
os.environ[
31+
"DYNAMO_SERVICE_CONFIG"
32+
] = """
33+
{
34+
"Common": {
35+
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
36+
"block-size": 64,
37+
"max-model-len": 16384
38+
},
39+
"VllmWorker": {
40+
"enforce-eager": true,
41+
"common-configs": ["model", "block-size", "max-model-len"]
42+
}
43+
}
44+
"""
45+
46+
# Get arguments and verify common configs are included
47+
service_config = ServiceConfig.get_instance()
48+
vllm_worker_args = service_config.as_args("VllmWorker")
49+
50+
# Check that each common config appears in the arguments
51+
for key in ["model", "block-size", "max-model-len"]:
52+
assert f"--{key}" in vllm_worker_args
53+
54+
55+
def test_service_config_without_common_configs():
56+
# Reset singleton instance
57+
ServiceConfig._instance = None
58+
59+
# Set environment variable with config that DOESN'T include common-configs
60+
os.environ[
61+
"DYNAMO_SERVICE_CONFIG"
62+
] = """
63+
{
64+
"Common": {
65+
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
66+
"block-size": 64,
67+
"max-model-len": 16384
68+
},
69+
"VllmWorker": {
70+
"enforce-eager": true
71+
}
72+
}
73+
"""
74+
75+
# Get arguments and verify common configs are NOT included
76+
service_config = ServiceConfig.get_instance()
77+
vllm_worker_args = service_config.as_args("VllmWorker")
78+
79+
# Check that none of the common configs appear in arguments
80+
for key in ["model", "block-size", "max-model-len"]:
81+
assert f"--{key}" not in vllm_worker_args
82+
83+
84+
def test_service_config_with_direct_configs():
85+
# Reset singleton instance
86+
ServiceConfig._instance = None
87+
88+
# Set environment variable with direct configs (no Common section reference)
89+
os.environ[
90+
"DYNAMO_SERVICE_CONFIG"
91+
] = """
92+
{
93+
"VllmWorker": {
94+
"enforce-eager": true,
95+
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
96+
"block-size": 64,
97+
"max-model-len": 16384
98+
}
99+
}
100+
"""
101+
102+
# Get arguments and verify direct configs are included
103+
service_config = ServiceConfig.get_instance()
104+
vllm_worker_args = service_config.as_args("VllmWorker")
105+
106+
# Check that each config appears in the arguments
107+
for key in ["model", "block-size", "max-model-len"]:
108+
assert f"--{key}" in vllm_worker_args
109+
110+
111+
def test_service_config_override_common_configs():
112+
# Reset singleton instance
113+
ServiceConfig._instance = None
114+
115+
# Set environment variable with config that includes common-configs
116+
# overridden by the subscribing config
117+
os.environ[
118+
"DYNAMO_SERVICE_CONFIG"
119+
] = """
120+
{
121+
"Common": {
122+
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
123+
"block-size": 64,
124+
"max-model-len": 16384
125+
},
126+
"VllmWorker": {
127+
"enforce-eager": true,
128+
"block-size": 128,
129+
"common-configs": ["model", "block-size", "max-model-len"]
130+
}
131+
}
132+
"""
133+
134+
# Get arguments and verify common configs are included
135+
service_config = ServiceConfig.get_instance()
136+
vllm_worker_args = service_config.as_args("VllmWorker")
137+
138+
# Check that each common config appears in the arguments
139+
for key in ["model", "block-size", "max-model-len"]:
140+
assert f"--{key}" in vllm_worker_args
141+
142+
assert vllm_worker_args[vllm_worker_args.index("--block-size") + 1] == "128"

deploy/dynamo/sdk/src/dynamo/sdk/tests/e2e.py renamed to deploy/dynamo/sdk/src/dynamo/sdk/tests/test_e2e.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,4 @@ async def test_pipeline():
7979
if attempt == max_retries - 1:
8080
raise
8181
print(f"Attempt {attempt + 1} failed, retrying...")
82-
await asyncio.sleep(1)
82+
await asyncio.sleep(3)

docs/guides/dynamo_serve.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -162,36 +162,36 @@ This will print out something like
162162
```bash
163163
Service Configuration:
164164
{
165+
"Common": {
166+
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
167+
"block-size": 64,
168+
"max-model-len": 16384,
169+
},
165170
"Frontend": {
166171
"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
167172
"endpoint": "dynamo.Processor.chat/completions",
168173
"port": 8000
169174
},
170175
"Processor": {
171-
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
172-
"block-size": 64,
173-
"max-model-len": 16384,
174-
"router": "round-robin"
176+
"router": "round-robin",
177+
"common-configs": [model, block-size, max-model-len]
175178
},
176179
"VllmWorker": {
177-
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
178180
"enforce-eager": true,
179-
"block-size": 64,
180-
"max-model-len": 16384,
181181
"max-num-batched-tokens": 16384,
182182
"enable-prefix-caching": true,
183183
"router": "random",
184184
"tensor-parallel-size": 1,
185185
"ServiceArgs": {
186186
"workers": 1
187-
}
187+
},
188+
"common-configs": [model, block-size, max-model-len]
188189
}
189190
}
190191

191192
Environment Variable that would be set:
192-
DYNAMO_SERVICE_CONFIG={"Frontend": {"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "endpoint": "dynamo.Processor.chat/completions", "port": 8000}, "Processor": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "block-size": 64,
193-
"max-model-len": 16384, "router": "round-robin"}, "VllmWorker": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "enforce-eager": true, "block-size": 64, "max-model-len": 16384, "max-num-batched-tokens": 16384, "enable-prefix-caching":
194-
true, "router": "random", "tensor-parallel-size": 1, "ServiceArgs": {"workers": 1}}}
193+
DYNAMO_SERVICE_CONFIG={"Common": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "block-size": 64, "max-model-len": 16384}, "Frontend": {"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "endpoint": "dynamo.Processor.chat/completions", "port": 8000}, "Processor": {"router": "round-robin", "common-configs": ["model", "block-size", "max-model-len"]}, "VllmWorker": {"enforce-eager": true, "max-num-batched-tokens": 16384, "enable-prefix-caching":
194+
true, "router": "random", "tensor-parallel-size": 1, "ServiceArgs": {"workers": 1}, "common-configs": ["model", "block-size", "max-model-len"]}}
195195
```
196196

197197
You can override any of these configuration options by passing in CLI flags to serve. For example, to change the routing strategy, you can run

examples/llm/components/kv_router.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def parse_args(service_name, prefix) -> Namespace:
4343
help="Minimum number of workers required before proceeding",
4444
)
4545
parser.add_argument(
46-
"--model-name",
46+
"--model",
4747
type=str,
4848
default="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
4949
help="Model that is being served",

examples/llm/configs/agg.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,22 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15+
Common:
16+
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
17+
block-size: 64
18+
max-model-len: 16384
1519

1620
Frontend:
1721
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
1822
endpoint: dynamo.Processor.chat/completions
1923
port: 8000
2024

2125
Processor:
22-
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
23-
block-size: 64
24-
max-model-len: 16384
2526
router: round-robin
27+
common-configs: [model, block-size, max-model-len]
2628

2729
VllmWorker:
28-
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
2930
enforce-eager: true
30-
block-size: 64
31-
max-model-len: 16384
3231
max-num-batched-tokens: 16384
3332
enable-prefix-caching: true
3433
router: random
@@ -37,3 +36,4 @@ VllmWorker:
3736
workers: 1
3837
resources:
3938
gpu: 1
39+
common-configs: [model, block-size, max-model-len]

0 commit comments

Comments
 (0)