Skip to content

Commit 0292feb

Browse files
feat: Extract Common Configs + Log Configs on Init + Add test_ to sdk/tests filenames required for pytest (ai-dynamo#434)
Co-authored-by: ishandhanani <[email protected]>
1 parent 0186aa7 commit 0292feb

File tree

13 files changed

+244
-75
lines changed

13 files changed

+244
-75
lines changed

deploy/dynamo/sdk/src/dynamo/sdk/cli/serve.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -217,17 +217,17 @@ def serve(
217217
# Initialize service_configs as empty dict if it's None
218218
# Convert nested YAML structure to flat dict with dot notation
219219
for service, configs in yaml_configs.items():
220+
if service not in service_configs:
221+
service_configs[service] = {}
220222
for key, value in configs.items():
221-
if service not in service_configs:
222-
service_configs[service] = {}
223223
service_configs[service][key] = value
224224

225225
# Process service-specific options
226226
cmdline_overrides: t.Dict[str, t.Any] = _parse_service_args(ctx.args)
227227
for service, configs in cmdline_overrides.items():
228+
if service not in service_configs:
229+
service_configs[service] = {}
228230
for key, value in configs.items():
229-
if service not in service_configs:
230-
service_configs[service] = {}
231231
service_configs[service][key] = value
232232

233233
# Process depends
@@ -243,11 +243,12 @@ def serve(
243243
rich.print(f"DYNAMO_SERVICE_CONFIG={json.dumps(service_configs)}")
244244
sys.exit(0)
245245

246+
configure_server_logging()
246247
# Set environment variable with service configuration
247248
if service_configs:
249+
logger.info(f"Running dynamo serve with service configs {service_configs}")
248250
os.environ["DYNAMO_SERVICE_CONFIG"] = json.dumps(service_configs)
249251

250-
configure_server_logging()
251252
if working_dir is None:
252253
if os.path.isdir(os.path.expanduser(bento)):
253254
working_dir = os.path.expanduser(bento)

deploy/dynamo/sdk/src/dynamo/sdk/lib/config.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@
1414
# limitations under the License.
1515

1616
import json
17+
import logging
1718
import os
1819

20+
logger = logging.getLogger(__name__)
21+
1922

2023
class ServiceConfig(dict):
2124
"""Configuration store that inherits from dict for simpler access patterns"""
@@ -47,14 +50,27 @@ def require(self, service_name, key):
4750
return self[service_name][key]
4851

4952
def as_args(self, service_name, prefix=""):
50-
"""Extract configs as CLI args for a service, with optional prefix filtering"""
53+
"""Extract configs as CLI args for a service, with optional prefix filtering.
54+
55+
Every component will additionally have the args in the `Common` configs
56+
applied if it has subscribed to that config key, i.e. the given key is provided in
57+
the component's `common-configs` setting, and that key has not been overridden by the
58+
component's config.
59+
"""
60+
COMMON_CONFIG_SERVICE = "Common"
61+
COMMON_CONFIG_KEY = "common-configs"
62+
5163
if service_name not in self:
5264
return []
5365

54-
args = []
55-
for key, value in self[service_name].items():
66+
args: list[str] = []
67+
68+
def add_to_args(args: list[str], key: str, value):
5669
if prefix and not key.startswith(prefix):
57-
continue
70+
return
71+
72+
if key.endswith(COMMON_CONFIG_KEY):
73+
return
5874

5975
# Strip prefix if needed
6076
arg_key = key[len(prefix) :] if prefix and key.startswith(prefix) else key
@@ -68,4 +84,16 @@ def as_args(self, service_name, prefix=""):
6884
else:
6985
args.extend([f"--{arg_key}", str(value)])
7086

87+
if (common := self.get(COMMON_CONFIG_SERVICE)) is not None and (
88+
common_config_keys := self[service_name].get(COMMON_CONFIG_KEY)
89+
) is not None:
90+
for key in common_config_keys:
91+
if key in common and key not in self[service_name]:
92+
add_to_args(args, key, common[key])
93+
94+
for key, value in self[service_name].items():
95+
add_to_args(args, key, value)
96+
97+
logger.info(f"Running {service_name} with {args=}")
98+
7199
return args

deploy/dynamo/sdk/src/dynamo/sdk/tests/pipeline.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,15 @@ class Backend2:
8686
backend = depends(Backend)
8787

8888
def __init__(self) -> None:
89-
print("Starting middle2")
89+
print("Starting backend2")
9090

9191
@dynamo_endpoint()
9292
async def generate(self, req: RequestType):
9393
"""Forward requests to backend."""
9494

9595
req_text = req.text
96-
print(f"Middle2 received: {req_text}")
97-
text = f"{req_text}-mid2"
96+
print(f"Backend2 received: {req_text}")
97+
text = f"{req_text}-back2"
9898
next_request = RequestType(text=text).model_dump_json()
9999
print(next_request)
100100

@@ -117,8 +117,17 @@ async def generate(self, req: RequestType):
117117
req_text = req.text
118118
print(f"Middle received: {req_text}")
119119
text = f"{req_text}-mid"
120-
for token in text.split():
121-
yield f"Mid: {token}"
120+
121+
txt = RequestType(text=text)
122+
123+
if self.backend:
124+
async for back_resp in self.backend.generate(txt.model_dump_json()):
125+
print(f"Frontend received back_resp: {back_resp}")
126+
yield f"Frontend: {back_resp}"
127+
else:
128+
async for back_resp in self.backend2.generate(txt.model_dump_json()):
129+
print(f"Frontend received back_resp: {back_resp}")
130+
yield f"Frontend: {back_resp}"
122131

123132

124133
@service(resources={"cpu": "1"}, traffic={"timeout": 60})
@@ -136,11 +145,6 @@ async def generate(self, text):
136145
print(f"Frontend received type: {type(text)}")
137146
txt = RequestType(text=text)
138147
print(f"Frontend sending: {type(txt)}")
139-
if self.backend:
140-
async for back_resp in self.backend.generate(txt.model_dump_json()):
141-
print(f"Frontend received back_resp: {back_resp}")
142-
yield f"Frontend: {back_resp}"
143-
else:
144-
async for mid_resp in self.middle.generate(txt.model_dump_json()):
145-
print(f"Frontend received mid_resp: {mid_resp}")
146-
yield f"Frontend: {mid_resp}"
148+
async for mid_resp in self.middle.generate(txt.model_dump_json()):
149+
print(f"Frontend received mid_resp: {mid_resp}")
150+
yield f"Frontend: {mid_resp}"
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import os
17+
18+
import pytest
19+
20+
from dynamo.sdk.lib.config import ServiceConfig
21+
22+
pytestmark = pytest.mark.pre_merge
23+
24+
25+
def test_service_config_with_common_configs():
26+
# Reset singleton instance
27+
ServiceConfig._instance = None
28+
29+
# Set environment variable with config that includes common-configs
30+
os.environ[
31+
"DYNAMO_SERVICE_CONFIG"
32+
] = """
33+
{
34+
"Common": {
35+
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
36+
"block-size": 64,
37+
"max-model-len": 16384
38+
},
39+
"VllmWorker": {
40+
"enforce-eager": true,
41+
"common-configs": ["model", "block-size", "max-model-len"]
42+
}
43+
}
44+
"""
45+
46+
# Get arguments and verify common configs are included
47+
service_config = ServiceConfig.get_instance()
48+
vllm_worker_args = service_config.as_args("VllmWorker")
49+
50+
# Check that each common config appears in the arguments
51+
for key in ["model", "block-size", "max-model-len"]:
52+
assert f"--{key}" in vllm_worker_args
53+
54+
55+
def test_service_config_without_common_configs():
56+
# Reset singleton instance
57+
ServiceConfig._instance = None
58+
59+
# Set environment variable with config that DOESN'T include common-configs
60+
os.environ[
61+
"DYNAMO_SERVICE_CONFIG"
62+
] = """
63+
{
64+
"Common": {
65+
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
66+
"block-size": 64,
67+
"max-model-len": 16384
68+
},
69+
"VllmWorker": {
70+
"enforce-eager": true
71+
}
72+
}
73+
"""
74+
75+
# Get arguments and verify common configs are NOT included
76+
service_config = ServiceConfig.get_instance()
77+
vllm_worker_args = service_config.as_args("VllmWorker")
78+
79+
# Check that none of the common configs appear in arguments
80+
for key in ["model", "block-size", "max-model-len"]:
81+
assert f"--{key}" not in vllm_worker_args
82+
83+
84+
def test_service_config_with_direct_configs():
85+
# Reset singleton instance
86+
ServiceConfig._instance = None
87+
88+
# Set environment variable with direct configs (no Common section reference)
89+
os.environ[
90+
"DYNAMO_SERVICE_CONFIG"
91+
] = """
92+
{
93+
"VllmWorker": {
94+
"enforce-eager": true,
95+
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
96+
"block-size": 64,
97+
"max-model-len": 16384
98+
}
99+
}
100+
"""
101+
102+
# Get arguments and verify direct configs are included
103+
service_config = ServiceConfig.get_instance()
104+
vllm_worker_args = service_config.as_args("VllmWorker")
105+
106+
# Check that each config appears in the arguments
107+
for key in ["model", "block-size", "max-model-len"]:
108+
assert f"--{key}" in vllm_worker_args
109+
110+
111+
def test_service_config_override_common_configs():
112+
# Reset singleton instance
113+
ServiceConfig._instance = None
114+
115+
# Set environment variable with config that includes common-configs
116+
# overridden by the subscribing config
117+
os.environ[
118+
"DYNAMO_SERVICE_CONFIG"
119+
] = """
120+
{
121+
"Common": {
122+
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
123+
"block-size": 64,
124+
"max-model-len": 16384
125+
},
126+
"VllmWorker": {
127+
"enforce-eager": true,
128+
"block-size": 128,
129+
"common-configs": ["model", "block-size", "max-model-len"]
130+
}
131+
}
132+
"""
133+
134+
# Get arguments and verify common configs are included
135+
service_config = ServiceConfig.get_instance()
136+
vllm_worker_args = service_config.as_args("VllmWorker")
137+
138+
# Check that each common config appears in the arguments
139+
for key in ["model", "block-size", "max-model-len"]:
140+
assert f"--{key}" in vllm_worker_args
141+
142+
assert vllm_worker_args[vllm_worker_args.index("--block-size") + 1] == "128"

deploy/dynamo/sdk/src/dynamo/sdk/tests/e2e.py renamed to deploy/dynamo/sdk/src/dynamo/sdk/tests/test_e2e.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,4 @@ async def test_pipeline():
7979
if attempt == max_retries - 1:
8080
raise
8181
print(f"Attempt {attempt + 1} failed, retrying...")
82-
await asyncio.sleep(1)
82+
await asyncio.sleep(3)

docs/guides/dynamo_serve.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -162,36 +162,36 @@ This will print out something like
162162
```bash
163163
Service Configuration:
164164
{
165+
"Common": {
166+
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
167+
"block-size": 64,
168+
"max-model-len": 16384,
169+
},
165170
"Frontend": {
166171
"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
167172
"endpoint": "dynamo.Processor.chat/completions",
168173
"port": 8000
169174
},
170175
"Processor": {
171-
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
172-
"block-size": 64,
173-
"max-model-len": 16384,
174-
"router": "round-robin"
176+
"router": "round-robin",
177+
"common-configs": [model, block-size, max-model-len]
175178
},
176179
"VllmWorker": {
177-
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
178180
"enforce-eager": true,
179-
"block-size": 64,
180-
"max-model-len": 16384,
181181
"max-num-batched-tokens": 16384,
182182
"enable-prefix-caching": true,
183183
"router": "random",
184184
"tensor-parallel-size": 1,
185185
"ServiceArgs": {
186186
"workers": 1
187-
}
187+
},
188+
"common-configs": [model, block-size, max-model-len]
188189
}
189190
}
190191

191192
Environment Variable that would be set:
192-
DYNAMO_SERVICE_CONFIG={"Frontend": {"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "endpoint": "dynamo.Processor.chat/completions", "port": 8000}, "Processor": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "block-size": 64,
193-
"max-model-len": 16384, "router": "round-robin"}, "VllmWorker": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "enforce-eager": true, "block-size": 64, "max-model-len": 16384, "max-num-batched-tokens": 16384, "enable-prefix-caching":
194-
true, "router": "random", "tensor-parallel-size": 1, "ServiceArgs": {"workers": 1}}}
193+
DYNAMO_SERVICE_CONFIG={"Common": {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "block-size": 64, "max-model-len": 16384}, "Frontend": {"served_model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "endpoint": "dynamo.Processor.chat/completions", "port": 8000}, "Processor": {"router": "round-robin", "common-configs": ["model", "block-size", "max-model-len"]}, "VllmWorker": {"enforce-eager": true, "max-num-batched-tokens": 16384, "enable-prefix-caching":
194+
true, "router": "random", "tensor-parallel-size": 1, "ServiceArgs": {"workers": 1}, "common-configs": ["model", "block-size", "max-model-len"]}}
195195
```
196196

197197
You can override any of these configuration options by passing in CLI flags to serve. For example, to change the routing strategy, you can run

examples/llm/components/kv_router.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def parse_args(service_name, prefix) -> Namespace:
4343
help="Minimum number of workers required before proceeding",
4444
)
4545
parser.add_argument(
46-
"--model-name",
46+
"--model",
4747
type=str,
4848
default="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
4949
help="Model that is being served",

examples/llm/configs/agg.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,22 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15+
Common:
16+
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
17+
block-size: 64
18+
max-model-len: 16384
1519

1620
Frontend:
1721
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
1822
endpoint: dynamo.Processor.chat/completions
1923
port: 8000
2024

2125
Processor:
22-
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
23-
block-size: 64
24-
max-model-len: 16384
2526
router: round-robin
27+
common-configs: [model, block-size, max-model-len]
2628

2729
VllmWorker:
28-
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
2930
enforce-eager: true
30-
block-size: 64
31-
max-model-len: 16384
3231
max-num-batched-tokens: 16384
3332
enable-prefix-caching: true
3433
router: random
@@ -37,3 +36,4 @@ VllmWorker:
3736
workers: 1
3837
resources:
3938
gpu: 1
39+
common-configs: [model, block-size, max-model-len]

0 commit comments

Comments
 (0)