Update guidellm #6

Open · wants to merge 91 commits into base: main

Commits (91)

f6aa8fe  simple change (Jun 27, 2025)
18f41b1  test lmeval change (Jun 27, 2025)
a425d43  update branch (Jun 27, 2025)
6fc29f4  use main (Jun 27, 2025)
956a12b  remove gcs (Jun 27, 2025)
5e09fb7  readd gc (Jun 27, 2025)
655f00e  remove gc (Jun 27, 2025)
ba703b0  back to guidellm (Jun 27, 2025)
b4deac8  simplified (Jun 27, 2025)
6ed6862  simple vllm (Jun 27, 2025)
b3f55bc  skip vllm (Jun 27, 2025)
3a709da  pause vllm (Jun 27, 2025)
02cac57  update benchmark report (Jun 27, 2025)
a85bb4f  update ip (Jun 27, 2025)
c3af0cf  update branch (Jun 27, 2025)
ede7482  added base task param (Jun 27, 2025)
87496ea  retry branch name (Jun 27, 2025)
b64ffd8  repo branch (Jun 27, 2025)
7dc5e48  readd branch (Jun 27, 2025)
2d05c64  branch in base task (Jun 27, 2025)
60e6e9e  optional branch (Jun 27, 2025)
ee4d7c9  add branch choice (Jun 27, 2025)
998a8bc  include benchmark (Jun 27, 2025)
6944cb4  refactor default (Jun 27, 2025)
6e4a5d5  moved generate text (Jun 27, 2025)
41f3f21  test (Jun 30, 2025)
850fd21  add debug (Jun 30, 2025)
5e87674  add os lib (Jun 30, 2025)
c9b63a8  use default scenario (Jun 30, 2025)
4d68ea8  benchmark with scenario (Jun 30, 2025)
0f07b28  overlap with guidellm vars (Jun 30, 2025)
6a67050  check model and target (Jun 30, 2025)
72094b4  add debugs (Jun 30, 2025)
10180a3  list keys that overlap (Jun 30, 2025)
9191f13  only replace model (Jun 30, 2025)
1b0e4a4  update with scenario (Jun 30, 2025)
7515a61  readd default scenario (Jun 30, 2025)
e6318f5  readd default scenario (Jun 30, 2025)
9f61d6e  pin to main (Jun 30, 2025)
8c8c23e  readd vllm server (Jul 1, 2025)
ec725d1  updated vllm server (Jul 1, 2025)
5b22309  print the input vars (Jul 1, 2025)
5e8053a  remove gpu count (Jul 1, 2025)
af3ebaa  simple path (Jul 1, 2025)
5c4f5b8  vllm print (Jul 1, 2025)
b8a1e9f  added cwd (Jul 1, 2025)
0365496  ensure setup uses branch (Jul 1, 2025)
348fd82  add guide again (Jul 1, 2025)
cb882af  readd gpu count (Jul 1, 2025)
464591e  update vllm server (Jul 1, 2025)
c0d0dba  revert target (Jul 1, 2025)
81c62f7  install editable guidellm (Jul 1, 2025)
97e36cb  print package list (Jul 1, 2025)
063c8b9  added package print (Jul 1, 2025)
d6ef266  older guidellm (Jul 1, 2025)
8c64910  updated to use dev branch (Jul 2, 2025)
7dee38b  redo with custom branch (Jul 2, 2025)
263c2ff  repo override (Jul 2, 2025)
90e461b  add packages to guidellm (Jul 2, 2025)
4f00a5a  update setup.py (Jul 2, 2025)
14f84ce  readd (Jul 2, 2025)
ad2b423  before vllm (Jul 2, 2025)
98eb6f8  removed vllm (Jul 2, 2025)
10874d3  remove vllm (Jul 2, 2025)
629d195  cleanup (Jul 2, 2025)
768d135  back to base (Jul 2, 2025)
09c3978  readd (Jul 2, 2025)
e64fb12  readd start vllm server (Jul 2, 2025)
873c222  use guidellm branch (Jul 2, 2025)
16b83bc  base complete (Jul 2, 2025)
432031e  test rag (Jul 2, 2025)
e9117ea  clean up (Jul 2, 2025)
9984a8c  base package as variable (Jul 2, 2025)
b8b51e9  test default branch change (Jul 2, 2025)
b99afec  update branch names (Jul 2, 2025)
b2c2918  use main branch in config (Jul 2, 2025)
d1e686b  print the scenario (Jul 2, 2025)
5d3e3ff  modify tokens (Jul 2, 2025)
3b0d86c  revert lmeval and setup.py, update vllm server log (Jul 3, 2025)
a2d6eb5  readd default scenarios (Jul 3, 2025)
81f5199  change default guidellm json (Jul 3, 2025)
1550333  add config examples json (Jul 3, 2025)
420137d  use original default (Jul 3, 2025)
9d284c9  add log (Jul 3, 2025)
e863516  include user scenario (Jul 3, 2025)
3703e62  revert lmeval example (Jul 3, 2025)
d1b985a  add file error handling (Jul 3, 2025)
e60aab1  removed package prints (Jul 3, 2025)
515a1db  default config (Jul 3, 2025)
ac9ef63  readd output path (Jul 3, 2025)
69638ea  onpremise settings (Jul 3, 2025)

Files changed

7 changes: 4 additions & 3 deletions examples/guidellm_example.py
@@ -9,11 +9,12 @@
     GUIDELLM__MAX_CONCURRENCY=256,
     GUIDELLM__REQUEST_TIMEOUT=21600,
     target="http://localhost:8000/v1",
-    data_type="emulated",
     max_seconds=30,
-    data="prompt_tokens=512,generated_tokens=256",
+    #scenario = "benchmarking_32k",
+    data="prompt_tokens=128,output_tokens=128",
+    branch = "update_guidellm",
     vllm_kwargs={"enable-chunked-prefill": True}
 )

 task.execute_remotely("oneshot-a100x1")
-#task.execute_locally()
\ No newline at end of file
+#task.execute_locally()
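
For context, this is how the full example reads once the diff is applied. It is a sketch, not the verbatim file: the `GuideLLMTask` import path, `model_id` argument, and project/task names are assumptions not shown in the hunk.

from automation.tasks import GuideLLMTask  # import path assumed

task = GuideLLMTask(
    project_name="benchmarking",                   # placeholder, not in the diff
    task_name="guidellm-chat-demo",                # placeholder, not in the diff
    model_id="meta-llama/Llama-3.2-1B-Instruct",   # placeholder model
    GUIDELLM__MAX_CONCURRENCY=256,
    GUIDELLM__REQUEST_TIMEOUT=21600,
    target="http://localhost:8000/v1",
    max_seconds=30,
    # scenario="benchmarking_32k",                 # standard scenario, left disabled
    data="prompt_tokens=128,output_tokens=128",    # replaces data_type="emulated"
    branch="update_guidellm",                      # research repo branch to install from
    vllm_kwargs={"enable-chunked-prefill": True},
)

task.execute_remotely("oneshot-a100x1")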
4 changes: 2 additions & 2 deletions examples/lmeval_example.py
@@ -6,8 +6,8 @@
     model_id="meta-llama/Llama-3.2-1B-Instruct",
     tasks="gsm8k",
     model_args="dtype=auto,max_model_len=8192",
-    batch_size="auto",
+    batch_size="auto",
 )

 task.execute_remotely("oneshot-a100x1")
-#task.execute_locally()
\ No newline at end of file
+#task.execute_locally()
6 changes: 4 additions & 2 deletions src/automation/configs.py
@@ -1,2 +1,4 @@
-DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_5:latest"
-DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml"
\ No newline at end of file
+DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest"
+DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml"
+DEFAULT_RESEARCH_BRANCH = "main"
+DEFAULT_GUIDELLM_SCENARIO = "chat"
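
The two new constants give tasks a default research branch and a default GuideLLM scenario name. A minimal sketch of how a name like "chat" could resolve to one of the scenario JSON files added below — the directory layout is taken from this PR, but the helper itself is an assumption, not code from the diff:

import json
import os

# Assumed location of the standard scenario files added in this PR.
STANDARDS_DIR = os.path.join("src", "automation", "standards", "benchmarking")

def load_scenario(name: str) -> dict:
    """Resolve a scenario name like "chat" to its JSON definition (sketch)."""
    path = os.path.join(STANDARDS_DIR, f"{name}.json")
    with open(path, "r") as f:
        return json.load(f)

scenario = load_scenario("chat")
print(scenario["data"]["prompt_tokens"])  # 512 for the chat scenario below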
13 changes: 13 additions & 0 deletions src/automation/standards/benchmarking/benchmarking_128k.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 128000,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 128000,
+        "output_tokens": 2048,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 2048
+    }
+}
13 changes: 13 additions & 0 deletions src/automation/standards/benchmarking/benchmarking_16k.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 16000,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 16000,
+        "output_tokens": 2048,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 2048
+    }
+}
13 changes: 13 additions & 0 deletions src/automation/standards/benchmarking/benchmarking_32k.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 32000,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 32000,
+        "output_tokens": 2048,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 2048
+    }
+}
13 changes: 13 additions & 0 deletions src/automation/standards/benchmarking/benchmarking_64k.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 64000,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 64000,
+        "output_tokens": 2048,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 2048
+    }
+}
13 changes: 13 additions & 0 deletions src/automation/standards/benchmarking/benchmarking_chat.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 512,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 512,
+        "output_tokens": 256,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 256
+    }
+}
13 changes: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 256,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 256,
+        "output_tokens": 1024,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 1024
+    }
+}
13 changes: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 1024,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 1024,
+        "output_tokens": 1024,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 1024
+    }
+}
13 changes: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 768,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 768,
+        "output_tokens": 128,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 128
+    }
+}
13 changes: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 256,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 256,
+        "output_tokens": 128,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 128
+    }
+}
13 changes: 13 additions & 0 deletions src/automation/standards/benchmarking/benchmarking_long_rag.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 10240,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 10240,
+        "output_tokens": 1536,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 1536
+    }
+}
13 changes: 13 additions & 0 deletions src/automation/standards/benchmarking/benchmarking_rag.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 1024,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 1024,
+        "output_tokens": 128,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 128
+    }
+}
13 changes: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 1024,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 1024,
+        "output_tokens": 128,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 128
+    }
+}
13 changes: 13 additions & 0 deletions src/automation/standards/benchmarking/chat.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 512,
+        "prompt_tokens_stdev": 128,
+        "prompt_tokens_min": 1,
+        "prompt_tokens_max": 1024,
+        "output_tokens": 256,
+        "output_tokens_stdev": 64,
+        "output_tokens_min": 1,
+        "output_tokens_max": 1024
+    }
+}
13 changes: 13 additions & 0 deletions src/automation/standards/benchmarking/rag.json
@@ -0,0 +1,13 @@
+{
+    "rate_type": "sweep",
+    "data": {
+        "prompt_tokens": 4096,
+        "prompt_tokens_stdev": 512,
+        "prompt_tokens_min": 2048,
+        "prompt_tokens_max": 6144,
+        "output_tokens": 512,
+        "output_tokens_stdev": 128,
+        "output_tokens_min": 1,
+        "output_tokens_max": 1024
+    }
+}
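
All of these scenario files share one shape: a "rate_type" plus a "data" block of token-count distributions. The commit history ("include user scenario", "list keys that overlap", "only replace model") suggests user-supplied GuideLLM variables are overlaid on the chosen scenario. A hedged sketch of that merge — every name here is an assumption, not code from this PR:

# Sketch only: overlay user-supplied GuideLLM variables on a scenario's
# defaults, reporting any top-level keys that overlap.
def merge_scenario(scenario: dict, user_args: dict) -> dict:
    overlapping = sorted(set(scenario) & set(user_args))
    if overlapping:
        print(f"user arguments override scenario keys: {overlapping}")
    merged = dict(scenario)
    merged.update(user_args)  # user values win on overlap
    return merged

chat = {"rate_type": "sweep", "data": {"prompt_tokens": 512, "output_tokens": 256}}
merged = merge_scenario(chat, {"data": {"prompt_tokens": 128, "output_tokens": 128}})
# merged["data"] is now the user's data block; merged["rate_type"] stays "sweep"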
21 changes: 14 additions & 7 deletions src/automation/tasks/base_task.py
@@ -1,34 +1,41 @@
 from clearml import Task
 from typing import Sequence, Optional
-from automation.configs import DEFAULT_OUTPUT_URI
+from automation.configs import DEFAULT_OUTPUT_URI, DEFAULT_RESEARCH_BRANCH
 from automation.standards import STANDARD_CONFIGS
 import yaml
 import os

 class BaseTask():

-    base_packages = ["git+https://github.com/neuralmagic/research.git"]
+    #base_packages = ["git+https://github.com/neuralmagic/research.git"]
+    #base_packages = ["git+https://github.com/neuralmagic/research.git@update_guidellm"]

     def __init__(
         self,
         project_name: str,
         task_name: str,
         docker_image: str,
+        branch: Optional[str] = DEFAULT_RESEARCH_BRANCH,
         packages: Optional[Sequence[str]]=None,
         task_type: str="training",
     ):
+        branch_name = branch or DEFAULT_RESEARCH_BRANCH
+        base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch_name}"]

         if packages is not None:
-            packages = list(set(packages + self.base_packages))
+            packages = list(set(packages + base_packages))
         else:
-            packages = self.base_packages
+            packages = base_packages

+        print(packages)

         self.project_name = project_name
         self.task_name = task_name
         self.docker_image = docker_image
         self.packages = packages
         self.task_type = task_type
         self.task = None
+        self.branch = branch
         self.script_path = None
         self.callable_artifacts = None
@@ -50,8 +57,8 @@ def process_config(self, config):
             return yaml.safe_load(open(STANDARD_CONFIGS[config], "r"))
         elif os.path.exists(config):
             return yaml.safe_load(open(config, "r"))
-        elif os.path.exists(os.path.join("..", "standatrds", config)):
-            return yaml.safe_load(open(os.path.join("..", "standatrds", config)), "r")
+        elif os.path.exists(os.path.join("..", "standards", config)):
+            return yaml.safe_load(open(os.path.join("..", "standards", config), "r"))
         else:
             return yaml.safe_load(config)

@@ -91,7 +98,7 @@ def create_task(self):
             add_task_init_call=True,
             script=self.script_path,
             repo="https://github.com/neuralmagic/research.git",
-            branch="main",
+            branch=self.branch,
         )
         self.task.output_uri = DEFAULT_OUTPUT_URI
         self.set_arguments()
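
The effect of the new branch argument is that the pip-installed research package and the repository checkout created by ClearML point at the same branch. A minimal usage sketch, assuming the import path and using placeholder project, task, and image values:

from automation.tasks.base_task import BaseTask  # import path assumed

task = BaseTask(
    project_name="benchmarking",       # placeholder
    task_name="branch-pinning-demo",   # placeholder
    docker_image="<docker image>",     # placeholder
    branch="update_guidellm",
)
# task.packages now includes
#   "git+https://github.com/neuralmagic/research.git@update_guidellm"
# and create_task() later passes branch=self.branch to the ClearML task.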
6 changes: 4 additions & 2 deletions src/automation/tasks/guidellm.py
@@ -1,10 +1,10 @@
 from automation.tasks import BaseTask
-from automation.configs import DEFAULT_DOCKER_IMAGE
+from automation.configs import DEFAULT_DOCKER_IMAGE, DEFAULT_RESEARCH_BRANCH
 from typing import Optional, Sequence
 import os

 DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes
-GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@http_backend"
+GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git"

 class GuideLLMTask(BaseTask):
@@ -23,6 +23,7 @@ def __init__(
         docker_image: str=DEFAULT_DOCKER_IMAGE,
         packages: Optional[Sequence[str]]=None,
         clearml_model: bool=False,
+        branch: str = DEFAULT_RESEARCH_BRANCH,
         task_type: str="training",
         vllm_kwargs: dict={},
         target: str="http://localhost:8000/v1",
@@ -52,6 +53,7 @@ def __init__(
             docker_image=docker_image,
             packages=packages,
             task_type=task_type,
+            branch=branch,
         )

         # Check for conflicts in configs and constructor arguments
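
GuideLLMTask now forwards branch to BaseTask, keeping the research package install and the ClearML repo checkout in sync, while guidellm itself installs from the repository's default branch rather than the old http_backend branch. A short sketch of that flow — arguments beyond those visible in the diff are placeholders:

from automation.tasks import GuideLLMTask  # import path assumed

task = GuideLLMTask(
    project_name="benchmarking",        # placeholder
    task_name="guidellm-branch-demo",   # placeholder
    target="http://localhost:8000/v1",
    branch="update_guidellm",           # forwarded to BaseTask via branch=branch
)
# BaseTask then pins git+https://github.com/neuralmagic/research.git@update_guidellm,
# while GUIDELLM_PACKAGE installs guidellm from its default branch.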