Skip to content
This repository has been archived by the owner on Dec 11, 2024. It is now read-only.

Commit

Permalink
Support MLPerf inference rgat reference implementation (#50)
Browse files Browse the repository at this point in the history
  • Loading branch information
anandhu-eng authored Dec 10, 2024
1 parent c8bf646 commit 81b22b3
Show file tree
Hide file tree
Showing 16 changed files with 141 additions and 39 deletions.
46 changes: 33 additions & 13 deletions script/app-mlperf-inference-mlcommons-python/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -482,13 +482,12 @@ deps:
## RGAT
- tags: get,ml-model,rgat
names:
- ml-model
- rgat-model
enable_if_env:
CM_MODEL:
- rgat
skip_if_env:
RGAT_CHECKPOINT_PATH:
CM_ML_MODEL_RGAT_CHECKPOINT_PATH:
- 'on'

########################################################################
Expand Down Expand Up @@ -620,6 +619,9 @@ deps:
enable_if_env:
CM_MODEL:
- rgat
skip_if_env:
CM_DATASET_IGBH_PATH:
- "on"

########################################################################
# Install MLPerf inference dependencies
Expand Down Expand Up @@ -1224,27 +1226,45 @@ variations:
group: models
env:
CM_MODEL: rgat
adr:
pytorch:
version: 2.1.0
deps:
- tags: get,generic-python-lib,_package.colorama
- tags: get,generic-python-lib,_package.tqdm
- tags: get,generic-python-lib,_package.requests
- tags: get,generic-python-lib,_package.torchdata
- tags: get,generic-python-lib,_package.torch-geometric
- tags: get,generic-python-lib,_package.torch-scatter
- tags: get,generic-python-lib,_package.torch-sparse
version: 0.7.0
- tags: get,generic-python-lib,_package.torchvision
version: 0.16.0
- tags: get,generic-python-lib,_package.pybind11
- tags: get,generic-python-lib,_package.PyYAML
- tags: get,generic-python-lib,_package.numpy
version: 1.26.4
- tags: get,generic-python-lib,_package.pydantic
- tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git
- tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html
enable_if_env:
CM_MLPERF_DEVICE:
- cpu

rgat,cuda:
deps:
- tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html
enable_if_env:
CM_MLPERF_DEVICE:
- gpu

- tags: get,generic-python-lib,_package.torch-scatter
- tags: get,generic-python-lib,_package.torch-sparse
- tags: get,generic-python-lib,_package.torch-geometric
env:
CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>.html"

rgat,cpu:
deps:
- tags: get,generic-python-lib,_package.torch-geometric
env:
CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>+cpu.html"
- tags: get,generic-python-lib,_package.torch-scatter
env:
CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>+cpu.html"
- tags: get,generic-python-lib,_package.torch-sparse
env:
CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>+cpu.html"
- tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html

# Target devices
cpu:
Expand Down
17 changes: 10 additions & 7 deletions script/app-mlperf-inference-mlcommons-python/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,12 @@ def preprocess(i):
scenario_extra_options = ''

NUM_THREADS = env['CM_NUM_THREADS']
if int(NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu":
if int(
NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu" and env['CM_MODEL'] != "rgat":
NUM_THREADS = "2" # Don't use more than 2 threads when run on GPU

if env['CM_MODEL'] in ['resnet50', 'retinanet', 'stable-diffusion-xl']:
if env['CM_MODEL'] in ['resnet50', 'retinanet',
'stable-diffusion-xl', 'rgat']:
scenario_extra_options += " --threads " + NUM_THREADS

ml_model_name = env['CM_MODEL']
Expand Down Expand Up @@ -485,15 +487,16 @@ def get_run_cmd_reference(
# have to add the condition for running in debug mode or real run mode
cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \
" --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \
" --dataset-path " + env['CM_IGBH_DATASET_PATH'] + \
" --device " + device.replace("cuda", "cuda:0") + \
" --dataset-path " + env['CM_DATASET_IGBH_PATH'] + \
" --device " + device.replace("cuda", "gpu") + \
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \
scenario_extra_options + mode_extra_options + \
" --output " + env['CM_MLPERF_OUTPUT_DIR'] + \
' --dtype ' + dtype_rgat + \
" --model-path " + env['RGAT_CHECKPOINT_PATH'] + \
" --mlperf_conf " + \
os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf")
" --model-path " + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH']

if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes":
cmd += " --in-memory "

if env.get('CM_NETWORK_LOADGEN', '') in ["lon", "sut"]:
cmd = cmd + " " + "--network " + env['CM_NETWORK_LOADGEN']
Expand Down
35 changes: 35 additions & 0 deletions script/app-mlperf-inference/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,20 @@ variations:
env:
CM_MODEL:
rgat
posthook_deps:
- enable_if_env:
CM_MLPERF_LOADGEN_MODE:
- accuracy
- all
CM_MLPERF_ACCURACY_RESULTS_DIR:
- 'on'
skip_if_env:
CM_MLPERF_IMPLEMENTATION:
- nvidia
names:
- mlperf-accuracy-script
- 3d-unet-accuracy-script
tags: run,accuracy,mlperf,_igbh

sdxl:
group:
Expand Down Expand Up @@ -1645,6 +1659,25 @@ variations:
CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl'
CM_MLPERF_INFERENCE_VERSION: '4.1'

r5.0-dev_default:
group:
reproducibility
add_deps_recursive:
nvidia-inference-common-code:
version: r4.1
tags: _mlcommons
nvidia-inference-server:
version: r4.1
tags: _mlcommons
intel-harness:
tags: _v4.1
default_env:
CM_SKIP_SYS_UTILS: 'yes'
CM_REGENERATE_MEASURE_FILES: 'yes'
env:
CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl'


invalid_variation_combinations:
-
- retinanet
Expand Down Expand Up @@ -1768,6 +1801,8 @@ docker:
- "${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}:${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}"
- "${{ SDXL_CHECKPOINT_PATH }}:${{ SDXL_CHECKPOINT_PATH }}"
- "${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}:${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}"
- "${{ CM_DATASET_IGBH_PATH }}:${{ CM_DATASET_IGBH_PATH }}"
- "${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}:${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}"
skip_run_cmd: 'no'
shm_size: '32gb'
interactive: True
Expand Down
1 change: 1 addition & 0 deletions script/get-cudnn/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ default_env:

deps:
- tags: detect,os
- tags: detect,sudo
- names:
- cuda
skip_if_env:
Expand Down
14 changes: 9 additions & 5 deletions script/get-dataset-mlperf-inference-igbh/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ tags:
- inference
uid: 824e61316c074253
new_env_keys:
- CM_IGBH_DATASET_PATH
- CM_DATASET_IGBH_PATH
- CM_DATASET_IGBH_SIZE
input_mapping:
out_path: CM_IGBH_DATASET_OUT_PATH
deps:
Expand All @@ -21,6 +22,9 @@ deps:
- tags: get,python
names:
- get-python
- tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git
- tags: get,generic-python-lib,_package.colorama
- tags: get,generic-python-lib,_package.tqdm
prehook_deps:
#paper
- env:
Expand Down Expand Up @@ -359,13 +363,13 @@ variations:
default: true
group: dataset-type
env:
CM_IGBH_DATASET_TYPE: debug
CM_IGBH_DATASET_SIZE: tiny
CM_DATASET_IGBH_TYPE: debug
CM_DATASET_IGBH_SIZE: tiny
full:
  group: dataset-type
  env:
    # Full IGBH dataset: the renamed keys must keep the original "full"
    # values (the pre-rename lines set CM_IGBH_DATASET_TYPE/SIZE to
    # full/full); copying debug/tiny here would silently make a "full"
    # run download and split only the tiny debug dataset.
    CM_DATASET_IGBH_TYPE: full
    CM_DATASET_IGBH_SIZE: full
glt:
env:
CM_IGBH_GRAPH_COMPRESS: yes
Expand Down
10 changes: 5 additions & 5 deletions script/get-dataset-mlperf-inference-igbh/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,18 @@ def preprocess(i):
x_sep = " && "

# download the model
if env['CM_IGBH_DATASET_TYPE'] == "debug":
if env['CM_DATASET_IGBH_TYPE'] == "debug":
run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \
f" tools/download_igbh_test.py --target-path {download_loc} "

# split seeds
run_cmd += x_sep + \
f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_IGBH_DATASET_SIZE']}"
f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']}"

# compress graph(for glt implementation)
if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes":
run_cmd += x_sep + \
f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_IGBH_DATASET_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}"
f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}"

env['CM_RUN_CMD'] = run_cmd

Expand All @@ -49,10 +49,10 @@ def postprocess(i):

env = i['env']

env['CM_IGBH_DATASET_PATH'] = env.get(
env['CM_DATASET_IGBH_PATH'] = env.get(
'CM_IGBH_DATASET_OUT_PATH', os.getcwd())

print(
f"Path to the IGBH dataset: {os.path.join(env['CM_IGBH_DATASET_PATH'], env['CM_IGBH_DATASET_SIZE'])}")
f"Path to the IGBH dataset: {os.path.join(env['CM_DATASET_IGBH_PATH'], env['CM_DATASET_IGBH_SIZE'])}")

return {'return': 0}
2 changes: 1 addition & 1 deletion script/get-ml-model-rgat/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ input_mapping:
to: CM_DOWNLOAD_PATH
new_env_keys:
- CM_ML_MODEL_*
- RGAT_CHECKPOINT_PATH
- CM_ML_MODEL_RGAT_CHECKPOINT_PATH
prehook_deps:
- enable_if_env:
CM_DOWNLOAD_TOOL:
Expand Down
8 changes: 4 additions & 4 deletions script/get-ml-model-rgat/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ def postprocess(i):

env = i['env']

if env.get('RGAT_CHECKPOINT_PATH', '') == '':
env['RGAT_CHECKPOINT_PATH'] = os.path.join(
if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '':
env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = os.path.join(
env['CM_ML_MODEL_PATH'], "RGAT.pt")
elif env.get('CM_ML_MODEL_PATH', '') == '':
env['CM_ML_MODEL_PATH'] = env['RGAT_CHECKPOINT_PATH']
env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH']

env['CM_GET_DEPENDENT_CACHED_PATH'] = env['RGAT_CHECKPOINT_PATH']
env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH']

return {'return': 0}
12 changes: 12 additions & 0 deletions script/get-mlperf-inference-src/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,18 @@ versions:
env:
CM_MLPERF_LAST_RELEASE: v3.1
CM_TMP_GIT_CHECKOUT: ''
r4.0:
adr:
inference-git-repo:
tags: _tag.v4.0
env:
CM_MLPERF_LAST_RELEASE: v4.0
r4.1:
adr:
inference-git-repo:
tags: _tag.v4.1
env:
CM_MLPERF_LAST_RELEASE: v4.1
tvm:
env:
CM_MLPERF_LAST_RELEASE: v3.1
Expand Down
2 changes: 1 addition & 1 deletion script/get-mlperf-inference-src/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def preprocess(i):
env["CM_GIT_URL"] = "https://github.com/mlcommons/inference"

if env.get("CM_MLPERF_LAST_RELEASE", '') == '':
env["CM_MLPERF_LAST_RELEASE"] = "v4.1"
env["CM_MLPERF_LAST_RELEASE"] = "v5.0"

if 'CM_GIT_DEPTH' not in env:
env['CM_GIT_DEPTH'] = ''
Expand Down
4 changes: 4 additions & 0 deletions script/process-mlperf-accuracy/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,7 @@ variations:
env:
CM_DATASET: terabyte
group: dataset
igbh:
env:
CM_DATASET: igbh
group: dataset
10 changes: 10 additions & 0 deletions script/process-mlperf-accuracy/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,16 @@ def preprocess(i):
" --dtype " + env.get('CM_ACCURACY_DTYPE',
"float32") + " > '" + out_file + "'"

elif dataset == "igbh":
if env.get('CM_DATASET_IGBH_SIZE', '') == '':
if env.get('CM_MLPERF_SUBMISSION_GENERATION_STYLE',
'') == "full":
env['CM_DATASET_IGBH_SIZE'] = "full"
else:
env['CM_DATASET_IGBH_SIZE'] = "tiny"
CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join(
result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' > '" + out_file + "'"

else:
return {'return': 1, 'error': 'Unsupported dataset'}

Expand Down
13 changes: 13 additions & 0 deletions script/run-mlperf-inference-app/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,19 @@ variations:
mlperf-inference-nvidia-scratch-space:
tags: _version.r4_1
group: benchmark-version

r5.0-dev:
env:
CM_MLPERF_INFERENCE_VERSION: '5.0-dev'
CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default
group: benchmark-version
adr:
get-mlperf-inference-results-dir:
tags: _version.r5.0-dev
get-mlperf-inference-submission-dir:
tags: _version.r5.0-dev
mlperf-inference-nvidia-scratch-space:
tags: _version.r5.0-dev

short:
add_deps_recursive:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
'name': 'mlperf'})
checks.check_return(r)

r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr':
r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr':
{'python': {'name': 'mlperf', 'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'submitter': 'Community',
'implementation': 'cpp', 'hw_name': 'default', 'model': 'retinanet', 'backend': 'onnxruntime', 'device': 'cpu', 'scenario': 'Offline',
'test_query_count': '10', 'clean': 'true', 'quiet': 'yes'})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
'device': 'cpu', 'scenario': 'Offline', 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'})
checks.check_return(r)

r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr':
r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr':
{'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {
'tags': '_pip-install'}, 'tvm-model': {'tags': '_graph_executor'}},
'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'})
checks.check_return(r)

r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr':
r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr':
{'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}}, 'submitter': 'Community',
'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline',
'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'})
Expand Down

0 comments on commit 81b22b3

Please sign in to comment.