From 81b22b3899f1bbf08bbd4894c45e440096715eaf Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:06:13 +0530 Subject: [PATCH] Support MLPerf inference rgat reference implementation (#50) --- .../_cm.yaml | 46 +++++++++++++------ .../customize.py | 17 ++++--- script/app-mlperf-inference/_cm.yaml | 35 ++++++++++++++ script/get-cudnn/_cm.yaml | 1 + .../_cm.yaml | 14 ++++-- .../customize.py | 10 ++-- script/get-ml-model-rgat/_cm.yaml | 2 +- script/get-ml-model-rgat/customize.py | 8 ++-- script/get-mlperf-inference-src/_cm.yaml | 12 +++++ script/get-mlperf-inference-src/customize.py | 2 +- script/process-mlperf-accuracy/_cm.yaml | 4 ++ script/process-mlperf-accuracy/customize.py | 10 ++++ script/run-mlperf-inference-app/_cm.yaml | 13 ++++++ .../src/tutorials/test_tutorial_retinanet.py | 2 +- .../src/tutorials/test_tutorial_tvm_pip_ge.py | 2 +- .../src/tutorials/test_tutorial_tvm_pip_vm.py | 2 +- 16 files changed, 141 insertions(+), 39 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 85fddc989e..8fa3df206a 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -482,13 +482,12 @@ deps: ## RGAT - tags: get,ml-model,rgat names: - - ml-model - rgat-model enable_if_env: CM_MODEL: - rgat skip_if_env: - RGAT_CHECKPOINT_PATH: + CM_ML_MODEL_RGAT_CHECKPOINT_PATH: - 'on' ######################################################################## @@ -620,6 +619,9 @@ deps: enable_if_env: CM_MODEL: - rgat + skip_if_env: + CM_DATASET_IGBH_PATH: + - "on" ######################################################################## # Install MLPerf inference dependencies @@ -1224,27 +1226,45 @@ variations: group: models env: CM_MODEL: rgat + adr: + pytorch: + version: 2.1.0 deps: - tags: get,generic-python-lib,_package.colorama - tags: 
get,generic-python-lib,_package.tqdm - tags: get,generic-python-lib,_package.requests - tags: get,generic-python-lib,_package.torchdata - - tags: get,generic-python-lib,_package.torch-geometric - - tags: get,generic-python-lib,_package.torch-scatter - - tags: get,generic-python-lib,_package.torch-sparse + version: 0.7.0 + - tags: get,generic-python-lib,_package.torchvision + version: 0.16.0 - tags: get,generic-python-lib,_package.pybind11 - tags: get,generic-python-lib,_package.PyYAML + - tags: get,generic-python-lib,_package.numpy + version: 1.26.4 - tags: get,generic-python-lib,_package.pydantic - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html - enable_if_env: - CM_MLPERF_DEVICE: - - cpu + + rgat,cuda: + deps: - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html - enable_if_env: - CM_MLPERF_DEVICE: - - gpu - + - tags: get,generic-python-lib,_package.torch-scatter + - tags: get,generic-python-lib,_package.torch-sparse + - tags: get,generic-python-lib,_package.torch-geometric + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" + + rgat,cpu: + deps: + - tags: get,generic-python-lib,_package.torch-geometric + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + - tags: get,generic-python-lib,_package.torch-scatter + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + - tags: get,generic-python-lib,_package.torch-sparse + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html # Target devices cpu: diff --git 
a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 8cd17e7de5..dcffa5672d 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -115,10 +115,12 @@ def preprocess(i): scenario_extra_options = '' NUM_THREADS = env['CM_NUM_THREADS'] - if int(NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu": + if int( + NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu" and env['CM_MODEL'] != "rgat": NUM_THREADS = "2" # Don't use more than 2 threads when run on GPU - if env['CM_MODEL'] in ['resnet50', 'retinanet', 'stable-diffusion-xl']: + if env['CM_MODEL'] in ['resnet50', 'retinanet', + 'stable-diffusion-xl', 'rgat']: scenario_extra_options += " --threads " + NUM_THREADS ml_model_name = env['CM_MODEL'] @@ -485,15 +487,16 @@ def get_run_cmd_reference( # have to add the condition for running in debug mode or real run mode cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ - " --dataset-path " + env['CM_IGBH_DATASET_PATH'] + \ - " --device " + device.replace("cuda", "cuda:0") + \ + " --dataset-path " + env['CM_DATASET_IGBH_PATH'] + \ + " --device " + device.replace("cuda", "gpu") + \ env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ scenario_extra_options + mode_extra_options + \ " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + dtype_rgat + \ - " --model-path " + env['RGAT_CHECKPOINT_PATH'] + \ - " --mlperf_conf " + \ - os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf") + " --model-path " + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] + + if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes": + cmd += " --in-memory " if env.get('CM_NETWORK_LOADGEN', '') in ["lon", "sut"]: cmd = cmd + " " + "--network " + env['CM_NETWORK_LOADGEN'] diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index ffb4a26b86..4c368346eb 100644 
--- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -767,6 +767,20 @@ variations: env: CM_MODEL: rgat + posthook_deps: + - enable_if_env: + CM_MLPERF_LOADGEN_MODE: + - accuracy + - all + CM_MLPERF_ACCURACY_RESULTS_DIR: + - 'on' + skip_if_env: + CM_MLPERF_IMPLEMENTATION: + - nvidia + names: + - mlperf-accuracy-script + - 3d-unet-accuracy-script + tags: run,accuracy,mlperf,_igbh sdxl: group: @@ -1645,6 +1659,25 @@ variations: CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' CM_MLPERF_INFERENCE_VERSION: '4.1' + r5.0-dev_default: + group: + reproducibility + add_deps_recursive: + nvidia-inference-common-code: + version: r4.1 + tags: _mlcommons + nvidia-inference-server: + version: r4.1 + tags: _mlcommons + intel-harness: + tags: _v4.1 + default_env: + CM_SKIP_SYS_UTILS: 'yes' + CM_REGENERATE_MEASURE_FILES: 'yes' + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' + + invalid_variation_combinations: - - retinanet @@ -1768,6 +1801,8 @@ docker: - "${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}:${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}" - "${{ SDXL_CHECKPOINT_PATH }}:${{ SDXL_CHECKPOINT_PATH }}" - "${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}:${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}" + - "${{ CM_DATASET_IGBH_PATH }}:${{ CM_DATASET_IGBH_PATH }}" + - "${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}:${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}" skip_run_cmd: 'no' shm_size: '32gb' interactive: True diff --git a/script/get-cudnn/_cm.yaml b/script/get-cudnn/_cm.yaml index b01506f6dc..fa5ccd2c77 100644 --- a/script/get-cudnn/_cm.yaml +++ b/script/get-cudnn/_cm.yaml @@ -19,6 +19,7 @@ default_env: deps: - tags: detect,os +- tags: detect,sudo - names: - cuda skip_if_env: diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index c3e78b4640..4750f3ff51 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ 
b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -11,7 +11,8 @@ tags: - inference uid: 824e61316c074253 new_env_keys: - - CM_IGBH_DATASET_PATH + - CM_DATASET_IGBH_PATH + - CM_DATASET_IGBH_SIZE input_mapping: out_path: CM_IGBH_DATASET_OUT_PATH deps: @@ -21,6 +22,9 @@ deps: - tags: get,python names: - get-python + - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git + - tags: get,generic-python-lib,_package.colorama + - tags: get,generic-python-lib,_package.tqdm prehook_deps: #paper - env: @@ -359,13 +363,13 @@ variations: default: true group: dataset-type env: - CM_IGBH_DATASET_TYPE: debug - CM_IGBH_DATASET_SIZE: tiny + CM_DATASET_IGBH_TYPE: debug + CM_DATASET_IGBH_SIZE: tiny full: group: dataset-type env: - CM_IGBH_DATASET_TYPE: full - CM_IGBH_DATASET_SIZE: full + CM_DATASET_IGBH_TYPE: full + CM_DATASET_IGBH_SIZE: full glt: env: CM_IGBH_GRAPH_COMPRESS: yes diff --git a/script/get-dataset-mlperf-inference-igbh/customize.py b/script/get-dataset-mlperf-inference-igbh/customize.py index 9d4240209a..a0e6f24a64 100644 --- a/script/get-dataset-mlperf-inference-igbh/customize.py +++ b/script/get-dataset-mlperf-inference-igbh/customize.py @@ -27,18 +27,18 @@ def preprocess(i): x_sep = " && " # download the model - if env['CM_IGBH_DATASET_TYPE'] == "debug": + if env['CM_DATASET_IGBH_TYPE'] == "debug": run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \ f" tools/download_igbh_test.py --target-path {download_loc} " # split seeds run_cmd += x_sep + \ - f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_IGBH_DATASET_SIZE']}" + f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']}" # compress graph(for glt implementation) if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes": run_cmd += x_sep + \ - f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size 
{env['CM_IGBH_DATASET_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}" + f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}" env['CM_RUN_CMD'] = run_cmd @@ -49,10 +49,10 @@ def postprocess(i): env = i['env'] - env['CM_IGBH_DATASET_PATH'] = env.get( + env['CM_DATASET_IGBH_PATH'] = env.get( 'CM_IGBH_DATASET_OUT_PATH', os.getcwd()) print( - f"Path to the IGBH dataset: {os.path.join(env['CM_IGBH_DATASET_PATH'], env['CM_IGBH_DATASET_SIZE'])}") + f"Path to the IGBH dataset: {os.path.join(env['CM_DATASET_IGBH_PATH'], env['CM_DATASET_IGBH_SIZE'])}") return {'return': 0} diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index 0bc4b1eab1..644bf688a3 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -12,7 +12,7 @@ input_mapping: to: CM_DOWNLOAD_PATH new_env_keys: - CM_ML_MODEL_* -- RGAT_CHECKPOINT_PATH +- CM_ML_MODEL_RGAT_CHECKPOINT_PATH prehook_deps: - enable_if_env: CM_DOWNLOAD_TOOL: diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py index 2fc39c59d2..ac8feaad7a 100644 --- a/script/get-ml-model-rgat/customize.py +++ b/script/get-ml-model-rgat/customize.py @@ -19,12 +19,12 @@ def postprocess(i): env = i['env'] - if env.get('RGAT_CHECKPOINT_PATH', '') == '': - env['RGAT_CHECKPOINT_PATH'] = os.path.join( + if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '': + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = os.path.join( env['CM_ML_MODEL_PATH'], "RGAT.pt") elif env.get('CM_ML_MODEL_PATH', '') == '': - env['CM_ML_MODEL_PATH'] = env['RGAT_CHECKPOINT_PATH'] + env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] - env['CM_GET_DEPENDENT_CACHED_PATH'] = env['RGAT_CHECKPOINT_PATH'] + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] return {'return': 0} diff --git 
a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml index e19e653787..c5e195a889 100644 --- a/script/get-mlperf-inference-src/_cm.yaml +++ b/script/get-mlperf-inference-src/_cm.yaml @@ -166,6 +166,18 @@ versions: env: CM_MLPERF_LAST_RELEASE: v3.1 CM_TMP_GIT_CHECKOUT: '' + r4.0: + adr: + inference-git-repo: + tags: _tag.v4.0 + env: + CM_MLPERF_LAST_RELEASE: v4.0 + r4.1: + adr: + inference-git-repo: + tags: _tag.v4.1 + env: + CM_MLPERF_LAST_RELEASE: v4.1 tvm: env: CM_MLPERF_LAST_RELEASE: v3.1 diff --git a/script/get-mlperf-inference-src/customize.py b/script/get-mlperf-inference-src/customize.py index c9aad1ee14..16669e2d55 100644 --- a/script/get-mlperf-inference-src/customize.py +++ b/script/get-mlperf-inference-src/customize.py @@ -54,7 +54,7 @@ def preprocess(i): env["CM_GIT_URL"] = "https://github.com/mlcommons/inference" if env.get("CM_MLPERF_LAST_RELEASE", '') == '': - env["CM_MLPERF_LAST_RELEASE"] = "v4.1" + env["CM_MLPERF_LAST_RELEASE"] = "v5.0" if 'CM_GIT_DEPTH' not in env: env['CM_GIT_DEPTH'] = '' diff --git a/script/process-mlperf-accuracy/_cm.yaml b/script/process-mlperf-accuracy/_cm.yaml index f6d9acd5e1..59544fd3ab 100644 --- a/script/process-mlperf-accuracy/_cm.yaml +++ b/script/process-mlperf-accuracy/_cm.yaml @@ -261,3 +261,7 @@ variations: env: CM_DATASET: terabyte group: dataset + igbh: + env: + CM_DATASET: igbh + group: dataset diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index 381b1cdcd1..f1d8b78747 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -171,6 +171,16 @@ def preprocess(i): " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") + " > '" + out_file + "'" + elif dataset == "igbh": + if env.get('CM_DATASET_IGBH_SIZE', '') == '': + if env.get('CM_MLPERF_SUBMISSION_GENERATION_STYLE', + '') == "full": + env['CM_DATASET_IGBH_SIZE'] = "full" + else: + env['CM_DATASET_IGBH_SIZE'] = "tiny" + CMD 
= env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join( + result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' > '" + out_file + "'" + else: return {'return': 1, 'error': 'Unsupported dataset'} diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 5b9d4b1512..05ae0d476a 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -360,6 +360,19 @@ variations: mlperf-inference-nvidia-scratch-space: tags: _version.r4_1 group: benchmark-version + + r5.0-dev: + env: + CM_MLPERF_INFERENCE_VERSION: '5.0-dev' + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default + group: benchmark-version + adr: + get-mlperf-inference-results-dir: + tags: _version.r5.0-dev + get-mlperf-inference-submission-dir: + tags: _version.r5.0-dev + mlperf-inference-nvidia-scratch-space: + tags: _version.r5.0-dev short: add_deps_recursive: diff --git a/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py b/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py index 0b96f17f5a..bc8d22f783 100644 --- a/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py +++ b/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py @@ -30,7 +30,7 @@ 'name': 'mlperf'}) checks.check_return(r) -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'submitter': 'Community', 'implementation': 'cpp', 'hw_name': 'default', 'model': 
'retinanet', 'backend': 'onnxruntime', 'device': 'cpu', 'scenario': 'Offline', 'test_query_count': '10', 'clean': 'true', 'quiet': 'yes'}) diff --git a/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py b/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py index 692ddeb830..4e17d572d4 100644 --- a/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py +++ b/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py @@ -18,7 +18,7 @@ 'device': 'cpu', 'scenario': 'Offline', 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) checks.check_return(r) -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': { 'tags': '_pip-install'}, 'tvm-model': {'tags': '_graph_executor'}}, 'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', diff --git a/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py b/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py index 5758ad08f2..28bc0132bf 100644 --- a/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py +++ b/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py @@ -20,7 +20,7 @@ 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) checks.check_return(r) -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}}, 'submitter': 'Community', 'implementation': 'python', 'hw_name': 
'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'})