From 20d73cf37df14387ed23e2742fd7361106a15830 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 22 Jul 2024 21:40:29 +0200 Subject: [PATCH 1/2] remove tmp files --- cm-repro/cm-run-script-input.json | 41 ------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 cm-repro/cm-run-script-input.json diff --git a/cm-repro/cm-run-script-input.json b/cm-repro/cm-run-script-input.json deleted file mode 100644 index 9fe20fd5e8..0000000000 --- a/cm-repro/cm-run-script-input.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "action": "run", - "automation": "script", - "tags": "run-mlperf,inference,_find-performance,_full", - "model": "mixtral-8x7b-99", - "implementation": "reference", - "framework": "pytorch", - "category": "edge", - "scenario": "Offline", - "execution_mode": "test", - "device": "cuda", - "test_query_count": "100", - "adr": { - "cuda": { - "version": "12.4.1" - } - }, - "quiet": true, - "repro": true, - "cmd": [ - "--tags=run-mlperf,inference,_find-performance,_full", - "--model=mixtral-8x7b-99", - "--implementation=reference", - "--framework=pytorch", - "--category=edge", - "--scenario=Offline", - "--execution_mode=test", - "--device=cuda", - "--test_query_count=100", - "--adr.cuda.version=12.4.1", - "--quiet", - "--repro" - ], - "out": "con", - "parsed_automation": [ - [ - "script", - "5b4e0237da074764" - ] - ] -} \ No newline at end of file From fb71007baece233785289f5e38839b198112dbee Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 22 Jul 2024 22:29:35 +0200 Subject: [PATCH 2/2] update automation, reproducibility and optimization challenges --- README.md | 92 ++++- .../README.md | 32 ++ .../_cm.json | 22 ++ .../README.md | 4 + .../_cm.yaml | 21 ++ .../README.md | 10 + .../_cm.yaml | 25 ++ .../README.md | 30 ++ .../_cm.json | 22 ++ .../connect-mlperf-with-medperf/README.md | 23 ++ .../connect-mlperf-with-medperf/_cm.json | 26 ++ .../README.md | 16 + .../_cm.json | 20 ++ .../README.md | 7 + .../_cm.json | 19 ++ .../README.md | 18 + .../_cm.json | 27 ++ .../README.md | 74 +++++ .../_cm.json | 27 ++ ...wd-benchmark-mlperf-bert-inference-cuda.md | 281 ++++++++++++++++ .../docs/generate-3d-unet-submission.md | 59 ++++ .../docs/generate-bert-submission.md | 80 +++++ .../docs/generate-resnet50-submission.md | 82 +++++ .../docs/generate-retinanet-submission.md | 67 ++++ .../docs/generate-rnnt-submission.md | 53 +++ .../docs/setup-aws-instance.md | 48 +++ .../docs/setup-gcp-instance.md | 35 ++ .../docs/setup-nvidia-jetson-orin.md | 53 +++ .../README.md | 83 +++++ .../_cm.json | 26 ++ .../docs/generate-3d-unet-submission.md | 67 ++++ .../docs/generate-bert-submission.md | 113 +++++++ .../docs/generate-resnet50-submission.md | 90 +++++ .../docs/generate-retinanet-submission.md | 75 +++++ .../docs/generate-rnnt-submission.md | 61 ++++ .../docs/setup-aws-instance.md | 50 +++ .../docs/setup-gcp-instance.md | 37 +++ .../docs/setup-nvidia-jetson-orin.md | 54 +++ .../README.md | 31 ++ .../_cm.json | 27 ++ .../README.md | 20 ++ .../_cm.json | 26 ++ .../README.md | 31 ++ .../_cm.json | 28 ++ .../README.md | 32 ++ .../_cm.json | 27 ++ .../README.md | 52 +++ .../_cm.json | 27 ++ .../README.md | 31 ++ .../_cm.json | 26 ++ .../README.md | 34 ++ .../_cm.json | 28 ++ .../README.md | 33 ++ .../_cm.json | 26 ++ .../README.md | 41 +++ .../_cm.json | 28 ++ .../README.md | 31 ++ .../_cm.json | 27 ++ .../README.md | 36 ++ .../_cm.json | 28 ++ .../repro-mlperf-inf-v3.0-orin/README.md | 16 + challenge/repro-mlperf-inf-v3.0-orin/_cm.json | 23 ++ .../README.md | 39 +++ .../_cm.json | 
20 ++ .../README.md | 3 + .../repro-mlperf-inference-v4.0-2024/_cm.yaml | 25 ++ .../README.md | 4 + .../repro-mlperf-inference-v4.1-2024/_cm.yaml | 22 ++ .../README.md | 36 ++ .../_cm.json | 23 ++ .../README.md | 17 + .../_cm.json | 23 ++ challenge/run-mlperf@home-v3.1-cpu/README.md | 67 ++++ challenge/run-mlperf@home-v3.1-cpu/_cm.json | 21 ++ .../run-cpu-bert-99-deepsparse.md | 100 ++++++ ...cpu-dse-mobilenets-efficientnets-tflite.md | 77 +++++ challenge/run-mlperf@home-v3.1-gpu/README.md | 65 ++++ challenge/run-mlperf@home-v3.1-gpu/_cm.json | 20 ++ ...idia-gpu-bert-99-nvidia-docker-tensorrt.md | 193 +++++++++++ .../run-nvidia-gpu-gpt-j-6b-ref-pytorch.md | 314 ++++++++++++++++++ .../train-llm-for-cm-mlperf-2023/README.md | 20 ++ .../train-llm-for-cm-mlperf-2023/_cm.json | 21 ++ .../README.md | 10 + .../_cm.json | 7 + ...wd-benchmark-mlperf-bert-inference-cuda.md | 285 ++++++++++++++++ .../docs/generate-bert-submission.md | 87 +++++ .../docs/generate-resnet50-submission.md | 74 +++++ .../docs/run-nvidia-implementation.md | 47 +++ .../docs/setup-aws-graviton.md | 25 ++ .../get-mlperf-inference-repos.cmd | 3 + .../README.md | 93 ++++++ .../_cm.json | 16 + .../_cm.json | 17 + .../_cm.json | 17 + .../_cm.json | 17 + .../README.md | 42 +++ .../_cm.yaml | 45 +++ .../customize.py | 0 .../install_deps.sh | 15 + .../install_deps_cuda.sh | 15 + .../install_deps_driver.sh | 15 + .../install_deps_pytorch.sh | 15 + .../install_deps_transformers.sh | 17 + .../run.sh | 21 ++ .../run_figure11.sh | 20 ++ .../run_figure12.sh | 18 + .../run_figure13.sh | 19 ++ .../README.md | 61 ++++ .../_cm.yaml | 40 +++ .../customize.py | 22 ++ .../install_deps.sh | 28 ++ .../install_spec_deps.sh | 37 +++ .../plot.sh | 21 ++ .../run.sh | 23 ++ .../run_spec.sh | 23 ++ .../README.md | 74 +++++ .../_cm.yaml | 48 +++ .../customize.py | 22 ++ .../install_deps.sh | 15 + .../install_deps_gem5.sh | 12 + .../install_deps_kernel.sh | 12 + .../plot.sh | 28 ++ .../run.sh | 16 + .../README.md | 50 +++ .../_cm.yaml | 36 ++ .../install_deps.bat | 18 + .../install_deps.sh | 12 + .../plot.bat | 12 + .../plot.sh | 83 +++++ .../run.bat | 12 + .../run.sh | 49 +++ .../README.md | 30 ++ .../_cm.yaml | 20 ++ .../customize.py | 22 ++ .../install_deps.bat | 4 + .../install_deps.sh | 24 ++ .../main.py | 0 .../run.bat | 4 + .../run.sh | 41 +++ .../.gitignore | 1 + .../README.md | 74 +++++ .../_cm.yaml | 40 +++ .../check.sh | 15 + .../customize.py | 22 ++ .../install_deps.bat | 18 + .../install_deps.sh | 30 ++ .../main.py | 10 + .../plot.bat | 12 + .../plot.sh | 15 + .../plot_pregenerated.sh | 15 + .../run.bat | 12 + .../run.sh | 14 + .../Dockerfile | 28 ++ .../README.md | 40 +++ .../_cm.yaml | 30 ++ .../customize.py | 22 ++ .../install_deps.sh | 49 +++ .../plot.sh | 60 ++++ .../run.sh | 54 +++ .../Preliminary_build_onikiri.sh | 15 + .../Preliminary_create_binary.sh | 19 ++ .../Preliminary_experiment.sh | 30 ++ .../Preliminary_experiment_setup.sh | 13 + .../Preliminary_plot.sh | 15 + .../README.md | 49 +++ .../_cm.yaml | 55 +++ .../build_compiler.sh | 32 ++ .../build_onikiri.sh | 14 + .../create_binary.sh | 24 ++ .../experiment.sh | 14 + .../experiment_setup.sh | 16 + .../install_deps.sh | 4 + .../plot.sh | 34 ++ .../README.md} | 19 +- .../_cm.yaml | 9 +- .../customize.py | 22 ++ .../install_deps.sh | 0 .../main.py | 10 + .../plot.sh | 0 .../run.sh | 0 .../README.md | 1 - .../reproduce-micro-paper-2023-xyz/README.md | 178 ---------- 182 files changed, 6518 insertions(+), 203 deletions(-) create mode 100644 
challenge/add-derived-metrics-to-mlperf-inference/README.md create mode 100644 challenge/add-derived-metrics-to-mlperf-inference/_cm.json create mode 100644 challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md create mode 100644 challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml create mode 100644 challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md create mode 100644 challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml create mode 100644 challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md create mode 100644 challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json create mode 100644 challenge/connect-mlperf-with-medperf/README.md create mode 100644 challenge/connect-mlperf-with-medperf/_cm.json create mode 100644 challenge/optimize-mlperf-inference-scc2023/README.md create mode 100644 challenge/optimize-mlperf-inference-scc2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-scc2024/README.md create mode 100644 challenge/optimize-mlperf-inference-scc2024/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v2.1-2022/README.md create mode 100644 challenge/optimize-mlperf-inference-v2.1-2022/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md 
create mode 100644 challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json create mode 100644 challenge/repro-mlperf-inf-v3.0-orin/README.md create mode 100644 challenge/repro-mlperf-inf-v3.0-orin/_cm.json create mode 100644 challenge/repro-mlperf-inference-retinanet-scc2022/README.md create mode 100644 challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json create mode 100644 challenge/repro-mlperf-inference-v4.0-2024/README.md create mode 100644 challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml create mode 100644 challenge/repro-mlperf-inference-v4.1-2024/README.md create mode 100644 challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml create mode 100644 challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md create mode 100644 challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json create mode 100644 challenge/reproduce-mlperf-training-v3.0-2023/README.md create mode 100644 challenge/reproduce-mlperf-training-v3.0-2023/_cm.json create mode 100644 challenge/run-mlperf@home-v3.1-cpu/README.md create mode 100644 challenge/run-mlperf@home-v3.1-cpu/_cm.json create mode 100644 challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md create mode 100644 challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md create mode 100644 challenge/run-mlperf@home-v3.1-gpu/README.md create mode 100644 challenge/run-mlperf@home-v3.1-gpu/_cm.json create mode 100644 challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md create mode 100644 challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md create mode 100644 challenge/train-llm-for-cm-mlperf-2023/README.md create mode 100644 challenge/train-llm-for-cm-mlperf-2023/_cm.json create mode 100644 project/mlperf-inference-v3.0-submissions/README.md create mode 100644 project/mlperf-inference-v3.0-submissions/_cm.json create mode 100644 
project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md create mode 100644 project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md create mode 100644 project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md create mode 100644 project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md create mode 100644 project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md create mode 100644 project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd create mode 100644 report/mlperf-inference-v3.1-analysis-ctuning/README.md create mode 100644 report/mlperf-inference-v3.1-analysis-ctuning/_cm.json create mode 100644 report/mlperf-inference-v3.1-press-release-ctuning/_cm.json create mode 100644 report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json create mode 100644 report/mlperf-inference-v4.0-press-release-ctuning/_cm.json create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/_cm.yaml rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-22}/customize.py (100%) create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/install_deps_cuda.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/install_deps_driver.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/install_deps_pytorch.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/install_deps_transformers.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/run_figure11.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/run_figure12.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/run_figure13.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/customize.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/install_spec_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/run_spec.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/customize.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/install_deps_gem5.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/install_deps_kernel.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/install_deps.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/install_deps.sh create mode 100644 
script/reproduce-ieee-acm-micro2023-paper-38/plot.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/run.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/customize.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/install_deps.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/install_deps.sh rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-5}/main.py (100%) create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/run.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/.gitignore create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/check.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/customize.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/install_deps.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/main.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/plot.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/plot_pregenerated.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/run.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/Dockerfile create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/customize.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_build_onikiri.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_create_binary.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment_setup.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/build_compiler.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/build_onikiri.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/create_binary.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/experiment.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/experiment_setup.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/plot.sh rename 
script/{reproduce-micro-paper-2023-victima/README-extra.md => reproduce-ieee-acm-micro2023-paper-96/README.md} (64%) rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-96}/_cm.yaml (84%) create mode 100644 script/reproduce-ieee-acm-micro2023-paper-96/customize.py rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-96}/install_deps.sh (100%) create mode 100644 script/reproduce-ieee-acm-micro2023-paper-96/main.py rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-96}/plot.sh (100%) rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-96}/run.sh (100%) delete mode 100644 script/reproduce-micro-paper-2023-victima/README.md delete mode 100644 script/reproduce-micro-paper-2023-xyz/README.md diff --git a/README.md b/README.md index 02656175e9..d3dc40d94e 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![CM script automation features test](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml) [![MLPerf inference MLCommons C++ ResNet50](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml) -This repository contains reusable and cross-platform automation recipes to run DevOps, MLOps, AIOps and MLPerf +This repository contains reusable and cross-platform automation recipes to run DevOps, MLOps, and MLPerf via a simple and human-readable [Collective Mind interface (CM)](https://github.com/mlcommons/ck) while adapting to different operating systems, software and hardware. @@ -19,31 +19,66 @@ and unified input/output to make them reusable in different projects either indi or by chaining them together into portable automation workflows, applications and web services adaptable to continuously changing models, data sets, software and hardware. -### Citing this project +We develop and test [CM scripts](script) as a community effort to support the following projects: +* [CM for MLPerf](https://docs.mlcommons.org/inference): modularize and automate MLPerf benchmarks + (maintained by [MLCommons](https://mlcommons.org) and originally developed by [cKnowledge.org](https://cKnowledge.org), [OctoML](https://octoml.ai) and [cTuning.org](https://cTuning.org)) +* [CM for research and education](https://cTuning.org/ae): provide a common interface to automate and reproduce results from research papers + and MLPerf benchmarks (maintained by [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org)) +* [CM for ABTF](https://github.com/mlcommons/cm4abtf): provide a unified CM interface to run automotive benchmarks + (maintained by [MLCommons](https://mlcommons.org) and originally developed by [cKnowledge.org](https://cKnowledge.org)) +* [CM for optimization](https://access.cknowledge.org/playground/?action=challenges): co-design efficient and cost-effective + software and hardware for AI, ML and other emerging workloads via open challenges + (maintained by [cKnowledge.org](https://cKnowledge.org)) -Please use this [BibTeX file](https://github.com/mlcommons/ck/blob/master/citation.bib). +You can read this [ArXiv paper](https://arxiv.org/abs/2406.16791) to learn more about the CM motivation and long-term vision. 
+ +Please provide your feedback or submit your issues [here](https://github.com/mlcommons/cm4mlops/issues). ## Catalog Online catalog: [cKnowledge](https://access.cknowledge.org/playground/?action=scripts), [MLCommons](https://docs.mlcommons.org/cm4mlops/scripts). -## Examples +## Citation -### Run image classificaiton via CM +Please use this [BibTeX file](https://github.com/mlcommons/ck/blob/master/citation.bib) to cite this project. -```bash -pip install cmind -U +## A few demos -cm pull repo mlcommons@cm4mlops --branch=dev +### Install CM and virtual env + +Install the [MLCommons CM automation language](https://access.cknowledge.org/playground/?action=install). -cmr "python app image-classification onnx" --quiet +### Pull this repository + +```bash +cm pull repo mlcommons@cm4mlops --branch=dev ``` -### Run MLPerf inference benchmark via CM +### Run image classification using CM ```bash -pip install cm4mlperf -U +cm run script "python app image-classification onnx _cpu" --help + +cm run script "download file _wget" --url=https://cKnowledge.org/ai/data/computer_mouse.jpg --verify=no --env.CM_DOWNLOAD_CHECKSUM=45ae5c940233892c2f860efdf0b66e7e +cm run script "python app image-classification onnx _cpu" --input=computer_mouse.jpg + +cmr "python app image-classification onnx _cpu" --input=computer_mouse.jpg +cmr --tags=python,app,image-classification,onnx,_cpu --input=computer_mouse.jpg +cmr 3d5e908e472b417e --input=computer_mouse.jpg + +cm docker script "python app image-classification onnx _cpu" --input=computer_mouse.jpg + +cm gui script "python app image-classification onnx _cpu" +``` + +### Re-run experiments from the ACM/IEEE MICRO'23 paper + +Check this [script/reproduce-ieee-acm-micro2023-paper-96](README.md). + +### Run MLPerf ResNet CPU inference benchmark via CM + +```bash cm run script --tags=run-mlperf,inference,_performance-only,_short \ --division=open \ --category=edge \ @@ -62,6 +97,38 @@ cm run script --tags=run-mlperf,inference,_performance-only,_short \ --time ``` +### Run MLPerf BERT CUDA inference benchmark v4.1 via CM + +```bash +cmr "run-mlperf inference _find-performance _full _r4.1" \ + --model=bert-99 \ + --implementation=nvidia \ + --framework=tensorrt \ + --category=datacenter \ + --scenario=Offline \ + --execution_mode=test \ + --device=cuda \ + --docker \ + --docker_cm_repo=mlcommons@cm4mlops \ + --docker_cm_repo_flags="--branch=mlperf-inference" \ + --test_query_count=100 \ + --quiet +``` + +### Run MLPerf SDXL reference inference benchmark v4.1 via CM + +```bash +cm run script \ + --tags=run-mlperf,inference,_r4.1 \ + --model=sdxl \ + --implementation=reference \ + --framework=pytorch \ + --category=datacenter \ + --scenario=Offline \ + --execution_mode=valid \ + --device=cuda \ + --quiet +``` ## License @@ -72,6 +139,5 @@ cm run script --tags=run-mlperf,inference,_performance-only,_short \ We thank [cKnowledge.org](https://cKnowledge.org), [cTuning foundation](https://cTuning.org) and [MLCommons](https://mlcommons.org) for sponsoring this project! - -We also thank all [volunteers, collaborators and contributors](CONTRIBUTING.md) +We also thank all [volunteers, collaborators and contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) for their support, fruitful discussions, and useful feedback! 
diff --git a/challenge/add-derived-metrics-to-mlperf-inference/README.md b/challenge/add-derived-metrics-to-mlperf-inference/README.md
new file mode 100644
index 0000000000..8302f63d68
--- /dev/null
+++ b/challenge/add-derived-metrics-to-mlperf-inference/README.md
@@ -0,0 +1,32 @@
+### Challenge
+
+Check past MLPerf inference results in [this MLCommons repository](https://github.com/mlcommons/cm4mlperf-results)
+and add derived metrics such as results per number of cores, power efficiency, device cost, operational costs, etc.
+
+Add clock speed as a third dimension to graphs and improve bar-graph visualization.
+
+Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md)
+to run reference implementations of MLPerf inference benchmarks
+using the CM automation language and use them as a base for your developments.
+
+Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision.
+
+
+### Prizes
+
+* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.*
+* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).*
+
+
+### Organizers
+
+* [MLCommons](https://cKnowledge.org/mlcommons-taskforce)
+* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation)
+* [cKnowledge.org](https://www.linkedin.com/company/cknowledge)
+
+### Results
+
+All accepted results will be publicly available in the CM format with derived metrics
+in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results),
+in the [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments)
+and on the official [MLCommons website](https://mlcommons.org).
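+
+For orientation, a minimal sketch of how one might start working with the past results locally is shown below,
+assuming the standard CM CLI conventions used elsewhere in this repository; the exact automation and tag names
+may need adjusting to the actual entries in the results repository:
+
+```bash
+# Pull the repository with past MLPerf inference results in the CM format.
+cm pull repo mlcommons@cm4mlperf-results
+
+# List the imported experiment entries before post-processing them
+# into derived metrics such as performance per watt or per core.
+cm find experiment --tags=mlperf-inference
+```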
diff --git a/challenge/add-derived-metrics-to-mlperf-inference/_cm.json b/challenge/add-derived-metrics-to-mlperf-inference/_cm.json new file mode 100644 index 0000000000..cbdc212467 --- /dev/null +++ b/challenge/add-derived-metrics-to-mlperf-inference/_cm.json @@ -0,0 +1,22 @@ +{ + "alias": "add-derived-metrics-to-mlperf-inference", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close_extension": true, + "date_open": "20240204", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "derived-metrics", + "mlperf-inference", + "mlperf-inference-derived-metrics" + ], + "title": "Add derived metrics to MLPerf inference benchmarks (power efficiency, results / No of cores, costs, etc)", + "trophies": true, + "uid": "c65b56d7770946ee" +} diff --git a/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md new file mode 100644 index 0000000000..a2059c0fe8 --- /dev/null +++ b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md @@ -0,0 +1,4 @@ +20240220: +* A prototype of a GUI to generate CM commands to run MLPerf inference benchmarks is ready: [link](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725) +* A prototype of the infrastructure to reproduce MLPerf inference benchmark results is ready: [link](https://access.cknowledge.org/playground/?action=reproduce) +* On-going efforts: https://github.com/mlcommons/ck/issues/1052 diff --git a/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml new file mode 100644 index 0000000000..b8b519d27f --- /dev/null +++ b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml @@ -0,0 +1,21 @@ +alias: automate-mlperf-inference-v3.1-and-v4.0-2024 +uid: f89f152fc2614240 + +automation_alias: challenge +automation_uid: 3d84abd768f34e08 + +title: Add MLCommons CM workflows and unifed interface to automate MLPerf inference v3.1 and v4.0 benchmarks (Intel, Nvidia, Qualcomm, Arm64, TPU ...) + +date_open: '20231215' +date_close: '20240315' + +hot: true + +tags: +- automate +- mlperf-inference-v3.1-and-v4.0 +- 2024 + +experiments: +- tags: mlperf-inference,v3.1 +- tags: mlperf-inference,v4.0 diff --git a/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md new file mode 100644 index 0000000000..adfbea7263 --- /dev/null +++ b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md @@ -0,0 +1,10 @@ +This challenge is under preparation. You can read about the motivation behind this challenge in our [invited talk at MLPerf-Bench @ HPCA'24](https://doi.org/10.5281/zenodo.10786893). + +We plan to extend [MLCommons CM framework](https://github.com/mlcommons/ck) +to automatically compose high-performance and cost-efficient AI systems +based on MLPerf inference v4.0 results and [CM automation recipes](https://access.cknowledge.org/playground/?action=scripts). 
+
+* A prototype of a GUI to generate CM commands to run MLPerf inference benchmarks is ready: [link](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725)
+* A prototype of the infrastructure to reproduce MLPerf inference benchmark results is ready: [link](https://access.cknowledge.org/playground/?action=reproduce)
+
+Contact the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) for more details.
diff --git a/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml
new file mode 100644
index 0000000000..b1d4fe9f18
--- /dev/null
+++ b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml
@@ -0,0 +1,25 @@
+alias: compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024
+uid: 7c983102d89e4869
+
+automation_alias: challenge
+automation_uid: 3d84abd768f34e08
+
+title: "Compose high-performance and cost-efficient AI systems using MLCommons' Collective Mind and MLPerf inference"
+
+date_open: '20240101'
+
+tags:
+- compose
+- ai
+- systems
+- mlperf-inference-v4.0
+- cm
+- mlcommons-cm
+- mlperf
+- v4.0
+- performance
+- energy
+- cost
+
+experiments:
+- tags: mlperf-inference,v4.0
diff --git a/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md
new file mode 100644
index 0000000000..306341271c
--- /dev/null
+++ b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md
@@ -0,0 +1,30 @@
+### Challenge
+
+Connect CM workflows to run MLPerf inference benchmarks with [OpenBenchmarking.org](https://openbenchmarking.org).
+
+Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md)
+to run reference implementations of MLPerf inference benchmarks
+using the CM automation language and use them as a base for your developments.
+
+Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision.
+
+
+### Prizes
+
+* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.*
+* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).*
+
+
+
+### Organizers
+
+* Michael Larabel
+* Grigori Fursin
+* [MLCommons](https://cKnowledge.org/mlcommons-taskforce)
+* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation)
+* [cKnowledge.org](https://www.linkedin.com/company/cknowledge)
+
+### Results
+
+Results will be available at [OpenBenchmarking.org](https://openbenchmarking.org)
+and the [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments).
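+
+As a starting point for such a wrapper, the OpenBenchmarking.org integration would essentially need to invoke
+the same reference-implementation command that is documented in the top-level README of this repository; the
+sketch below simply mirrors those flags and is illustrative rather than a fixed interface:
+
+```bash
+# Illustrative short performance run of the reference ResNet50 implementation on CPU,
+# mirroring the example flags used elsewhere in this repository.
+cm run script --tags=run-mlperf,inference,_performance-only,_short \
+   --division=open --category=edge \
+   --model=resnet50 --implementation=reference \
+   --device=cpu --backend=onnxruntime \
+   --scenario=Offline --quiet
+```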
diff --git a/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json new file mode 100644 index 0000000000..c1e65aadbd --- /dev/null +++ b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json @@ -0,0 +1,22 @@ +{ + "alias": "connect-mlperf-inference-v3.1-with-openbenchmarking", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_open": "20240101", + "date_close_extension": true, + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "openbenchmarking", + "mlperf-inference", + "mlperf-inference-openbenchmarking" + ], + "title": "Run MLPerf inference benchmarks using CM via OpenBenchmarking.org", + "trophies": true, + "uid": "534592626eb44efe" +} diff --git a/challenge/connect-mlperf-with-medperf/README.md b/challenge/connect-mlperf-with-medperf/README.md new file mode 100644 index 0000000000..f2f572bd48 --- /dev/null +++ b/challenge/connect-mlperf-with-medperf/README.md @@ -0,0 +1,23 @@ +### Challenge + +Evaluate models from [MLCommons MedPerf platform](https://www.medperf.org) in terms of latency, throughput, power consumption and other metrics +using MLPerf loadgen and MLCommons CM automation language. + +See the [Nature 2023 article about MedPerf](https://www.nature.com/articles/s42256-023-00652-2) +and [ACM REP'23 keynote about CM](https://doi.org/10.5281/zenodo.8105339) to learn more about these projects. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* + + +### Organizers + +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) diff --git a/challenge/connect-mlperf-with-medperf/_cm.json b/challenge/connect-mlperf-with-medperf/_cm.json new file mode 100644 index 0000000000..d48d0a9fea --- /dev/null +++ b/challenge/connect-mlperf-with-medperf/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "connect-mlperf-with-medperf", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close_extension": true, + "date_open": "20240105", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "medperf", + "mlperf-inference", + "mlperf-inference-medperf", + "mlperf-inference-medperf", + "mlperf-inference-medperf-v3.1", + "mlperf-inference-medperf-v3.1-2023", + "v3.1" + ], + "title": "Connect MedPerf with MLPerf and CM", + "trophies": true, + "uid": "c26d1fbf89164728" +} diff --git a/challenge/optimize-mlperf-inference-scc2023/README.md b/challenge/optimize-mlperf-inference-scc2023/README.md new file mode 100644 index 0000000000..62a4826ad2 --- /dev/null +++ b/challenge/optimize-mlperf-inference-scc2023/README.md @@ -0,0 +1,16 @@ +### CM tutorial + +https://github.com/mlcommons/ck/blob/master/docs/tutorials/scc23-mlperf-inference-bert.md + +### Challenge + +Reproduce and optimize MLPerf inference benchmarks during Student Cluster Competition at SuperComputing'23. 
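+
+For orientation, a short test run in the style used throughout these challenges might look like the sketch
+below; it is modeled on the BERT commands found elsewhere in this repository, and the authoritative,
+up-to-date flags are in the SCC'23 tutorial linked above:
+
+```bash
+# Illustrative sketch of a short BERT test run on CPU; see the SCC'23 tutorial for the official commands.
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+   --model=bert-99 --implementation=reference \
+   --device=cpu --backend=onnxruntime --quiet
+```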
+
+See our [related challenge from 2022](https://access.cknowledge.org/playground/?action=challenges&name=repro-mlperf-inference-retinanet-scc2022).
+
+### Organizers
+
+* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce)
+* [cTuning foundation](https://cTuning.org)
+* [cKnowledge.org](https://cKnowledge.org)
+
diff --git a/challenge/optimize-mlperf-inference-scc2023/_cm.json b/challenge/optimize-mlperf-inference-scc2023/_cm.json
new file mode 100644
index 0000000000..021872b15a
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-scc2023/_cm.json
@@ -0,0 +1,20 @@
+{
+  "alias": "optimize-mlperf-inference-scc2023",
+  "automation_alias": "challenge",
+  "automation_uid": "3d84abd768f34e08",
+  "date_close": "20231115",
+  "date_open": "20230915",
+  "tags": [
+    "automate",
+    "modularize",
+    "reproduce",
+    "replicate",
+    "benchmark",
+    "mlperf",
+    "mlperf-inference",
+    "mlperf-inference-scc",
+    "mlperf-inference-scc-2023"
+  ],
+  "title": "Reproduce and optimize MLPerf inference v3.1 benchmarks at the Student Cluster Competition'23 at SuperComputing'23 using CM",
+  "uid": "ddaf594f84b14bc2"
+}
diff --git a/challenge/optimize-mlperf-inference-scc2024/README.md b/challenge/optimize-mlperf-inference-scc2024/README.md
new file mode 100644
index 0000000000..1f9be23af2
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-scc2024/README.md
@@ -0,0 +1,7 @@
+The [MLCommons](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org)
+are preparing a unified interface to run the MLPerf inference benchmark at the Student Cluster Competition'24.
+
+See [the CM-MLPerf tutorial for SCC'23](https://github.com/mlcommons/ck/blob/master/docs/tutorials/scc23-mlperf-inference-bert.md).
+Note that the MLPerf model will change in SCC'24 - please stay tuned for more details!
+
+See https://sc24.supercomputing.org/students/student-cluster-competition for more details about SCC.
diff --git a/challenge/optimize-mlperf-inference-scc2024/_cm.json b/challenge/optimize-mlperf-inference-scc2024/_cm.json
new file mode 100644
index 0000000000..ab75aa27a6
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-scc2024/_cm.json
@@ -0,0 +1,19 @@
+{
+  "alias": "optimize-mlperf-inference-scc2024",
+  "automation_alias": "challenge",
+  "automation_uid": "3d84abd768f34e08",
+  "date_open": "20241001",
+  "tags": [
+    "automate",
+    "modularize",
+    "reproduce",
+    "replicate",
+    "benchmark",
+    "mlperf",
+    "mlperf-inference",
+    "mlperf-inference-scc",
+    "mlperf-inference-scc-2024"
+  ],
+  "title": "Run and optimize the MLPerf inference benchmark using CM at the Student Cluster Competition'24 at SuperComputing'24",
+  "uid": "f7fcba4c43ab4412"
+}
diff --git a/challenge/optimize-mlperf-inference-v2.1-2022/README.md b/challenge/optimize-mlperf-inference-v2.1-2022/README.md
new file mode 100644
index 0000000000..d0ac7cf15b
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v2.1-2022/README.md
@@ -0,0 +1,18 @@
+### Challenge
+
+Prepare, optimize and reproduce MLPerf inference v2.1 benchmarks across diverse implementations, software and hardware
+using the [MLCommons CK framework](https://github.com/mlcommons/ck).
+
+### Organizers
+
+* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce)
+* [cTuning foundation](https://cTuning.org)
+* [OctoML](https://octoml.ai)
+
+### Status
+
+This challenge has been successfully completed.
+ +### Results + +Results are available [here](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,v2.1). diff --git a/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json b/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json new file mode 100644 index 0000000000..31cb5dffd2 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v2.1-2022", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20220901", + "date_open": "20220701", + "experiments": [ + { + "tags": "mlperf-inference,v2.1" + } + ], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-v2.1", + "mlperf-inference-v2.1-2022", + "v2.1" + ], + "title": "Run and optimize MLPerf inference v2.1 benchmarks", + "uid": "2e13154b7fbb412d" +} diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/README.md b/challenge/optimize-mlperf-inference-v3.0-2023/README.md new file mode 100644 index 0000000000..da6decc8c7 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/README.md @@ -0,0 +1,74 @@ +### Challenge + +Run MLPerf inference v3.0 benchmarks out-of-the-box across diverse implementations, software and hardware +using the [MLCommons CM automation language](https://github.com/mlcommons/ck) +and submit public results to the MLPerf inference v3.0 via [cTuning foundation](https://cTuning.org). + +* [GUI to run MLPerf inference benchmarks](https://cknowledge.org/mlperf-inference-gui) +* [GUI to prepare MLPerf inference submissions](https://cknowledge.org/mlperf-inference-submission-gui) + +### Organizers + +* [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge](https://cKnowledge.org) + +### Status + +This challenge has been successfully completed. + +### Results + +Official results: +* https://github.com/mlcommons/inference_results_v3.0/tree/main/closed/cTuning +* https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning + +Results in the MLCommons CK/CM format: +* https://github.com/mlcommons/cm4mlperf-results + +Visualization and comparison with derived metrics: +* [MLCommons Collective Knowledge Playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,v3.0). + +### The outcome + +We are very pleased to announce the successful outcome of the 1st +community challenge to run, reproduce and optimize MLPerf inference v3.0 +benchmarks: our MLCommons CK/CM workflow automation framework has helped +to prepare more than 80% of all submission results including 98% of power +results with very diverse technology and benchmark implementations from +Neural Magic, Qualcomm, cKnowledge Ltd, KRAI, cTuning foundation, Dell +Technologies, Hewlett Packard Enterprise, Lenovo, Hugging Face, NVIDIA, +Intel Corporation, AMD and Apple across diverse CPUs, GPUs and DSPs with +PyTorch, ONNX, QAIC, TF/TFLite, TVM and TensorRT using popular cloud +providers (GCP, AWS, Azure) and individual servers and edge devices +provided by our [volunteers](https://access.cknowledge.org/playground/?action=contributors). 
+ +You can now see and compare all MLPerf inference results v3.0, v2.1 and +v2.0 online together with reproducibility reports including the +[MLPerf BERT model](https://huggingface.co/ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1) +from the [Hugging Face Zoo](https://www.linkedin.com/company/huggingface/?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D) +on [Nvidia Jetson Orin platform](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md#reproducing-the-nvidia-jetson-agx-orin-submission). +You can even create your own derived metrics (such as performance per Watt), +provide your own constraints using this [MLCommons repository](https://github.com/mlcommons/cm_inference_results) and visualize +them as shown in [this example](https://access.cknowledge.org/playground/?action=experiments&name=e472410ee67c41f9&x=Result&y=Power_Efficiency&filter=result[%27Result_Power%27]%3C35&derived_metrics=result%5B%27Power_Efficiency%27%5D%3D1000%2Fresult%5B%27Result_Power%27%5D&c=accelerator_model_name&axis_key_s=version). + +Additional thanks to [Michael Goin](https://www.linkedin.com/in/michael-goin) +from [Neural Magic](https://www.linkedin.com/company/neural-magic/?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), our international +students including [Himanshu Dutta](https://www.linkedin.com/in/ACoAACpPCiMB7zUNStsqBmaOCtd100a7wXBGu_M?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), +[Aditya Kumar Shaw](https://www.linkedin.com/in/ACoAACJ3ikUBjuHqi35ibm8CG6IEYv-v_VsobIs?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), +Sachin Mudaliyar, [Thomas Zhu](https://www.linkedin.com/in/hanwen-zhu-483614189), +and all [CK/CM users and contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) for helping us to +validate, use and improve this open-source technology to automate +benchmarking and optimization of AI/ML systems in terms of performance, +accuracy, power and costs! We are also grateful to [HiPEAC](https://www.linkedin.com/company/hipeac) +and [OctoML](https://www.linkedin.com/company/octoml) for +sponsoring initial development and Peter Mattson, David Kanter, Vijay +Janapa Reddi and Alexandros Karargyris for fruitful discussions. 
+ + +### Dissemination + +* [Forbes article](https://www.forbes.com/sites/karlfreund/2023/04/05/nvidia-performance-trounces-all-competitors-who-have-the-guts-to-submit-to-mlperf-inference-30/?sh=3c38d2866676) +* [ZDNet article](https://www.zdnet.com/article/nvidia-dell-qualcomm-speed-up-ai-results-in-latest-benchmark-tests) +* [LinkedIn article from Grigori Fursin (MLCommons Task Force co-chair)]( https://www.linkedin.com/pulse/announcing-my-new-project-reproducible-optimization-co-design-fursin ) +* [Linkedin article from Arjun Suresh (MLCommons Task Force co-chair)](https://www.linkedin.com/posts/arjunsuresh_nvidia-performance-trounces-all-competitors-activity-7049500972275929088-nnnx?utm_source=share&utm_medium=member_desktop) diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json new file mode 100644 index 0000000000..0baf3cfeea --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.0-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230301", + "date_open": "20230201", + "experiments": [ + { + "tags": "mlperf-inference,v3.0" + } + ], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-v3.0", + "mlperf-inference-v3.0-2023", + "v3.0" + ], + "title": "Run and optimize MLPerf inference v3.0 benchmarks", + "uid": "57cbc3384d7640f9" +} diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md new file mode 100644 index 0000000000..f6a17979ca --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md @@ -0,0 +1,281 @@ +# Crowd-benchmarking MLPerf BERT inference + +
+Click here to see the table of contents. + +* [Crowd-benchmarking MLPerf BERT inference](#crowd-benchmarking-mlperf-bert-inference) +* [System preparation](#system-preparation) + * [Minimal system requirements](#minimal-system-requirements) + * [Install CM (CK2) automation meta-framework](#install-cm-ck2-automation-meta-framework) + * [Pull CM repository with portable automation recipes](#pull-cm-repository-with-portable-automation-recipes) + * [Detect or install CUDA](#detect-or-install-cuda) + * [Test CUDA installation](#test-cuda-installation) + * [Install Python virtual environment](#install-python-virtual-environment) + * [Detect or install cuDNN](#detect-or-install-cudnn) + * [Detect or install TensorRT](#detect-or-install-tensorrt) + * [Run MLPerf inference benchmark with BERT](#run-mlperf-inference-benchmark-with-bert) + * [Try ONNX runtime backend](#try-onnx-runtime-backend) + * [Do a test run to detect and record the system performance](#do-a-test-run-to-detect-and-record-the-system-performance) + * [Do a full accuracy run for all the scenarios](#do-a-full-accuracy-run-for-all-the-scenarios) + * [Do a full performance run for all the scenarios](#do-a-full-performance-run-for-all-the-scenarios) + * [Populate the README files](#populate-the-readme-files) + * [Generate MLPerf submission tree](#generate-mlperf-submission-tree) + * [Push the results to GitHub repo](#push-the-results-to-github-repo) + * [Try PyTorch backend](#try-pytorch-backend) + * [Test composable ML benchmark with other models, data sets, frameworks and platforms](#test-composable-ml-benchmark-with-other-models-data-sets-frameworks-and-platforms) +* [The next steps](#the-next-steps) + +
+ + +This is a pilot community project to collaboratively run MLPerf BERT inference benchmark +across diverse platforms provided by volunteers similar to [SETI@home](https://setiathome.berkeley.edu/). +However, instead of searching for extraterrestrial intelligence, we are +searching for optimal software/hardware combination to run various AI and ML workloads +in terms of performance, accuracy, power and costs ... + +This benchmark is composed from [portable and reusable automation recipes](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +developed by [MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +to modularize complex AI and ML Systems and automate their benchmarking, design space exploration, optimization and deployment +across continuously evolving software, hardware, models and data. + +*If you submit your results before 1pm PST on Friday 3rd, 2023, + they will be accepted for the official MLPerf inference v3.0 submission round + and your name acknowledged in the notes!* + + +# System preparation + +## Minimal system requirements + +* CPU: any x86-64 or Arm64 based machine +* GPU: any relatively modern Nvidia GPU with 8GB+ memory and CUDA 11.4+ +* OS: we have tested this automation on Ubuntu 20.04, Ubuntu 22.04 and Debian 10 +* Disk space: ~10GB +* Python: 3.8+ +* All other dependencies (artifacts and tools) will be installed by the CM meta-framework aka (CK2) + +## Install CM (CK2) automation meta-framework + +Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install the [MLCommons CM framework](https://github.com/mlcommons/ck) +(the 2nd generation on the Collective Mind framework) on your system. + +## Pull CM repository with portable automation recipes + +Pull MLCommons CM repository with [cross-platform CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +supporting portable MLOps and DevOps: + +```bash +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +CM pulls all such repositories into the `$HOME/CM` directory to search for portable CM automation recipes and artifacts. + +We use the unified CM CLI & Python API of [portable and reusable CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +to compose portable automation pipelines (also implemented as CM scripts) that can automatically detect or install all necessary artifacts (tools, models, datasets, libraries, etc) +required to run a given software project such as the MLPerf inference benchmark. + +These CM scripts simply wrap existing native scripts and tools as simple micro-services +with a human-readable CLI and simple Python API to be able to easily connect them together +and run on any platform in a unified way. + +## Detect or install CUDA + +Run the following CM script: +```bash +cm run script "get cuda" --out=json +``` + +If CUDA is automatically detected, it will be registered in the CM cache: +```bash +cm show cache --tags=get,cuda +``` + +Otherwise, this script will attempt to download and install the latest CUDA +from Nvidia website. + +Please report any issue with CM scripts [here](https://github.com/mlcommons/ck/issues). + +### Test CUDA installation + +You can test if CUDA toolkit and driver was detected or installed successfully using the following command: +```bash +cm run script "get cuda-devices" +``` + +You should see similar output: +```txt +Checking compiler version ... 
+ +nvcc: NVIDIA (R) Cuda compiler driver +Copyright (c) 2005-2022 NVIDIA Corporation +Built on Wed_Sep_21_10:33:58_PDT_2022 +Cuda compilation tools, release 11.8, V11.8.89 +Build cuda_11.8.r11.8/compiler.31833905_0 + +Compiling program ... + +Running program ... + + - Running postprocess ... +GPU Device ID: 0 +GPU Name: Tesla K80 +GPU compute capability: 3.7 +CUDA driver version: 11.4 +CUDA runtime version: 11.8 +Global memory: 11997020160 +Max clock rate: 823.500000 MHz +Total amount of shared memory per block: 49152 +Total number of registers available per block: 65536 +Warp size: 32 +Maximum number of threads per multiprocessor: 2048 +Maximum number of threads per block: 1024 +Max dimension size of a thread block X: 1024 +Max dimension size of a thread block Y: 1024 +Max dimension size of a thread block Z: 64 +Max dimension size of a grid size X: 2147483647 +Max dimension size of a grid size Y: 65535 +Max dimension size of a grid size Z: 65535 + + - running time of script "get,cuda-devices": 4.16 sec. + +``` + +## Install Python virtual environment + +```bash +cm run script "get sys-utils-cm" --quiet + +cm run script "install python-venv" --name=mlperf-cuda +``` + +If you want to install specific version of Python use the following command: +```bash +cm run script "install python-venv" --version=3.10.8 --name=mlperf-cuda +``` + +## Detect or install cuDNN + +```bash +cm run script "get cudnn" +``` + +If cuDNN is not detected on your system, you can download a TAR file +from [Nvidia website](https://developer.nvidia.com/cudnn) and then use the same CM script +to install it as follows: +```bash +cm run script "get cudnn" --tar_file= +``` + +We have tested this project with the following tar file `cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz`. + +## Detect or install TensorRT + +```bash +cm run script "get tensorrt" +``` +If TensorRT is not detected on your system, you can download a TAR file +from [Nvidia website](https://developer.nvidia.com/tensorrt) and then use the same CM script +to install it as follows: +```bash +cm run script "get tensorrt" --tar_file= +``` + +We have tested this project with the following tar file `TensorRT-8.5.1.7.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz`. 
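+
+Before moving on, you can optionally confirm that both libraries were registered in the CM cache,
+mirroring the earlier CUDA check; the tag names below follow the same `get,<tool>` convention and
+are assumed rather than taken from this document:
+
+```bash
+# Check that cuDNN and TensorRT were detected or installed and cached by CM.
+cm show cache --tags=get,cudnn
+cm show cache --tags=get,tensorrt
+```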
+ + +## Run MLPerf inference benchmark with BERT + +### Try ONNX runtime backend + +#### Do a test run to detect and record the system performance + +```bash +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --implementation=reference \ + --device=cuda --backend=onnxruntime --quiet +``` + +#### Do a full accuracy run for all the scenarios + +```bash +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Do a full performance run for all the scenarios + +```bash +cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Populate the README files + +```bash +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Generate MLPerf submission tree + +We should use the master branch of MLCommons inference repo for the submission checker. +You can use `--hw_note_extra` option to add your name to the notes. + +```bash +cm run script --tags=generate,inference,submission \ + --results_dir=$HOME/inference_3.0_results/valid_results \ + --adr.python.name=mlperf-cuda \ + --device=cuda --submission_dir=$HOME/inference_submission_tree --clean \ + --run-checker --submitter=cTuning --adr.inference-src.version=master + --hw_notes_extra="Result taken by " --quiet +``` + +#### Push the results to GitHub repo + +First create a fork of [this GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0). +Then run the following command after replacing `--repo_url` with your fork URL. + +```bash +cm run script --tags=push,github,mlperf,inference,submission \ + --submission_dir=$HOME/inference_submission_tree \ + --adr.python.name=mlperf-cuda \ + --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0 \ + --commit_message="Bert crowd-results added" +``` + +Create a PR to the [GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0/) + + + +### Try PyTorch backend + +You can run the same commands with PyTorch by rerunning all above commands and replacing `--backend=onnxruntime` with `--backend=pytorch`. 
+ +For example, + +```bash +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=pytorch --execution-mode=valid \ + --results_dir=$HOME/inference_3.0_results --quiet +``` + + +## Test composable ML benchmark with other models, data sets, frameworks and platforms + +* [GUI to prepare CM command line and run benchmark](https://cknowledge.org/mlperf-inference-gui) +* [GUI to compare performance, accuracy, power and costs of ML/SW/HW combinations](https://cKnowledge.org/cm-gui-graph) + + +# The next steps + +Please follow the [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) +and [MLCommons](https://mlcommons.org). + diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md new file mode 100644 index 0000000000..38f69a5d53 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md @@ -0,0 +1,59 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. +Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + +3d-unet has two variants - `3d-unet-99` and `3d-unet-99.9` where the `99` and `99.9` specifies the required accuracy constraint with respect to the reference floating point model. Both models can be submitter under edge as well as datacenter category. + +Since 3d-unet is one of the slowest running model, we are only running it using nvidia-implementation where the model is quantized and run on TensorRT backend on Nvidia GPU. + +For `3d-unet-99.9` runs, simply replace `3d-unet-99` with `3d-unet-99.9`. + +### TensorRT backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=3d-unet-99 --implementation=nvidia-original --device=cuda --backend=tensorrt \ +--category=edge --division=open --quiet +``` +* Use `--category=datacenter` to run datacenter scenarios +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.0_results \
+--category=edge --division=open --quiet
+```
+
+#### Generate actual submission tree
+
+Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, the [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
+
+We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+```
+cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \
+--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \
+--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet
+```
diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md
new file mode 100644
index 0000000000..8aebb068f0
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md
@@ -0,0 +1,80 @@
+## Setup
+Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM.
+Download the ck repo to get the CM script for MLPerf submission:
+
+```
+cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9
+```
+
+## Run Commands
+
+Bert has two variants - `bert-99` and `bert-99.9`, where the `99` and `99.9` specify the required accuracy constraint with respect to the reference floating point model. The `bert-99.9` model is applicable only to datacenter systems.
+
+In the edge category, `bert-99` has the Offline and SingleStream scenarios, and in the datacenter category both `bert-99` and `bert-99.9` have the Offline and Server scenarios. The commands below assume an edge category system.
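+
+The same commands shown in the following sections should also work for a datacenter submission once the category and model flags are changed. For instance, a test run for `bert-99.9` on a datacenter system (a sketch based on the reference onnxruntime setup described below) could look like:
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=bert-99.9 --implementation=reference --device=cpu --backend=onnxruntime \
+--category=datacenter --division=open --quiet
+```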
+ +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=bert-99 --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \ +--results_dir=$HOME/inference_3.0_results --quiet +``` + +## Pytorch backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. 
For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=pytorch \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md new file mode 100644 index 0000000000..6d6ba275fd --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md @@ -0,0 +1,82 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. +Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + +On edge category ResNet50 has Offline, SingleStream and MultiStream scenarios and in datacenter category it has Offline and Server scenarios. The below commands are assuming an edge category system. + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=resnet50 --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios --model=resnet50 \ +--device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). 
After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--submission_dir=$HOME/inference_submission_tree --clean \ +--run-checker --submitter=cTuning --adr.inference-src.version=master \ +--hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=tf \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +## TVM backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tvm-onnx`. (Only `--device=cpu` is currently supported for TVM) For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=tvm-onnx \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md new file mode 100644 index 0000000000..4eedba9f31 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md @@ -0,0 +1,67 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. +Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=retinanet --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Pytorch backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=pytorch \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md new file mode 100644 index 0000000000..d7191c808d --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md @@ -0,0 +1,53 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. 
+Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + +### TensorRT backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=rnnt --implementation=nvidia-original --device=cuda --backend=tensorrt \ +--category=edge --division=open --quiet +``` +* Use `--category=datacenter` to run datacenter scenarios +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md new file mode 100644 index 0000000000..e1691c21ac --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md @@ -0,0 +1,48 @@ +The below instructions are for creating an AWS instance from the CLI. You can also create an instance via web and setup CM on it. + +## Prerequisites + +1. AWS Key, secret and token +2. 
`*.pem` ssh key file to be used to create the instance (the public key from here will be copied to the `$HOME/.ssh/authorized_keys` file in the created instance)
+
+## Run Commands
+
+We need the full imagenet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+### Update Access Details
+
+```
+cd $HOME/CM/repos/mlcommons@ck/cm-mlops/script/run-terraform/aws/
+cp credentials.example credentials.sh
+```
+Update `credentials.sh` with your AWS Key, Secret and Token.
+
+### Create an AWS Instance
+
+
+```
+cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \
+--cminit --key_file=$HOME/cmuser.pem
+```
+
+The above command will output the IP of the created instance, which will already have CM set up.
+
+The `_m7g.xlarge,_storage_size.500,_ubuntu.2204` variations can be changed to launch a different instance. Below are the variation combinations we used for MLPerf inference 3.0 submissions.
+
+* `_g4dn.xlarge`
+* `_a1.2xlarge,_storage_size.130,_ubuntu.2204`
+* `_c5.4xlarge,_storage_size.130,_ubuntu.2204`
+* `_m7g.2xlarge,_storage_size.500,_ubuntu.2204`
+* `_inf1.2xlarge,_storage_size.500,_amazon-linux-2-kernel.510`
+* `_t2.medium,_storage_size.200,_rhel.9`
+
+### Copy the needed files from the local machine
+
+Copy the imagenet dataset to the created instance. For example,
+
+```
+rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134:
+```
+To use the [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia), the tar files for cuDNN and TensorRT need to be downloaded locally from the Nvidia website and copied to the AWS instance in a similar way to the above command.
+
+Once all the required files are copied over, log in to the instance and follow the individual benchmark instructions from the README files given [here](./).
diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md
new file mode 100644
index 0000000000..6bd16556a3
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md
@@ -0,0 +1,35 @@
+The below instructions are for creating a Google Cloud instance from the CLI. You can also create an instance via the web console and set up CM on it.
+
+## Prerequisites
+
+Please follow the authentication instructions given [here](https://github.com/ctuning/mlcommons-ck/blob/master/cm-mlops/script/run-terraform/README-about.md).
+
+
+## Run Commands
+
+We need the full imagenet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+
+### Create a GCP Instance
+
+
+```
+cm run script --tags=run,terraform,_gcp,_n1-highmem.4,_gcp_project.mlperf-inference-tests --cminit
+```
+
+The above command will output the IP of the created instance, which will already have CM set up.
+
+The `_n1-highmem.4` variation can be changed to launch a different instance.
Below are the variation combinations we used for MLPerf inference 3.0 submissions. + +* `_n1-standard.4` + +### Copy the needed files + +Copy the imagenet dataset to the created instance. For example, + +``` +rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134: +``` +For using [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia) tar files for cuDNN and TensorRT are needed to be downloaded locally from Nvidia website and copied to the AWS instance similar to the above command. + +Once all the required files are copied over, login to the instance and follow the individual benchmark instructions from the README files given [here](./) diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md new file mode 100644 index 0000000000..68db00ea0e --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md @@ -0,0 +1,53 @@ +## Setup +We used Nvidia Jetson AGX Orin developer kit with 32GB RAM and 64GB eMMC. We also connected a 500GB SSD disk via USB and Wifi connection for internet connectivity. + +We used the out of the box developer kit image which was running Ubuntu 20.04 and JetPack 5.0.1 Developer Preview (L4T 34.1.1) with CUDA 11.4. We were also using the default 4k page size (Nvidia recommends 64k for MLPerf inference). + +[cuDNN 8.6.0](https://developer.nvidia.com/compute/cudnn/secure/8.6.0/local_installers/11.8/cudnn-local-repo-ubuntu2004-8.6.0.163_1.0-1_arm64.deb) and [TensorRT 8.5.2.2](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.5.3/local_repos/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_arm64.deb) were downloaded as Debian packages on a host machine, copied over to Nvidia Jetson Orin and installed. + + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset. + +### Copy the needed files from a host machine + +Copy the imagenet dataset to the created instance. For example, + +``` +rsync -avz $HOME/imagenet-2012-val/ user@192.168.0.27: +``` + +Login to Orin and register the imagenet dataset as +``` +cm run script --tags=get,imagenet,dataset,_2012,_full --input=$HOME/imagenet-2012-val +``` + +Once all the required files are copied over, follow the individual benchmark instructions from the README files given [here](./) All the required dependencies should be resolved by CM. + +### Power Measurement Setup + +We were measuring power in the peak performance mode (MaxN) except for one SUT where the energy efficiency mode was changed to Max15. Our aim was to showcase the out of the box performance of Nvidia Jetson AGX Orin including the power usage. + +## Reproducing the Nvidia Jetson AGX Orin Submission + +After our submission we followed the instructions from Nvidia in the inference v3.0 repository and tried to reproduce the numbers from Nvidia. For MaxN mode we were able to match the numbers by Nvidia using same versions of CUDA, cuDNN and TensorRT but outside of docker. For MaxQ mode, we could get the same performance as Nvidia but our power usage was about 5W higher. 
+ +### Performance results MaxN + +The below table shows the performance comparison of our results under different settings and the Nvidia submission for MLPerf inference 3.0. We'll be updating our instructions for easier reproducibility of these numbers including CM scripts for flashing the L4T image and rebuilding the kernel for 64k pagesize. + + +| Workload | Results | L4T | PAGESIZE | Power Mode | FAN Dynamic Speed control | Offline Accuracy | Offline Performance | SingleStream Accuracy | SingleStream Performance | MultiStream Accuracy | MultiStream Performance | +| --------- | --------------------------------- | ----- | -------- | ---------- | ------------------------- | ---------------- | ------------------- | --------------------- | ------------------------ | -------------------- | ----------------------- | +| ResNet50 | Nvidia Submitted (docker) | r35.3 | 64k | MaxN | active | 75.934 | 6438.1 | 76.032 | 0.633479 | 76.032 | 2.187731 | +| ResNet50 | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 75.934 | 4697 | 76.032 | 0.72 | 76.032 | 2.57 | +| ResNet50 | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 75.85 | 6172 | 76.056 | 0.644 | 76.056 | 2.074 | +| ResNet50 | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 75.85 | 6430 | 76.056 | 0.659 | 76.056 | 2.20 | +| RetinaNet | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 37.372 | 92.4048 | 37.403 | 13.924457 | 37.519 | 104.680313 | +| RetinaNet | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 37.346 | 80.0854 (no DLA) | 37.350 | 14,19 | 37.409 | 105.344828 | +| RetinaNet | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 37.345 | 94.6886 | 37.340 | 14.073 | 37.488 | 103.8 | +| BERT | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 90.552 | 544.243 | 90.344 | 5.635431 | NA | NA | +| BERT | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 90.552 | 449.96 | 90.344 | 7.8 | NA | NA | +| BERT | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 90.562 | 527 (128 batchsize) | 90.311 | 6.636 | NA | NA | +| BERT | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 90.552 | 539 | 90.344 | 6.31 | NA | NA | + + diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-2023/README.md new file mode 100644 index 0000000000..6362f3eb66 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/README.md @@ -0,0 +1,83 @@ +### Introduction + +Our goal is to help the community benchmark and optimize various AI/ML applications +across diverse software and hardware provided by volunteers similar to SETI@home! + +Open-source [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) +were developed by a [consortium of 50+ companies and universities (MLCommons)](https://mlcommons.org) +to enable trustable and reproducible comparison of AI/ML systems +in terms of latency, throughput, power consumption, accuracy and other metrics +across diverse software/hardware stacks from different vendors. + +However, running MLPerf inference benchmarks and submitting results [turned out to be a challenge](https://doi.org/10.5281/zenodo.8144274) +even for experts and could easily take many weeks to prepare. 
That's why [MLCommons](https://mlcommons.org),
+[cTuning.org](https://www.linkedin.com/company/ctuning-foundation)
+and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)
+decided to develop an open-source, technology-agnostic
+and non-intrusive [Collective Mind automation language (CM)](https://github.com/mlcommons/ck)
+and [Collective Knowledge Playground (CK)](https://access.cknowledge.org/playground/?action=experiments)
+to help anyone run, reproduce, optimize and compare MLPerf inference benchmarks out-of-the-box
+across diverse software, hardware, models and data sets.
+
+You can read more about our vision, open-source technology and future plans
+in this [presentation](https://doi.org/10.5281/zenodo.8105339).
+
+
+
+### Advanced challenge
+
+We would like to ask volunteers to run various MLPerf inference benchmarks
+on diverse CPUs (Intel, AMD, Arm) and Nvidia GPUs, similar to SETI@home,
+across different frameworks (ONNX, PyTorch, TF, TFLite)
+either natively or in a cloud (AWS, Azure, GCP, Alibaba, Oracle, OVHcloud, ...)
+and submit results to MLPerf inference v3.1.
+
+However, since some benchmarks may take 1-2 days to run, we suggest starting in the following order (these links describe CM commands to run benchmarks and submit results):
+* [CPU: Reference implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_reference.md)
+* [CPU: TFLite C++ implementation of Image classification with variations of MobileNets and EfficientNets (open division)](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/run-mlperf-inference-mobilenet-models/README-about.md)
+* [Nvidia GPU: Nvidia optimized implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_nvidia.md)
+* [Nvidia GPU: Nvidia optimized implementation of Language processing with BERT large (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/bert/README_nvidia.md)
+* [Nvidia GPU: Reference implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/bert/README_nvidia.md)
+* [Nvidia GPU: Reference implementation of Language processing with BERT large (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_reference.md)
+* [Nvidia GPU (24GB of memory min): Reference implementation of Language processing with GPT-J 6B (open)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/gpt-j/README_reference.md)
+* [Nvidia GPU: Nvidia optimized implementation of all other models (open and closed division)](https://github.com/ctuning/mlcommons-ck/blob/master/docs/mlperf/inference/README.md#run-benchmarks-and-submit-results)
+
+Please read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md)
+to set up and run the above benchmarks using CM.
+
+You can register your participation for the [Collective Knowledge leaderboard]( https://access.cKnowledge.org/playground/?action=contributors )
+using this [guide](https://github.com/mlcommons/ck/blob/master/platform/register.md).
+ +Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck/issues) +to help the community +improve the portability of the CM automation for MLPerf and other benchmarks and projects. + +Looking forward to your submissions and happy hacking! + + + +### Prizes + +* *All submitters will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All submitters will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Status + +You can see shared results in [this repostiory](https://github.com/ctuning/mlperf_inference_submissions_v3.1) +with PRs from participants [here](https://github.com/ctuning/mlperf_inference_submissions_v3.1/pulls). + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json new file mode 100644 index 0000000000..a30c26c928 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_open": "20230704", + "experiments": [], + "points": 1, + "sort": -10, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "mlperf", + "mlperf-inference", + "mlperf-inference-v3.1", + "mlperf-inference-v3.1-2023", + "v3.1" + ], + "title": "Crowd-benchmark all MLPerf inference benchmarks similar to SETI@home (latency, throughput, power consumption, accuracy, costs)", + "trophies": true, + "uid": "3e971d8089014d1f" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md new file mode 100644 index 0000000000..9806c22647 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md @@ -0,0 +1,67 @@ +## Setup + +Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) +to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. + +Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box +across different software, hardware, models and data sets: + + +``` +cm pull repo mlcommons@ck +``` + +Note that you can install Python virtual environment via CM to avoid contaminating +your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). 
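+
+For example, a minimal sketch of creating such a virtual environment via CM (the name `mlperf` is just an example; any name works):
+
+```
+cm run script "install python-venv" --name=mlperf
+```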
+ +## Run Commands + +3d-unet has two variants - `3d-unet-99` and `3d-unet-99.9` where the `99` and `99.9` specifies the required accuracy constraint with respect to the reference floating point model. Both models can be submitter under edge as well as datacenter category. + +Since 3d-unet is one of the slowest running model, we are only running it using nvidia-implementation where the model is quantized and run on TensorRT backend on Nvidia GPU. + +For `3d-unet-99.9` runs, simply replace `3d-unet-99` with `3d-unet-99.9`. + +### TensorRT backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=3d-unet-99 --implementation=nvidia-original --device=cuda --backend=tensorrt \ +--category=edge --division=open --quiet +``` +* Use `--category=datacenter` to run datacenter scenarios +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. 
+``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md new file mode 100644 index 0000000000..c43363c1e9 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md @@ -0,0 +1,113 @@ +## Setup + +Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) +to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. + +Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box +across different software, hardware, models and data sets: + + +``` +cm pull repo mlcommons@ck +``` + +Note that you can install Python virtual environment via CM to avoid contaminating +your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). + +## Run Commands + +Bert has two variants - `bert-99` and `bert-99.9` where the `99` and `99.9` specifies the required accuracy constraint with respect to the reference floating point model. `bert-99.9` model is applicable only on a datacenter system. + +On edge category `bert-99` has Offline and SingleStream scenarios and in datacenter category both `bert-99` and `bert-99.9` have Offline and Server scenarios. The below commands are assuming an edge category system. + +### Onnxruntime backend (Reference implementation) + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=bert-99 --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs. 
This requires a power analyzer as described [here](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md) +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend (Reference implementation) + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \ +--results_dir=$HOME/inference_3.1_results --quiet +``` + +## Pytorch backend (Reference implementation) + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. 
For example,
+
+```
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \
+--model=bert-99 --device=cpu --implementation=reference --backend=pytorch \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results --quiet
+```
+
+## TensorRT backend (Nvidia implementation)
+
+For the TensorRT backend we are using the [Nvidia implementation](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/reproduce-mlperf-inference-nvidia) and not the [MLPerf inference reference implementation](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-inference-reference) for the following reasons:
+* The TensorRT backend is not supported by default in the reference implementation
+* The reference implementation is mostly for fp32 models and quantization is not supported by default
+* Nvidia has done some fantastic work in optimizing performance for the TensorRT backend
+
+To get set up, please follow the instructions [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/reproduce-mlperf-inference-nvidia/README-about.md) to download and install TensorRT and cuDNN unless you already have them installed. This readme also details how to handle the configuration files which are automatically generated by the Nvidia implementation scripts. Once this is done, the following command will run all the modes and scenarios.
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=bert-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results --quiet
+```
+
+* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs. This requires a power analyzer as described [here](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md)
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the default performance numbers
+* Use `--category=datacenter` to run datacenter scenarios
+
+
+The TensorRT backend has an engine generation stage which can be time-consuming. For repeated runs, the `--adr.nvidia-harness.make_cmd=run_harness` option will avoid this engine regeneration and reuse the previously generated one.
+
+
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md
new file mode 100644
index 0000000000..470930e373
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md
@@ -0,0 +1,90 @@
+## Setup
+
+Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md)
+to install the MLCommons CM reproducibility and automation language in your native environment or Docker container.
+ +Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box +across different software, hardware, models and data sets: + + +``` +cm pull repo mlcommons@ck +``` + +Note that you can install Python virtual environment via CM to avoid contaminating +your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). + +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + +On edge category ResNet50 has Offline, SingleStream and MultiStream scenarios and in datacenter category it has Offline and Server scenarios. The below commands are assuming an edge category system. + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=resnet50 --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios --model=resnet50 \ +--device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. 
+``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ +--submission_dir=$HOME/inference_submission_tree --clean \ +--run-checker --submitter=cTuning --adr.inference-src.version=master \ +--hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=tf \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +## TVM backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tvm-onnx`. (Only `--device=cpu` is currently supported for TVM) For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=tvm-onnx \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md new file mode 100644 index 0000000000..4420462cde --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md @@ -0,0 +1,75 @@ +## Setup + +Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) +to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. + +Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box +across different software, hardware, models and data sets: + + +``` +cm pull repo mlcommons@ck +``` + +Note that you can install Python virtual environment via CM to avoid contaminating +your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). + +## Run Commands + + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=retinanet --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `--multistream_target_latency` can be used to override the determined performance numbers
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+#### Generate actual submission tree
+
+Here, we copy the performance and accuracy log files (and compliance logs in the case of the closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, the [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate the accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
+
+We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+```
+cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \
+--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \
+--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet
+```
+
+
+## Pytorch backend
+
+The same commands as for `onnxruntime` should work after replacing `--backend=onnxruntime` with `--backend=pytorch`. For example,
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=retinanet --device=cpu --implementation=reference --backend=pytorch \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md
new file mode 100644
index 0000000000..a6ca069215
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md
@@ -0,0 +1,61 @@
+## Setup
+
+Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md)
+to install the MLCommons CM reproducibility and automation language in your native environment or Docker container.
+
+Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box
+across different software, hardware, models and data sets:
+
+
+```
+cm pull repo mlcommons@ck
+```
+
+Note that you can install a Python virtual environment via CM to avoid contaminating
+your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments).
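+
+A minimal sketch of such a virtual-environment setup (assuming the `install,python-venv` automation tags documented in the linked README; the environment name is arbitrary):
+
+```
+cm run script --tags=install,python-venv --name=mlperf
+```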
+
+## Run Commands
+
+### TensorRT backend
+
+#### Do a test run to detect and record the system performance
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=rnnt --implementation=nvidia-original --device=cuda --backend=tensorrt \
+--category=edge --division=open --quiet
+```
+* Use `--category=datacenter` to run datacenter scenarios
+* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode)
+
+#### Do full accuracy and performance runs for all the scenarios
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+#### Generate actual submission tree
+
+Here, we copy the performance and accuracy log files (and compliance logs in the case of the closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, the [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate the accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
+
+We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+```
+cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \
+--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \
+--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet
+```
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md
new file mode 100644
index 0000000000..152c612aad
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md
@@ -0,0 +1,50 @@
+## Setup AWS instance for MLPerf
+
+The instructions below are for creating an AWS instance from the CLI. You can also create an instance via the web console and set up CM on it.
+
+## Prerequisites
+
+1. AWS Key, secret and token
+2.
`*.pem` ssh key file to be used to create the instance (the public key from it will be copied to the `$HOME/.ssh/authorized_keys` file on the created instance)
+
+## Run Commands
+
+We need the full ImageNet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+### Update Access Details
+
+```
+cd $HOME/CM/repos/mlcommons@ck/cm-mlops/script/run-terraform/aws/
+cp credentials.example credentials.sh
+```
+Update `credentials.sh` with your AWS key, secret and token.
+
+### Create an AWS Instance
+
+
+```
+cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \
+--cminit --key_file=$HOME/cmuser.pem
+```
+
+The above command will output the IP of the created instance, which will already have CM set up.
+
+The `_m7g.xlarge,_storage_size.500,_ubuntu.2204` variations can be changed to launch a different instance type. Below are the variation combinations we used for MLPerf inference 3.0 submissions.
+
+* `_g4dn.xlarge`
+* `_a1.2xlarge,_storage_size.130,_ubuntu.2204`
+* `_c5.4xlarge,_storage_size.130,_ubuntu.2204`
+* `_m7g.2xlarge,_storage_size.500,_ubuntu.2204`
+* `_inf1.2xlarge,_storage_size.500,_amazon-linux-2-kernel.510`
+* `_t2.medium,_storage_size.200,_rhel.9`
+
+### Copy the needed files from the local machine
+
+Copy the ImageNet dataset to the created instance. For example,
+
+```
+rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134:
+```
+To use the [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia), the cuDNN and TensorRT tar files need to be downloaded locally from the Nvidia website and copied to the AWS instance in a similar way.
+
+Once all the required files are copied over, log in to the instance and follow the individual benchmark instructions from the README files given [here](./).
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md
new file mode 100644
index 0000000000..a3a0e457a1
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md
@@ -0,0 +1,37 @@
+## Setup GCP instance for MLPerf
+
+The instructions below are for creating a Google Cloud instance from the CLI. You can also create an instance via the web console and set up CM on it.
+
+## Prerequisites
+
+Please follow the authentication instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/run-terraform/README-about.md).
+
+
+## Run Commands
+
+We need the full ImageNet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+
+### Create a GCP Instance
+
+
+```
+cm run script --tags=run,terraform,_gcp,_n1-highmem.4,_gcp_project.mlperf-inference-tests --cminit
+```
+
+The above command will output the IP of the created instance, which will already have CM set up.
+
+The `_n1-highmem.4` variation can be changed to launch a different instance type.
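+For example, a sketch that picks the `_n1-standard.4` variation from the list below instead:
+
+```
+cm run script --tags=run,terraform,_gcp,_n1-standard.4,_gcp_project.mlperf-inference-tests --cminit
+```
+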
Below are the variation combinations we used for MLPerf inference 3.0 submissions.
+
+* `_n1-standard.4`
+
+### Copy the needed files
+
+Copy the ImageNet dataset to the created instance. For example,
+
+```
+rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134:
+```
+To use the [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia), the cuDNN and TensorRT tar files need to be downloaded locally from the Nvidia website and copied to the GCP instance in a similar way.
+
+Once all the required files are copied over, log in to the instance and follow the individual benchmark instructions from the README files given [here](./).
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md
new file mode 100644
index 0000000000..08c0a8eeb0
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md
@@ -0,0 +1,54 @@
+## Setup
+
+We used an Nvidia Jetson AGX Orin developer kit with 32GB RAM and 64GB eMMC. We also connected a 500GB SSD via USB and used a Wi-Fi connection for internet connectivity.
+
+We used the out-of-the-box developer kit image, which was running Ubuntu 20.04 and JetPack 5.0.1 Developer Preview (L4T 34.1.1) with CUDA 11.4. We also used the default 4k page size (Nvidia recommends 64k for MLPerf inference).
+
+[cuDNN 8.6.0](https://developer.nvidia.com/compute/cudnn/secure/8.6.0/local_installers/11.8/cudnn-local-repo-ubuntu2004-8.6.0.163_1.0-1_arm64.deb) and [TensorRT 8.5.2.2](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.5.3/local_repos/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_arm64.deb) were downloaded as Debian packages on a host machine, copied over to the Nvidia Jetson Orin and installed.
+
+
+We need the full ImageNet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset.
+
+### Copy the needed files from a host machine
+
+Copy the ImageNet dataset to the device. For example,
+
+```
+rsync -avz $HOME/imagenet-2012-val/ user@192.168.0.27:
+```
+
+Log in to the Orin and register the ImageNet dataset as
+```
+cm run script --tags=get,imagenet,dataset,_2012,_full --input=$HOME/imagenet-2012-val
+```
+
+Once all the required files are copied over, follow the individual benchmark instructions from the README files given [here](./). All the required dependencies should be resolved by CM.
+
+### Power Measurement Setup
+
+We measured power in the peak performance mode (MaxN), except for one SUT where the energy efficiency mode was changed to Max15. Our aim was to showcase the out-of-the-box performance of the Nvidia Jetson AGX Orin, including power usage.
+
+## Reproducing the Nvidia Jetson AGX Orin Submission
+
+After our submission we followed the instructions from Nvidia in the inference v3.0 repository and tried to reproduce their numbers. For MaxN mode we were able to match Nvidia's numbers using the same versions of CUDA, cuDNN and TensorRT, but outside of Docker. For MaxQ mode, we could get the same performance as Nvidia, but our power usage was about 5W higher.
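+
+As an illustration only, power modes on Jetson devices are normally switched with the `nvpmodel` utility (the mode number below is an assumption that depends on the board and JetPack release; MaxN is typically mode 0 on the AGX Orin):
+
+```
+sudo nvpmodel -q     # query the current power mode
+sudo nvpmodel -m 0   # switch to MaxN (peak performance)
+sudo jetson_clocks   # optionally lock the clocks at their maximum
+```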
+ +### Performance results MaxN + +The below table shows the performance comparison of our results under different settings and the Nvidia submission for MLPerf inference 3.0. We'll be updating our instructions for easier reproducibility of these numbers including CM scripts for flashing the L4T image and rebuilding the kernel for 64k pagesize. + + +| Workload | Results | L4T | PAGESIZE | Power Mode | FAN Dynamic Speed control | Offline Accuracy | Offline Performance | SingleStream Accuracy | SingleStream Performance | MultiStream Accuracy | MultiStream Performance | +| --------- | --------------------------------- | ----- | -------- | ---------- | ------------------------- | ---------------- | ------------------- | --------------------- | ------------------------ | -------------------- | ----------------------- | +| ResNet50 | Nvidia Submitted (docker) | r35.3 | 64k | MaxN | active | 75.934 | 6438.1 | 76.032 | 0.633479 | 76.032 | 2.187731 | +| ResNet50 | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 75.934 | 4697 | 76.032 | 0.72 | 76.032 | 2.57 | +| ResNet50 | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 75.85 | 6172 | 76.056 | 0.644 | 76.056 | 2.074 | +| ResNet50 | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 75.85 | 6430 | 76.056 | 0.659 | 76.056 | 2.20 | +| RetinaNet | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 37.372 | 92.4048 | 37.403 | 13.924457 | 37.519 | 104.680313 | +| RetinaNet | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 37.346 | 80.0854 (no DLA) | 37.350 | 14,19 | 37.409 | 105.344828 | +| RetinaNet | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 37.345 | 94.6886 | 37.340 | 14.073 | 37.488 | 103.8 | +| BERT | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 90.552 | 544.243 | 90.344 | 5.635431 | NA | NA | +| BERT | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 90.552 | 449.96 | 90.344 | 7.8 | NA | NA | +| BERT | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 90.562 | 527 (128 batchsize) | 90.311 | 6.636 | NA | NA | +| BERT | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 90.552 | 539 | 90.344 | 6.31 | NA | NA | + + diff --git a/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md new file mode 100644 index 0000000000..b72349ad59 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md @@ -0,0 +1,31 @@ +### Challenge + +Develop a reference implementation of any MLPerf inference benchmark to run on Amazon Inferentia. +Submit preliminary (unoptimized) benchmarking results to MLPerf inference v3.1 and beyond. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. 
+ + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). diff --git a/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json new file mode 100644 index 0000000000..66431963a5 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-amazon-inferentia-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "inferentia", + "mlperf-inference", + "mlperf-inference-inferentia", + "mlperf-inference-inferentia", + "mlperf-inference-inferentia-v3.1", + "mlperf-inference-inferentia-v3.1-2023", + "v3.1" + ], + "title": "Develop a reference implementation of any MLPerf inference benchmark to run on Amazon Inferentia and submit to MLPerf inference v3.1+", + "uid": "c8f2573320424e2a" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md new file mode 100644 index 0000000000..c08847da6a --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md @@ -0,0 +1,20 @@ +### Challenge + +Create any end-to-end AI application with web cam, speech recognition, chat bot, LLM +that uses any MLPerf model and CM automation. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +Looking forward to your submissions and happy hacking! 
+ +### Prizes + +* *All submitters will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All submitters will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* + + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) + diff --git a/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json new file mode 100644 index 0000000000..23fb64d835 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-create-end-to-end-app", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_open": "20230704", + "date_close_extension": true, + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "end-to-end-app", + "mlperf-inference", + "mlperf-inference-end-to-end-app", + "mlperf-inference-end-to-end-app", + "mlperf-inference-end-to-end-app-v3.1", + "mlperf-inference-end-to-end-app-v3.1-2023", + "v3.1" + ], + "title": "Generate end-to-end optimized AI apps (LLM, speech, etc) based on MLPerf inference results (with and without container)", + "uid": "96ca61a5aa914063" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md b/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md new file mode 100644 index 0000000000..f0f8908d29 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md @@ -0,0 +1,31 @@ +### Challenge + +Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 using +CM automation language with the DeepSparse library, any model and any platform. + +Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge Ltd](https://cKnowledge.org) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json b/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json new file mode 100644 index 0000000000..e1cc4f8880 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-deepsparse", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "experiments": [], + "points": 1, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "deepsparse", + "mlperf-inference", + "mlperf-inference-deepsparse", + "mlperf-inference-deepsparse", + "mlperf-inference-deepsparse-v3.1", + "mlperf-inference-deepsparse-v3.1-2023", + "v3.1" + ], + "title": "Run and optimize MLPerf inference v3.1 benchmarks with Neural Magic's DeepSparse library", + "trophies": true, + "uid": "c495863b08e74abc" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md new file mode 100644 index 0000000000..94fad05b51 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md @@ -0,0 +1,32 @@ +### Challenge + +Develop a reference implementation of any MLPerf inference benchmark to run on the latest publicly available Google TPU. +Submit preliminary (unoptimized) benchmarking results to MLPerf inference v3.1 and beyond. + +Note that you can use either GCP TPU or Coral TPU USB-Accelerator CPU card. +In the latter case, you can reuse and extend our CM-MLPerf script for MobileNets! + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json new file mode 100644 index 0000000000..3d5aecc950 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-google-tpu-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_open": "20230704", + "points":3, + "trophies":true, + "date_close_extension": true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "tpu", + "mlperf-inference", + "mlperf-inference-tpu", + "mlperf-inference-tpu", + "mlperf-inference-tpu-v3.1", + "mlperf-inference-tpu-v3.1-2023", + "v3.1" + ], + "title": "Develop a reference implementation of any MLPerf inference benchmark to run on the latest publicly available Google TPU (GCP or Coral USB accelerator) and submit to MLPerf inference v3.1+", + "uid": "5975fd0e18cd4073" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md new file mode 100644 index 0000000000..014f83f7d9 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md @@ -0,0 +1,52 @@ +### Introduction + +Open-source [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) +were developed by a [consortium of 50+ companies and universities (MLCommons)](https://mlcommons.org) +to enable trustable and reproducible comparison of AI/ML systems +in terms of latency, throughput, power consumption, accuracy and other metrics +across diverse software/hardware stacks from different vendors. + +However, it is difficult to customize and run MLPerf benchmarks with non-reference models. + +That's why the MLCommons Task Force on automation and reproducibility has developed +a [Collective Mind automation language](https://doi.org/10.5281/zenodo.8144274) +to modularize this benchmark and make it easier to run with different models and data sets. + + +### Challenge + +Implement a CM workflow to connect any Hugging Face model +to MLPerf loadgen and run it with random inputs to obtain a preliminary latency and througput +without accuracy. 
+ +Resources: +* [CM script to get ML model from Hugging Face zoo](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-huggingface-zoo) +* [CM script to convert Hugging Face model to ONNX](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/convert-ml-model-huggingface-to-onnx) +* [CM script to build MLPerf loadgen](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-loadgen) +* [CM script to run Python Loadgen with any ONNX model](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-loadgen-generic-python/README-extra.md) +* [MLPerf BERT FP32 model is available at Hugging Face](https://huggingface.co/ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1) + +Some results showcases CK workflow to benchmark Hugging Face models with MLPerf from v3.0 (BERT): +* https://access.cknowledge.org/playground/?action=experiments&name=2f1f70d8b2594149 +* https://access.cknowledge.org/playground/?action=experiments&name=mlperf-inference--v3.0--edge--open-power--language-processing--offline&result_uid=9d2594448bbb4b45 + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. 
+ + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + diff --git a/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json new file mode 100644 index 0000000000..146505b55a --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-hugging-face-models-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "huggingface", + "mlperf-inference", + "mlperf-inference-huggingface", + "mlperf-inference-huggingface", + "mlperf-inference-huggingface-v3.1", + "mlperf-inference-huggingface-v3.1-2023", + "v3.1" + ], + "title": "Implement CM automation to run benchmark Hugging Face models using MLPerf loadgen", + "uid": "72b95d08a9e04698" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md new file mode 100644 index 0000000000..aec0514730 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md @@ -0,0 +1,31 @@ +### Challenge + +Add CM interface to run MLPerf inference benchmarks on Intel-based platforms. + +You can start from reproducing any past MLPerf inference submission from Intel and their partners +and then adding CM automation. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json new file mode 100644 index 0000000000..c3d9adbe4c --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-intel-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20240104", + "date_open": "20230704", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "intel", + "mlperf-inference", + "mlperf-inference-intel", + "mlperf-inference-intel", + "mlperf-inference-intel-v3.1", + "mlperf-inference-intel-v3.1-2023", + "v3.1" + ], + "title": "Add the CM interface to run MLPerf inference benchmarks on Intel-based platforms", + "trophies": true, + "uid": "1c1d5da6766f4afb" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md new file mode 100644 index 0000000000..6aaf4e3947 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md @@ -0,0 +1,34 @@ +### Challenge + +Add support to run a reference implementation of any MLPerf inference benchmark using +[Mojo language]( https://github.com/modularml/mojo ) +from [Modular.ai](https://modular.ai). + +Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 with Mojo. + +Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *The first implementation will receive a cache prize from organizers.* +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge Ltd](https://cKnowledge.org) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json new file mode 100644 index 0000000000..e805879dee --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-modular-mojo-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "experiments": [], + "points": 1, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mojo", + "mlperf-inference", + "mlperf-inference-mojo", + "mlperf-inference-mojo", + "mlperf-inference-mojo-v3.1", + "mlperf-inference-mojo-v3.1-2023", + "v3.1" + ], + "title": "Run reference implementations of MLperf inference benchmarks using Mojo language from Modular.ai", + "trophies": true, + "uid": "0a8a7bb5572447db" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md new file mode 100644 index 0000000000..c16a9335a6 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md @@ -0,0 +1,33 @@ +### Challenge + +Add CM interface to run MLPerf inference benchmarks on Qualcomm AI100-based platforms. + +You can start from reproducing any past submission from Dell, Lenovo or HPE +and then adding CM automation. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json new file mode 100644 index 0000000000..07c626e259 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-qualcomm-ai100-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20240104", + "date_open": "20230704", + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "ai100", + "mlperf-inference", + "mlperf-inference-ai100", + "mlperf-inference-ai100", + "mlperf-inference-ai100-v3.1", + "mlperf-inference-ai100-v3.1-2023", + "v3.1" + ], + "title": "Add the CM interface to run MLPerf inference benchmarks on Qualcomm AI100-based platforms", + "uid": "09bd5f9e05ff46b1" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md new file mode 100644 index 0000000000..f8d9fbd71b --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md @@ -0,0 +1,41 @@ +### Challenge + +Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 using +CM automation language with Apache TVM, any model and any platform. + +Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [Deelvin](https://deelvin.com) +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge Ltd](https://cKnowledge.org) + +### Status + +This challenge is under preparation. + +* https://github.com/mlcommons/ck/pull/693 +* https://github.com/mlcommons/ck/pull/700 +* https://github.com/mlcommons/ck/pull/701 + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json new file mode 100644 index 0000000000..839fb6b86e --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-tvm-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":1, + "trophies":true, + "experiments": [], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "tvm", + "mlperf-inference", + "mlperf-inference-tvm", + "mlperf-inference-tvm", + "mlperf-inference-tvm-v3.1", + "mlperf-inference-tvm-v3.1-2023", + "v3.1" + ], + "title": "Run and optimize MLPerf inference v3.1 benchmarks with Apache TVM", + "uid": "29c416e245884746" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md new file mode 100644 index 0000000000..0a5fe9aa2c --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md @@ -0,0 +1,31 @@ +### Challenge + +Add more models and hardware backends to the [universal C++ implementation of MLPerf inference benchmarks)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/app-mlperf-inference-cpp) +being developed by the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md). + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json new file mode 100644 index 0000000000..e4e5cae105 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "cpp", + "mlperf-inference", + "mlperf-inference-cpp", + "mlperf-inference-cpp", + "mlperf-inference-cpp-v3.1", + "mlperf-inference-cpp-v3.1-2023", + "v3.1" + ], + "title": "Add more models and hardware backends to the universal C++ implementation of MLPerf inference benchmarks from MLCommons", + "trophies": true, + "uid": "518420b0e6dd4fed" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md new file mode 100644 index 0000000000..d587f62f89 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md @@ -0,0 +1,36 @@ +### Challenge + +Prepare, optimize and submit any benchmarking results to MLPerf inference v3.1 using +CM automation language on Windows. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Status + +Open ticket: [GitHub](https://github.com/mlcommons/ck/issues/696) + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json new file mode 100644 index 0000000000..1a55dcbe0f --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-windows-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":2, + "trophies":true, + "experiments": [], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "windows", + "mlperf-inference", + "mlperf-inference-windows", + "mlperf-inference-windows", + "mlperf-inference-windows-v3.1", + "mlperf-inference-windows-v3.1-2023", + "v3.1" + ], + "title": "Run and optimize MLPerf inference v3.1 benchmarks on Windows", + "uid": "53e56d714c7649c7" +} diff --git a/challenge/repro-mlperf-inf-v3.0-orin/README.md b/challenge/repro-mlperf-inf-v3.0-orin/README.md new file mode 100644 index 0000000000..54dd4feeb0 --- /dev/null +++ b/challenge/repro-mlperf-inf-v3.0-orin/README.md @@ -0,0 +1,16 @@ +### Challenge + +Reproduce MLPerf inference v3.0 benchmark results for Nvidia Jetson Orin +(performance, accuracy,power) and automate it using the +[MLCommons CK framework](https://github.com/mlcommons/ck). + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge](https://cKnowledge.org) + +### Status + +Finished. Preliminary results are available [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md). + diff --git a/challenge/repro-mlperf-inf-v3.0-orin/_cm.json b/challenge/repro-mlperf-inf-v3.0-orin/_cm.json new file mode 100644 index 0000000000..aff0fdba0f --- /dev/null +++ b/challenge/repro-mlperf-inf-v3.0-orin/_cm.json @@ -0,0 +1,23 @@ +{ + "alias": "repro-mlperf-inf-v3.0-orin", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230406", + "date_open": "20230301", + "experiments": [ + { + "tags": "mlperf-inference,v3.0" + } + ], + "_password_hash": "$2b$12$ionIRWe5Ft7jkn4y/7C6/eYoo6uBBMkGy/9SxwtKhaDRqZ1w2s3dO", + "tags": [ + "reproduce", + "replicate", + "automate", + "orin", + "nvidia", + "mlperf-inference-v3.0-orin" + ], + "title": "Reproduce MLPerf inference v3.0 results for Nvidia Jetson Orin", + "uid": "6d377c1a1b224636" +} diff --git a/challenge/repro-mlperf-inference-retinanet-scc2022/README.md b/challenge/repro-mlperf-inference-retinanet-scc2022/README.md new file mode 100644 index 0000000000..9917547c15 --- /dev/null +++ b/challenge/repro-mlperf-inference-retinanet-scc2022/README.md @@ -0,0 +1,39 @@ +### Challenge + +Reproduce the MLPerf inference RetinaNet benchmark during Student Cluster Competition at SuperComputing'22 +using the following [CM tutorial](https://github.com/mlcommons/ck/blob/master/docs/tutorials/sc22-scc-mlperf.md). + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [OctoML](https://octoml.ai) + +### Status + +This challenge has been successfully completed. 
+ +### Results + +Results from 10 international student teams are available at: +* [W&B dashboard 1 (during SCC'22)](https://wandb.ai/cmind/cm-mlperf-sc22-scc-retinanet-offline/table?workspace=user-gfursin) +* [W&B dashboard 2 (after SCC'22)](https://wandb.ai/cmind/cm-mlperf-dse-testing/table?workspace=user-gfursin) + + +### Acknowledgments + +We thank +[Hai Ah Nam](https://www.nersc.gov/about/nersc-staff/advanced-technologies-group/hai-ah-nam), +[Steve Leak](https://www.linkedin.com/in/steve-leak), +[Vijay Janappa Reddi](https://scholar.harvard.edu/vijay-janapa-reddi/home), +[Tom Jablin](https://scholar.google.com/citations?user=L_1FmIMAAAAJ&hl=en), +[Ramesh N Chukka](https://www.linkedin.com/in/ramesh-chukka-74b5b21), +[Peter Mattson](https://www.linkedin.com/in/peter-mattson-33b8863/), +[David Kanter](https://www.linkedin.com/in/kanterd), +[Pablo Gonzalez Mesa](https://www.linkedin.com/in/pablo-gonzalez-mesa-952ab2207), +[Thomas Zhu](https://www.linkedin.com/in/hanwen-zhu-483614189), +[Thomas Schmid](https://www.linkedin.com/in/tschmid) +and [Gaurav Verma](https://www.linkedin.com/in/grverma) +for their suggestions and contributions. + + diff --git a/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json b/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json new file mode 100644 index 0000000000..68352f9c3b --- /dev/null +++ b/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json @@ -0,0 +1,20 @@ +{ + "alias": "repro-mlperf-inference-retinanet-scc2022", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20221201", + "date_open": "20221101", + "tags": [ + "modularize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-scc", + "mlperf-inference-scc-2022" + ], + "title": "Automate MLPerf RetinaNet benchmark at the Student Cluster Competition at SuperComputing'22 using CM", + "uid": "e71fa8b396874e68" +} diff --git a/challenge/repro-mlperf-inference-v4.0-2024/README.md b/challenge/repro-mlperf-inference-v4.0-2024/README.md new file mode 100644 index 0000000000..af23eb1205 --- /dev/null +++ b/challenge/repro-mlperf-inference-v4.0-2024/README.md @@ -0,0 +1,3 @@ +The [MLCommons](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) +are preparing a unified interface to reproduce results from the MLPerf inference benchmark submission v4.0. +Please feel free to join the testing phase using [GitHub issues](https://github.com/mlcommons/ck/issues)! diff --git a/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml b/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml new file mode 100644 index 0000000000..01bcfd52a7 --- /dev/null +++ b/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml @@ -0,0 +1,25 @@ +alias: repro-mlperf-inference-v4.0-2024 +uid: e6b8738383eb46d0 + +automation_alias: challenge +automation_uid: 3d84abd768f34e08 + +title: Reproduce and automate MLPerf inference benchmark results v4.0 from different vendors (Intel, Nvidia, Qualcomm, Google, NeuralMagic, ...) 
using CM
+
+date_open: '20240201'
+
+tags:
+- modularize
+- optimize
+- reproduce
+- replicate
+- automate
+- benchmark
+- mlperf
+- mlperf-inference
+- mlperf-inference-v4.0
+- mlperf-inference-v4.0-2024
+- v4.0
+
+experiments:
+- tags: mlperf-inference,v4.0
diff --git a/challenge/repro-mlperf-inference-v4.1-2024/README.md b/challenge/repro-mlperf-inference-v4.1-2024/README.md
new file mode 100644
index 0000000000..1aacc2d59d
--- /dev/null
+++ b/challenge/repro-mlperf-inference-v4.1-2024/README.md
@@ -0,0 +1,4 @@
+The [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) and [MLCommons](https://mlcommons.org)
+are preparing an open reproducibility challenge to reproduce various results from the MLPerf inference benchmark v4.1
+using the MLCommons CM automation framework. Please stay tuned for more details!
+
diff --git a/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml b/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml
new file mode 100644
index 0000000000..840d58318d
--- /dev/null
+++ b/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml
@@ -0,0 +1,22 @@
+alias: repro-mlperf-inference-v4.1-2024
+uid: 2093f4d750144df4
+
+automation_alias: challenge
+automation_uid: 3d84abd768f34e08
+
+title: 'Reproduce the upcoming MLPerf inference benchmark v4.1 results'
+
+date_open: '20240901'
+
+tags:
+- modularize
+- optimize
+- reproduce
+- replicate
+- automate
+- benchmark
+- mlperf
+- mlperf-inference
+- mlperf-inference-v4.1
+- mlperf-inference-v4.1-2024
+- v4.1
diff --git a/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md
new file mode 100644
index 0000000000..0f59f59f0e
--- /dev/null
+++ b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md
@@ -0,0 +1,36 @@
+### Challenge
+
+Reproduce and automate [TinyMLPerf benchmarks](https://github.com/mlcommons/tiny).
+
+### Organizers
+
+* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce)
+* [cTuning foundation](https://cTuning.org)
+* [cKnowledge Ltd](https://cKnowledge.org)
+
+### Status
+
+We have successfully reproduced the [TinyMLPerf v1.0 submission with microTVM on the STMicroelectronics NUCLEO-L4R5ZI board](https://github.com/mlcommons/tiny_results_v1.0/tree/main/closed/OctoML),
+automated it with the latest version of the [MLCommons CM automation language](https://github.com/mlcommons/ck/blob/master/docs/README.md),
+submitted the reproduced results to the TinyMLPerf v1.1 round,
+and added all past TinyMLPerf results to the [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny)
+for further collaborative analysis and improvement.
+
+Please check our tutorial and reproducibility report:
+* [Automate TinyMLPerf benchmark](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/automate-mlperf-tiny.md) - useful for all SW/HW stacks and submission rounds.
+* [Reproduce TinyMLPerf v1.0 submission](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/reproduce-mlperf-tiny.md).
+
+TinyMLPerf v1.1 results will be published at the [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny)
+in mid June 2023.
+ +### Related discussions for the future + +* https://github.com/mlcommons/ck/pull/693 +* https://github.com/mlcommons/ck/pull/700 +* https://github.com/mlcommons/ck/pull/701 +* https://github.com/mlcommons/ck/issues/606 + +### Results + +All results will be available in [this GitHub repo](https://github.com/ctuning/cm4mlperf-results) +and can be visualized and compared using the [MLCommons Collective Knowledge Playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny). diff --git a/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json new file mode 100644 index 0000000000..4e9e248505 --- /dev/null +++ b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json @@ -0,0 +1,23 @@ +{ + "alias": "reproduce-and-automate-tinymlperf-v1.1-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230519", + "date_open": "20230501", + "experiments": [], + "tags": [ + "modularize", + "automate", + "reproduce", + "replicate", + "optimize", + "benchmark", + "tinymlperf", + "tinymlperf-inference", + "tinymlperf-inference-v3.0", + "tinymlperf-inference-v3.0-2023", + "v1.0" + ], + "title": "Reproduce and optimize TinyMLPerf inference v1.1 benchmarks", + "uid": "d98cd66e0e5641f7" +} diff --git a/challenge/reproduce-mlperf-training-v3.0-2023/README.md b/challenge/reproduce-mlperf-training-v3.0-2023/README.md new file mode 100644 index 0000000000..a1f1ea22ac --- /dev/null +++ b/challenge/reproduce-mlperf-training-v3.0-2023/README.md @@ -0,0 +1,17 @@ +### Challenge + +Prepare, optimize and reproduce MLPerf training v3.0 benchmarks +using the [MLCommons CM (CK2) automation framework](https://github.com/mlcommons/ck) + +### Status + +We could not do a successful submission mainly because the training scripts were not converging on a single GPU. We tried resnet and bert training. The below CM scripts are added to do MLPerf training for BERT using the reference and NVIDIA implementations. + +1. [BERT Training using Nvidia code](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-training-nvidia) +2. 
[BERT Training using MLPerf Reference code](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-training-reference) + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge](https://cKnowledge.org) diff --git a/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json b/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json new file mode 100644 index 0000000000..d1e5eddea8 --- /dev/null +++ b/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json @@ -0,0 +1,23 @@ +{ + "alias": "reproduce-mlperf-training-v3.0-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230519", + "date_open": "20230501", + "experiments": [], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-training", + "mlperf-training-v3.0", + "mlperf-training-v3.0-2023", + "v3.0" + ], + "title": "Reproduce MLPerf training v3.0 benchmarks", + "uid": "1d26149c1cce4da3" +} diff --git a/challenge/run-mlperf@home-v3.1-cpu/README.md b/challenge/run-mlperf@home-v3.1-cpu/README.md new file mode 100644 index 0000000000..bd734f7896 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/README.md @@ -0,0 +1,67 @@ +### Introduction + +The goal of this MLPerf@home challenge is to help the community find +the most efficient CPU (Intel/AMD/Arm) for BERT-99 model with DeepSparse engine +and different variations of MobileNets/EfficientNets with TFLite +in terms of latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics. + +We would like to ask you to run a few [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) +with BERT and MobileNets/EfficientNets on one or more systems with different CPUs +that you have an access to: laptops, servers, cloud instances... + +You will be able to run benchmarks, collect all metrics and submit results in an automated way +in a native environment or Docker container using the portable and technology-agnostic +[MLCommons Collective Mind automation language (CM)](https://doi.org/10.5281/zenodo.8105339). + +Your name and benchmark submissions will be published in the official MLCommons inference v3.1 results +on September 1, 2023 (submission deadline: August 4, 2023), +will be published in the [official leaderboard](https://access.cknowledge.org/playground/?action=contributors), +will be included to the prize draw, and will be presented in our upcoming ACM/HiPEAC events. + +Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck) +to help the community improve CM automation workflows to run MLPerf benchmarks on any system with any software/hardware stack. + +Thank you in advance for helping the community find Pareto-efficient AI/ML Systems! 
+ +### Minimal requirements + +* CPU: Any x86-64 or Arm64 +* OS: + * native: any Linux (tested on Ubuntu 22.04) + * Docker: any OS +* Disk space: + * BERT-99: ~ 20GB + * Different variations of MobileNets/EfficientNets: ~ 140GB +* Time to run: + * BERT-99: ~ 2 hours + * Different variations of MobileNets/EfficientNets: ~ 2 days + +### Instructions to run benchmarks and submit results + +You can run any of these benchmarks or all depending on available time: + +* [Automated Design Space Exploration of MobileNets/EfficientNets; TFLite MLPerf implementation; native environment or Docker](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md) +* [BERT-99 model; DeepSparse MLPerf implementation; native environment](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-cpu/run-cpu-bert-99-deepsparse.md) + +### Results + +All accepted results with submitter names will be publicly available +at the official [MLCommons website](https://mlcommons.org) +and in the [Collective Knowledge explorer (MLCommons CK)](https://access.cknowledge.org/playground/?action=experiments) +along with the reproducibility and automation report to help the community +build efficient AI/ML systems. + + +### Organizers + +* [MLCommons Task Force on Automation and Reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Advanced challenges + +If you feel that running these benchmarks was relatively easy, +please try [more advanced challenges](https://access.cknowledge.org/playground/?action=challenges), +read about our [plans and long-term vision](https://doi.org/10.5281/zenodo.8105339), +check [CM documentation](https://github.com/mlcommons/ck/blob/master/docs/README.md) +and run other [MLPerf benchmarks](https://github.com/mlcommons/ck/tree/master/docs/mlperf). diff --git a/challenge/run-mlperf@home-v3.1-cpu/_cm.json b/challenge/run-mlperf@home-v3.1-cpu/_cm.json new file mode 100644 index 0000000000..88f4716cda --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/_cm.json @@ -0,0 +1,21 @@ +{ + "alias": "run-mlperf@home-v3.1-cpu", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_open": "20230725", + "experiments": [], + "points": 2, + "sort": -20, + "tags": [ + "run", + "mlperf", + "inference", + "v3.1", + "mlperf-inference-v3.1-simple-cpu" + ], + "title": "Work with the community to find the most efficient CPUs (Intel/AMD/Arm) for BERT and MobileNets/EfficientNets (latency, throughput, accuracy, number of cores, frequency, memory size, cost and other metrics)", + "skip": true, + "trophies": true, + "uid": "498f33f3dac647c1" +} diff --git a/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md new file mode 100644 index 0000000000..b4266ffa97 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md @@ -0,0 +1,100 @@ +# Introduction + +This guide will help you automatically run the MLPerf inference benchmark v3.1 with BERT-99 model and DeepSparse engine +on any Linux-based system with Intel, AMD or Arm CPU. + +This benchmark is automated by the MLCommons CM language and you should be able to submit official MLPerf v3.1 inference results +for offline scenario in open division and edge category. 
+ +It will require ~20GB of disk space and can take ~2 hours to run on 1 system. + + + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. + +Note that CM will store all such repositories and downloaded/installed data sets, models and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup virtual environment + +We suggest you to setup a Python virtual environment via CM to avoid contaminating your existing Python installation: + +```bash +cm run script "install python-venv" --name=mlperf --version_min=3.8 +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" +``` + +CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: +```bash +cm show cache --tags=python-venv +``` + +Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. + +You can clean it at any time and start from scratch using the following command: +```bash +cm rm cache -f +``` + +Alternatively, you can remove specific entries using tags: +```bash +cm show cache +cm rm cache --tags=tag1,tag2,... +``` + + + + +### Do a test run to detect and record the system performance + +```bash +cm run script --tags=generate-run-cmds,inference,_find-performance \ +--model=bert-99 --implementation=reference --device=cpu --backend=deepsparse \ +--category=edge --division=open --quiet --scenario=Offline +``` + +### Do full accuracy and performance run + +``` +cm run script --tags=generate-run-cmds,inference,_submission --model=bert-99 \ +--device=cpu --implementation=reference --backend=deepsparse \ +--execution-mode=valid --results_dir=$HOME/results_dir \ +--category=edge --division=open --quiet --scenario=Offline +``` +### Generate and upload MLPerf submission + +Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/Submission.md) to generate the submission tree and upload your results. 
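+
+For reference, the submission-generation step usually looks like the following sketch, adapted from the GPT-J guide in this repository; treat the `--submitter` value and the directory paths as examples that you may need to adjust for your setup:
+
+```bash
+cmr "generate inference submission" \
+   --clean \
+   --submitter=cTuning \
+   --results_dir=$HOME/results_dir/valid_results \
+   --submission_dir=$HOME/inference_submission_tree \
+   --preprocess_submission=yes \
+   --adr.compiler.tags=gcc \
+   --adr.inference-src.version=master \
+   --run-checker
+```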
+ + diff --git a/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md new file mode 100644 index 0000000000..f41b1b463b --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md @@ -0,0 +1,77 @@ +# Introduction + +This guide will help you automatically run the MLPerf inference benchmark v3.1 with multiple variations of MobileNets and EfficientNets +and TFLite on any Linux-based system with Intel, AMD or Arm CPU. + +This benchmark is automated by the MLCommons CM language and you should be able to submit official MLPerf v3.1 inference results +for singlestream scenario in open division and edge category. + +It will require ~140GB of disk space and can take ~2 days to run on 1 system producing 243 MLPerf results +during automatic design space exploration to trade off accuracy vs performance. + + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. + +Note that CM will store all such repositories and downloaded/installed data sets, models and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup virtual environment + +We suggest you to setup a Python virtual environment via CM to avoid contaminating your existing Python installation: + +```bash +cm run script "install python-venv" --name=mlperf --version_min=3.8 +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" +``` + +CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: +```bash +cm show cache --tags=python-venv +``` + +Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. + +You can clean it at any time and start from scratch using the following command: +```bash +cm rm cache -f +``` + +Alternatively, you can remove specific entries using tags: +```bash +cm show cache +cm rm cache --tags=tag1,tag2,... 
+``` + + diff --git a/challenge/run-mlperf@home-v3.1-gpu/README.md b/challenge/run-mlperf@home-v3.1-gpu/README.md new file mode 100644 index 0000000000..b6482d3835 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/README.md @@ -0,0 +1,65 @@ +### Introduction + +The goal of this MLPerf@home challenge is to help the community find +the most efficient Nvidia GPUs for GPT-J 6B model and BERT-99 in terms of +latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics. + +We would like to ask you to run a few [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) +with GPT-J and BERT-99 models on one or more systems with different Nvidia GPUs +that you have an access to: laptops, servers, cloud instances... + +You will be able to run benchmarks, collect all metrics and submit results in an automated way +in a native environment or Docker container using the portable and technology-agnostic +[MLCommons Collective Mind automation language (CM)](https://doi.org/10.5281/zenodo.8105339). + +Your name and benchmark submissions will be published in the official MLCommons inference v3.1 results +on September 1, 2023 (**submission deadline: August 17, 2023**), +will be published in the [official leaderboard](https://access.cknowledge.org/playground/?action=contributors), +will be included to the prize draw, and will be presented in our upcoming ACM/HiPEAC events. + +Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck) +to help the community improve CM automation workflows to run MLPerf benchmarks on any system with any software/hardware stack. + +Thank you in advance for helping the community find Pareto-efficient AI/ML Systems! + +### Minimal requirements + +* GPU: Nvidia +* GPU memory: + * GPT-J 6B: min 24GB + * BERT-99: min 8..16GB +* OS: + * native: any Linux (tested on Ubuntu 22.04) + * Docker: any OS + any Linux (tested on Ubuntu 22.04) +* Disk space: ~30GB per model/data set +* Time to run: + * GPT-J 6B: ~ 1 day + * BERT-99: ~ 2 hours + +### Instructions to run benchmarks and submit results + +* [GPT-J 6B model (24GB min GPU memory); PyTorch+CUDA; native environment](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md) +* [BERT-99 model (8GB min GPU memory); TensorRT; Docker](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md) + +### Results + +All accepted results with submitter names will be publicly available +at the official [MLCommons website](https://mlcommons.org) +and in the [Collective Knowledge explorer (MLCommons CK)](https://access.cknowledge.org/playground/?action=experiments) +along with the reproducibility and automation report to help the community +build efficient AI/ML systems. 
+ +### Organizers + +* [MLCommons Task Force on Automation and Reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Advanced challenges + +If you feel that running these benchmarks was relatively easy, +please try [more advanced challenges](https://access.cknowledge.org/playground/?action=challenges), +read about our [plans and long-term vision](https://doi.org/10.5281/zenodo.8105339), +check [CM documentation](https://github.com/mlcommons/ck/blob/master/docs/README.md) +and run other [MLPerf benchmarks](https://github.com/mlcommons/ck/tree/master/docs/mlperf). diff --git a/challenge/run-mlperf@home-v3.1-gpu/_cm.json b/challenge/run-mlperf@home-v3.1-gpu/_cm.json new file mode 100644 index 0000000000..af7deeadae --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/_cm.json @@ -0,0 +1,20 @@ +{ + "alias": "run-mlperf@home-v3.1-gpu", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_open": "20230725", + "experiments": [], + "points": 2, + "sort": -30, + "tags": [ + "run", + "mlperf", + "inference", + "v3.1", + "mlperf-inference-v3.1-simple-cpu" + ], + "title": "Work with the community to find the most efficient Nvidia GPUs for GPT-J 6B model and BERT (latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics)", + "trophies": true, + "uid": "54230c3b66564cef" +} diff --git a/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md new file mode 100644 index 0000000000..f543c23621 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md @@ -0,0 +1,193 @@ +# Introduction + +This guide will help you run the Nvidia implementation of the MLPerf inference benchmark v3.1 +with BERT-99 model and TensorRT on any Linux-based system with Nvidia GPU (8..16GB min memory required) +and Docker. + +This benchmark is semi-automated by the [MLCommons CM language](https://doi.org/10.5281/zenodo.8105339) +and you should be able to submit official MLPerf v3.1 inference results +for all scenarios in closed division and edge category +(**deadline to send us results for v3.1 submission: August 3, 2023**). + + +It will require ~30GB of disk space and can take ~2 hours to run on 1 system. + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. 
+ +Note that CM will store all such repositories and downloaded/installed data sets, models and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup CUDA and Docker container + +### Download CUDA 11.8 + +Nvidia recommends the following version of CUDA to be used with their MLPerf inference implementation: + +``` +wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run +``` + +However, you are very welcome to try another version! + +### Download cuDNN, TensorRT + +For x86 machines, please download the following TAR files: +1. [cuDNN](https://developer.nvidia.com/cudnn) - note that Nvidia recommends `cudnn-linux-x86_64-8.9.2.26_cuda11-archive.tar.xz` + but you are welcome to try another version +2. [TensorRT](https://developer.nvidia.com/tensorrt) - note that Nvidia recommends `TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz` + but you can try another version + + +### Set up Nvidia Docker container with MLPerf benchmarks + +1. [Install Docker](https://docs.docker.com/engine/install/) and [Nvidia container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) + +2. Give Docker permission to the current user + ``` + sudo usermod -aG docker $USER + ``` + Logout and login + Restart docker if required and confirm that Nvidia container toolkit is working by + ``` + nvidia-ctk --version + ``` + +3. Check if Nvidia driver is working properly on the host. + ``` + nvidia-smi + ``` + If the above command produces any error you'll need to install Nvidia drivers on the host. You can do this via CM if you have sudo access + ``` + cmr "install cuda prebuilt _driver" --version=11.8.0 + ``` + + +4. Build the docker container and mount the paths from the host machine. + + *You may need to change --cuda_run_file_path, --tensorrt_tar_file_path and --cudnn_tar_file_path if you downloaded other versions than recommended by Nvidia.* + + *You may want to change the `scratch_path` location as it can take 100s of GBs.* + + ```bash + cm docker script --tags=build,nvidia,inference,server \ + --cuda_run_file_path=$HOME/cuda_11.8.0_520.61.05_linux.run \ + --tensorrt_tar_file_path=$HOME/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + --cudnn_tar_file_path=$HOME/cudnn-linux-x86_64-8.9.2.26_cuda11-archive.tar.xz \ + --scratch_path=$HOME/mlperf_scratch \ + --docker_cm_repo=mlcommons@ck \ + --results_dir=$HOME/results_dir \ + --submission_dir=$HOME/submission_dir \ + --adr.compiler.tags=gcc + ``` + +5. At the end of the build you'll get a prompt - please enter your system name such as "aws_nvidia_t4" + (note that space, `-` and other special characters are not allowed), + and say `yes` to generating the configuration files. + + ``` + ============================================ + => A system ID is a string containing only letters, numbers, and underscores + => that is used as the human-readable name of the system. It is also used as + => the system name when creating the measurements/ and results/ entries. + => This string should also start with a letter to be a valid Python enum member name. 
+ => Specify the system ID to use for the current system: phoenix + => Reloaded system list. MATCHED_SYSTEM: KnownSystem.phoenix + => This script will generate Benchmark Configuration stubs for the detected system. + Continue? [y/n]: y + ``` + Now you'll be inside the CM Nvidia docker container and can access Nvidia implementations of MLPerf inference benchmarks. + +6. Once the build is complete, you can run Nvidia implementations of MLPerf inference benchmarks + using the unified CM interface. + + You can also save the container at this stage using [Docker commit](https://docs.docker.com/engine/reference/commandline/commit/) + so that it can be launched later without having to go through the previous steps. + + +### Do a test run to detect and record the system performance + +``` +cmr "generate-run-cmds inference _find-performance _all-scenarios" \ + --model=bert-99 \ + --implementation=nvidia-original \ + --device=cuda \ + --backend=tensorrt \ + --category=edge \ + --division=closed \ + --test_query_count=1000 \ + --quiet +``` + +### Do full accuracy and performance runs + +``` +cmr "generate-run-cmds inference _submission _allscenarios" \ + --model=bert-99 \ + --device=cuda \ + --implementation=nvidia-original \ + --backend=tensorrt \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --category=edge \ + --division=closed \ + --quiet +``` + +* `--offline_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +### Populate the README files describing your submission + +``` +cmr "generate-run-cmds inference _populate-readme _all-scenarios" \ + --model=bert-99 \ + --device=cuda \ + --implementation=nvidia-original \ + --backend=tensorrt \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --category=edge \ + --division=closed \ + --quiet +``` + +### Generate and upload MLPerf submission + +Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/Submission.md) to generate the submission tree and upload your results. + + +## Questions? Suggestions? + +Please follow the [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) +and [MLCommons](https://mlcommons.org). diff --git a/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md new file mode 100644 index 0000000000..39b1cc0de2 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md @@ -0,0 +1,314 @@ +# Introduction + +This guide will help you run the reference implementation of the MLPerf inference benchmark v3.1 +with GPT-J 6B model and PyTorch on any Linux-based system with Nvidia GPU (24GB min memory required) +using the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339). + +CM will help you to obtain performance and accuracy numbers for GPT-J 6B model on your system +for the SingleStream scenario and submit them to the official MLPerf v3.1 inference benchmarking round +in open division and edge category +(**deadline to send us results for v3.1 submission: August 3, 2023**). + +You can read more about scenarios, divisions and categories of MLPerf inference benchmarks +in this [MLPerf inference benchmark paper](https://arxiv.org/abs/1911.02549) - +our goal is to help the community compare performance, accuracy and other metrics of popular models across diverse systems +in an automated, unified and reproducible way! 
+ +This benchmark will require ~30GB of disk space and can take ~1 day to run on one system +to have a valid MLPerf result. + + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://github.com/mlcommons/ck) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. + +Note that CM will store all such repositories and downloaded/installed data sets, models, and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup virtual environment + +We suggest you to setup a Python virtual environment via CM to avoid contaminating your existing Python installation: + +```bash +cm run script "install python-venv" --name=mlperf --version_min=3.8 +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" +``` + +CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: +```bash +cm show cache --tags=python-venv +``` + +Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. + +You can clean it at any time and start from scratch using the following command: +```bash +cm rm cache -f +``` + +Alternatively, you can remove specific entries using tags: +```bash +cm show cache +cm rm cache --tags=tag1,tag2,... +``` + + +## Do the performance run + +Now you can run MLPerf inference benchmark to measure performance of GPT-J using CM command as follows +(note that `cmr` is equivalent to `cm run script`): + +```bash +cm run script --tags=generate-run-cmds,inference,_performance-only \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + +Note that this command will need to automatically download the model (24GB) +and [CNN Daily Mail dataset (relatively small)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-dataset-cnndm)! + +The benchmark run is expected to finish within 10-100 minutes depending on the performance of your GPU. 
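+
+While the performance run is in progress, you may want to monitor GPU utilization from a separate terminal; one simple option (assuming the standard `nvidia-smi` tool already used in these guides is available) is:
+
+```bash
+watch -n 5 nvidia-smi
+```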
+ +In the end of the valid run, you should see [output](https://github.com/ctuning/mlperf_inference_submissions_v3.1/blob/main/open/cTuning/results/amd_zen4_workstation-reference-gpu-pytorch-v2.0.1-default_config/gptj-99/singlestream/performance/run_1/mlperf_log_summary.txt) similar to + +```txt +================================================ +MLPerf Results Summary +================================================ +SUT name : PySUT +Scenario : SingleStream +Mode : PerformanceOnly +90th percentile latency (ns) : 4751920830 +Result is : VALID + Min duration satisfied : Yes + Min queries satisfied : Yes + Early stopping satisfied: Yes +Early Stopping Result: + * Processed at least 64 queries (201). + * Would discard 9 highest latency queries. + * Early stopping 90th percentile estimate: 5387449249 + * Not enough queries processed for 99th percentile + early stopping estimate (would need to process at + least 662 total queries). + +================================================ +Additional Stats +================================================ +QPS w/ loadgen overhead : 0.33 +QPS w/o loadgen overhead : 0.33 + +Min latency (ns) : 881803157 +Max latency (ns) : 5939081711 +Mean latency (ns) : 3008773902 +50.00 percentile latency (ns) : 2788885477 +90.00 percentile latency (ns) : 4751920830 +95.00 percentile latency (ns) : 5307244203 +97.00 percentile latency (ns) : 5677375096 +99.00 percentile latency (ns) : 5927209480 +99.90 percentile latency (ns) : 5939081711 + +================================================ +Test Parameters Used +================================================ +samples_per_query : 1 +target_qps : 2000 +target_latency (ns): 0 +max_async_queries : 1 +min_duration (ms): 600000 +max_duration (ms): 620000 +min_query_count : 100 +max_query_count : 0 +qsl_rng_seed : 148687905518835231 +sample_index_rng_seed : 520418551913322573 +schedule_rng_seed : 811580660758947900 +accuracy_log_rng_seed : 0 +accuracy_log_probability : 0 +accuracy_log_sampling_target : 0 +print_timestamps : 0 +performance_issue_unique : 0 +performance_issue_same : 0 +performance_issue_same_index : 0 +performance_sample_count : 13368 + +No warnings encountered during test. + +No errors encountered during test. +``` + + +## Do the accuracy run + +```bash +cm run script --tags=generate-run-cmds,inference,_accuracy-only \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + +This accuracy run can take many hours (typically 12..46 hours). You can estimate it using the QPS (queries per second) +from the previous performance run as follows: + +accuracy time = data set / QPS = 13368 / QPS . + +For example, if your reported QPS is 0.1 (equivalent to 10000 ms latency), it will take 13368/0.1 ~ 37 hours. 
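+
+If it helps, that estimate can be scripted; this is a minimal sketch assuming you take the QPS value from the performance-run `mlperf_log_summary.txt` shown earlier (13368 is the dataset size used in the formula above):
+
+```bash
+# Estimate the accuracy-run duration from the measured performance-run QPS
+QPS=0.33              # e.g. "QPS w/o loadgen overhead" from mlperf_log_summary.txt
+DATASET_SAMPLES=13368 # number of samples processed by the accuracy run
+awk -v qps="$QPS" -v n="$DATASET_SAMPLES" \
+    'BEGIN { printf "Estimated accuracy run time: ~%.1f hours\n", n / qps / 3600 }'
+```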
+ + + +## Populate the MLPerf README files describing your submission + +Now you can use CM to automatically populate README files mandated by MLPerf to describe your submission +(we also show you a simpler syntax of `cmr` instead of `cm run script --tags=`): + +```bash +cmr "generate-run-cmds inference _populate-readme" \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + + +## Generate MLPerf submission + +Unless your organization is an official member of MLCommons, you will be able to participate in the official MLPerf inference community submission +via the cTuning foundation (founding member of MLCommons). + +You should update the following flags in the below CM command: +* Use `--hw_notes_extra` option to add your name to the submission such as `--hw_notes_extra="Result taken by NAME" `. +* Use `--hw_name="My system name"` to give a meaningful system name describing your GPU. + Examples can be seen [here](https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning/systems). +* Use `--submitter=` if your organization is an official MLCommons member and you would like to submit under your organization. + +You should use the master branch of MLCommons inference repo for the submission checker: + +```bash +cmr "generate inference submission" \ + --clean \ + --submitter=cTuning \ + --results_dir=$HOME/results_dir/valid_results \ + --submission_dir=$HOME/inference_submission_tree \ + --preprocess_submission=yes \ + --adr.compiler.tags=gcc \ + --adr.inference-src.version=master \ + --run-checker +``` + +## Push the results to GitHub repo + +1. Create a fork of [this cTuning repo with the community results](https://github.com/ctuning/mlperf_inference_submissions_v3.1). + +2. Run the following command after replacing `--repo_url` with your fork URL. + + ``` + cmr "push github mlperf inference submission" \ + --submission_dir=$HOME/inference_submission_tree \ + --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.1/ \ + --commit_message="GPTJ results on added by " + ``` + +3. Create a PR to the [cTuning repo with the community results](https://github.com/ctuning/mlperf_inference_submissions_v3.1) + + + + + + + + + +## Additional performance optimization challenge for interested enthusiasts + +The MLPerf GPT-J inference benchmark is implemented in this [backend.py](https://github.com/mlcommons/inference/blob/master/language/gpt-j/backend.py). + +It is automatically installed and cached by CM. You can find it on your system using this command: +```bash +cd `cm find cache --tags=inference,src,_branch.master`/language/gpt-j +ls backend.py +``` + +The original model is available at the [Hugging Face Zoo](https://huggingface.co/EleutherAI/gpt-j-6b). It was fine-tuned by Intel for this benchmark +and is available at the MLCommons cloud. It is automatically downloaded by CM using [this script](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-ml-model-gptj/_cm.json). + +You can try to improve the performance (QPS) on this code or fine-tune model and substitute the default one +in [this line](https://github.com/mlcommons/inference/blob/master/language/gpt-j/backend.py#L27). 
+ +Some examples of fine-tuning can be seen [here](https://betterprogramming.pub/fine-tuning-gpt-j-6b-on-google-colab-or-equivalent-desktop-or-server-gpu-b6dc849cb205). + +Any better performance or accuracy result will be very valuable to the community. + +After any modification, you can redo a quick performance run to see the performance difference. +``` +cm run script --tags=generate-run-cmds,inference,_performance-only \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + + + diff --git a/challenge/train-llm-for-cm-mlperf-2023/README.md b/challenge/train-llm-for-cm-mlperf-2023/README.md new file mode 100644 index 0000000000..4e9f6cf178 --- /dev/null +++ b/challenge/train-llm-for-cm-mlperf-2023/README.md @@ -0,0 +1,20 @@ +### Challenge + +Improve the prototype of our LLM-based assistant to suggest users how to run MLPerf inference benchmarks +using the MLCommons CM automation language: https://access.cknowledge.org/assistant . + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *Get in touch with organizers for more info!* + + +### Organizers + +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) diff --git a/challenge/train-llm-for-cm-mlperf-2023/_cm.json b/challenge/train-llm-for-cm-mlperf-2023/_cm.json new file mode 100644 index 0000000000..ce6009db37 --- /dev/null +++ b/challenge/train-llm-for-cm-mlperf-2023/_cm.json @@ -0,0 +1,21 @@ +{ + "alias": "train-llm-for-cm-mlperf-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close_extension": true, + "date_open": "20230704", + "experiments": [], + "points": 3, + "tags": [ + "train", + "improve", + "llm", + "assistant", + "mlperf-llm", + "mlperf-llm-assistant", + "mlperf-assistant" + ], + "title": "Train and improve LLM to suggest users how to run MLPerf inference benchmarks using CM automation language", + "trophies": true, + "uid": "d37bf37a24c44ec3" +} diff --git a/project/mlperf-inference-v3.0-submissions/README.md b/project/mlperf-inference-v3.0-submissions/README.md new file mode 100644 index 0000000000..7ad8080b0c --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/README.md @@ -0,0 +1,10 @@ +Graphs: + https://cknowledge.org/cm-gui-graph/?tags=mlperf-inference,all,open,edge,image-classification,singlestream + https://cknowledge.org/cm-gui-graph/?tags=mlperf-inference,v3.0,open,edge,image-classification,singlestream&x=Result&y=Accuracy + + http://localhost:8501/?tags=mlperf-inference,v3.0,open,edge,image-classification,singlestream&x=Result&y=Accuracy + http://localhost:8501/?tags=mlperf-inference,all,open,edge,image-classification,singlestream&x=Result&y=Accuracy + +Local: + cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.1" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.1 + cm run script "gui _graph" diff --git a/project/mlperf-inference-v3.0-submissions/_cm.json 
b/project/mlperf-inference-v3.0-submissions/_cm.json new file mode 100644 index 0000000000..2cc81aa8b0 --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/_cm.json @@ -0,0 +1,7 @@ +{ + "alias": "mlperf-inference-v3.0-submissions", + "automation_alias": "project", + "automation_uid": "6882553224164c56", + "tags": [], + "uid": "f571becbcbd44a7d" +} diff --git a/project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md b/project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md new file mode 100644 index 0000000000..9aae9bbe55 --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md @@ -0,0 +1,285 @@ +# Crowd-benchmarking MLPerf BERT inference + +
+Click here to see the table of contents. + +* [Crowd-benchmarking MLPerf BERT inference](#crowd-benchmarking-mlperf-bert-inference) +* [System preparation](#system-preparation) + * [Minimal system requirements](#minimal-system-requirements) + * [Install CM (CK2) automation meta-framework](#install-cm-ck2-automation-meta-framework) + * [Pull CM repository with portable automation recipes](#pull-cm-repository-with-portable-automation-recipes) + * [Detect or install CUDA](#detect-or-install-cuda) + * [Test CUDA installation](#test-cuda-installation) + * [Install Python virtual environment](#install-python-virtual-environment) + * [Detect or install cuDNN](#detect-or-install-cudnn) + * [Detect or install TensorRT](#detect-or-install-tensorrt) + * [Run MLPerf inference benchmark with BERT](#run-mlperf-inference-benchmark-with-bert) + * [Try ONNX runtime backend](#try-onnx-runtime-backend) + * [Do a test run to detect and record the system performance](#do-a-test-run-to-detect-and-record-the-system-performance) + * [Do a full accuracy run for all the scenarios](#do-a-full-accuracy-run-for-all-the-scenarios) + * [Do a full performance run for all the scenarios](#do-a-full-performance-run-for-all-the-scenarios) + * [Populate the README files](#populate-the-readme-files) + * [Generate MLPerf submission tree](#generate-mlperf-submission-tree) + * [Push the results to GitHub repo](#push-the-results-to-github-repo) + * [Try PyTorch backend](#try-pytorch-backend) + * [Test composable ML benchmark with other models, data sets, frameworks and platforms](#test-composable-ml-benchmark-with-other-models-data-sets-frameworks-and-platforms) +* [The next steps](#the-next-steps) + +
+ + +This is a pilot community project to collaboratively run MLPerf BERT inference benchmark +across diverse platforms provided by volunteers similar to [SETI@home](https://setiathome.berkeley.edu/). +However, instead of searching for extraterrestrial intelligence, we are +searching for optimal software/hardware combination to run various AI and ML workloads +in terms of performance, accuracy, power and costs ... + +This benchmark is composed from [portable and reusable automation recipes](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +developed by [MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +to modularize complex AI and ML Systems and automate their benchmarking, design space exploration, optimization and deployment +across continuously evolving software, hardware, models and data. + +*If you submit your results before 1pm PST on Friday 3rd, 2023, + they will be accepted for the official MLPerf inference v3.0 submission round + and your name acknowledged in the notes!* + + +# System preparation + +## Minimal system requirements + +* CPU: any x86-64 or Arm64 based machine +* GPU: any relatively modern Nvidia GPU with 8GB+ memory and CUDA 11.4+ +* OS: we have tested this automation on Ubuntu 20.04, Ubuntu 22.04 and Debian 10 +* Disk space: ~10GB +* Python: 3.8+ +* All other dependencies (artifacts and tools) will be installed by the CM meta-framework aka (CK2) + +## Install CM (CK2) automation meta-framework + +Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install the [MLCommons CM framework](https://github.com/mlcommons/ck) +(the 2nd generation on the Collective Mind framework) on your system. + +## Pull CM repository with portable automation recipes + +Pull MLCommons CM repository with [cross-platform CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +supporting portable MLOps and DevOps: + +```bash +cm pull repo mlcommons@ck +``` + +CM pulls all such repositories into the `$HOME/CM` directory to search for portable CM automation recipes and artifacts. + +We use the unified CM CLI & Python API of [portable and reusable CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +to compose portable automation pipelines (also implemented as CM scripts) that can automatically detect or install all necessary artifacts (tools, models, datasets, libraries, etc) +required to run a given software project such as the MLPerf inference benchmark. + +These CM scripts simply wrap existing native scripts and tools as simple micro-services +with a human-readable CLI and simple Python API to be able to easily connect them together +and run on any platform in a unified way. + +## Detect or install CUDA + +Run the following CM script: +```bash +cm run script "get cuda" --out=json +``` + +If CUDA is automatically detected, it will be registered in the CM cache: +```bash +cm show cache --tags=get,cuda +``` + +Otherwise, this script will attempt to download and install the latest CUDA +from Nvidia website. + +Please report any issue with CM scripts [here](https://github.com/mlcommons/ck/issues). + +### Test CUDA installation + +You can test if CUDA toolkit and driver was detected or installed successfully using the following command: +```bash +cm run script "get cuda-devices" +``` + +You should see similar output: +```txt +Checking compiler version ... 
+ +nvcc: NVIDIA (R) Cuda compiler driver +Copyright (c) 2005-2022 NVIDIA Corporation +Built on Wed_Sep_21_10:33:58_PDT_2022 +Cuda compilation tools, release 11.8, V11.8.89 +Build cuda_11.8.r11.8/compiler.31833905_0 + +Compiling program ... + +Running program ... + + - Running postprocess ... +GPU Device ID: 0 +GPU Name: Tesla K80 +GPU compute capability: 3.7 +CUDA driver version: 11.4 +CUDA runtime version: 11.8 +Global memory: 11997020160 +Max clock rate: 823.500000 MHz +Total amount of shared memory per block: 49152 +Total number of registers available per block: 65536 +Warp size: 32 +Maximum number of threads per multiprocessor: 2048 +Maximum number of threads per block: 1024 +Max dimension size of a thread block X: 1024 +Max dimension size of a thread block Y: 1024 +Max dimension size of a thread block Z: 64 +Max dimension size of a grid size X: 2147483647 +Max dimension size of a grid size Y: 65535 +Max dimension size of a grid size Z: 65535 + + - running time of script "get,cuda-devices": 4.16 sec. + +``` + +## Install Python virtual environment + +```bash +cm run script "get sys-utils-cm" --quiet + +cm run script "install python-venv" --name=mlperf-cuda +``` + +If you want to install specific version of Python use the following command: +```bash +cm run script "install python-venv" --version=3.10.8 --name=mlperf-cuda +``` + +## Detect or install cuDNN + +```bash +cm run script "get cudnn" +``` + +If cuDNN is not detected on your system, you can download a TAR file +from [Nvidia website](https://developer.nvidia.com/cudnn) and then use the same CM script +to install it as follows: +```bash +cm run script "get cudnn" --tar_file= +``` + +We have tested this project with the following tar file `cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz`. + +## Detect or install TensorRT + +```bash +cm run script "get tensorrt" +``` +If TensorRT is not detected on your system, you can download a TAR file +from [Nvidia website](https://developer.nvidia.com/tensorrt) and then use the same CM script +to install it as follows: +```bash +cm run script "get tensorrt" --tar_file= +``` + +We have tested this project with the following tar file `TensorRT-8.5.1.7.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz`. 
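+
+To double-check what was detected or installed, you can inspect the CM cache in the same way as for CUDA above; the exact tags below are assumptions based on the script names and may differ on your system:
+
+```bash
+cm show cache --tags=get,cudnn
+cm show cache --tags=get,tensorrt
+```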
+ + +## Run MLPerf inference benchmark with BERT + +### Try ONNX runtime backend + +#### Do a test run to detect and record the system performance + +```bash +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --implementation=reference \ + --device=cuda --backend=onnxruntime --quiet +``` + +#### Do a full accuracy run for all the scenarios + +```bash +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Do a full performance run for all the scenarios + +```bash +cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Populate the README files + +```bash +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Generate MLPerf submission tree + +We should use the master branch of MLCommons inference repo for the submission checker. +You can use `--hw_note_extra` option to add your name to the notes. + +```bash +cm run script --tags=generate,inference,submission \ + --results_dir=$HOME/inference_3.0_results/valid_results \ + --adr.python.name=mlperf-cuda \ + --device=cuda --submission_dir=$HOME/inference_submission_tree --clean \ + --run-checker --submitter=cTuning --adr.inference-src.version=master + --hw_notes_extra="Result taken by " --quiet +``` + +#### Push the results to GitHub repo + +First create a fork of [this GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0). +Then run the following command after replacing `--repo_url` with your fork URL. + +```bash +cm run script --tags=push,github,mlperf,inference,submission \ + --submission_dir=$HOME/inference_submission_tree \ + --adr.python.name=mlperf-cuda \ + --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0 \ + --commit_message="Bert crowd-results added" +``` + +Create a PR to the [GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0/) + + + +### Try PyTorch backend + +You can run the same commands with PyTorch by rerunning all above commands and replacing `--backend=onnxruntime` with `--backend=pytorch`. 
+
+For example,
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \
+    --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \
+    --implementation=reference --backend=pytorch --execution-mode=valid \
+    --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+
+## Test composable ML benchmark with other models, data sets, frameworks and platforms
+
+* [GUI to prepare CM command line and run benchmark](https://cknowledge.org/mlperf-inference-gui)
+* [GUI to compare performance, accuracy, power and costs of ML/SW/HW combinations](https://cKnowledge.org/cm-gui-graph)
+
+
+# The next steps
+
+Feel free to join our [open taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md)
+and the public [Discord server](https://discord.gg/JjWNWXKxwT) to learn about our roadmap and related community projects.
+
+Our ultimate goal is to help anyone automatically find or generate the optimal software/hardware stack from the cloud to the edge
+for their AI/ML tasks based on their requirements and constraints (accuracy, performance, power consumption, costs, etc).
+
+*Prepared by [Arjun Suresh](https://www.linkedin.com/in/arjunsuresh) and [Grigori Fursin](https://cKnowledge.org/gfursin) (OctoML, MLCommons, cTuning foundation)*
diff --git a/project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md b/project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md
new file mode 100644
index 0000000000..824279732e
--- /dev/null
+++ b/project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md
@@ -0,0 +1,87 @@
+## Setup
+Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM.
+Download the ck repo to get the CM scripts for MLPerf submission:
+```
+cm pull repo mlcommons@ck
+```
+## Run Commands
+
+BERT has two variants - `bert-99` and `bert-99.9`, where `99` and `99.9` specify the required accuracy constraint with respect to the reference floating-point model. The `bert-99.9` model is applicable only to datacenter systems.
+
+In the edge category, `bert-99` has the Offline and SingleStream scenarios; in the datacenter category, both `bert-99` and `bert-99.9` have the Offline and Server scenarios. The commands below assume an edge category system.
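+
+For a datacenter system, a minimal variant of the test run might look like the following sketch; it simply combines the `--category=datacenter` and `bert-99.9` options documented below and has not been validated here:
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=bert-99.9 --implementation=reference --device=cpu --backend=onnxruntime \
+--category=datacenter --quiet
+```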
+
+### Onnxruntime backend
+
+#### Do a test run to detect and record the system performance
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=bert-99 --implementation=reference --device=cpu --backend=onnxruntime --quiet
+```
+* Use `--device=cuda` to run the inference on an Nvidia GPU
+* Use `--division=closed` to run all scenarios for the closed division, including the compliance tests
+* Use `--category=datacenter` to run datacenter scenarios
+
+#### Do a full accuracy run for all the scenarios
+
+```
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \
+--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+#### Do a full performance run for all the scenarios
+```
+cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios \
+--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+#### Generate actual submission tree
+
+We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+```
+cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \
+--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \
+--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet
+```
+
+#### Push the results to GitHub repo
+
+First create a fork of [this repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/). Then run the following command after replacing `--repo_url` with your fork URL.
+```
+cm run script --tags=push,github,mlperf,inference,submission \
+--submission_dir=$HOME/inference_submission_tree \
+--repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0/ \
+--commit_message="Bert results added"
+```
+
+Create a PR to the [cTuning repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/).
+
+## Tensorflow backend
+
+The same commands as for `onnxruntime` should work after replacing `--backend=onnxruntime` with `--backend=tf`. For example:
+
+```
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \
+--model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \
+--results_dir=$HOME/inference_3.0_results --quiet
+```
+
+## Pytorch backend
+
+The same commands as for `onnxruntime` should work after replacing `--backend=onnxruntime` with `--backend=pytorch`.
For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=pytorch \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + diff --git a/project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md b/project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md new file mode 100644 index 0000000000..9129004321 --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md @@ -0,0 +1,74 @@ +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + +On edge category ResNet50 has Offline, SingleStream and MultiStream scenarios and in datacenter category it has Offline and Server scenarios. The below commands are assuming an edge category system. + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_full,_all-scenarios --model=resnet50 \ +--device=cpu --backend=onnxruntime --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for a closed division including the compliance tests +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy run for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios --model=resnet50 --device=cpu \ +--implementation=reference --backend=onnxruntime --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + +#### Do a full performance run for all the scenarios +``` +cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios --model=resnet50 --device=cpu \ +--implementation=reference --backend=onnxruntime --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios --model=resnet50 --device=cpu \ +--implementation=reference --backend=onnxruntime --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + +#### Generate actual submission tree + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--submission_dir=$HOME/inference_submission_tree --clean \ +--run-checker --submitter=cTuning --adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + +#### Push the results to GitHub repo + +First create a fork of [this repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/). Then run the following command after replacing `--repo_url` with your fork URL. 
+```
+cm run script --tags=push,github,mlperf,inference,submission --submission_dir=$HOME/inference_submission_tree \
+--repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0/ \
+--commit_message="ResNet50 results added"
+```
+
+Create a PR to the [cTuning repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/).
+
+## Tensorflow backend
+
+The same commands as for `onnxruntime` should work after replacing `--backend=onnxruntime` with `--backend=tf`. For example:
+
+```
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios --model=resnet50 --device=cpu \
+--implementation=reference --backend=tf --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+## TVM backend
+
+The same commands as for `onnxruntime` should work after replacing `--backend=onnxruntime` with `--backend=tvm-onnx`. Only `--device=cpu` is currently supported for TVM. For example:
+
+```
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios --model=resnet50 --device=cpu \
+--implementation=reference --backend=tvm-onnx --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet
+```
diff --git a/project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md b/project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md
new file mode 100644
index 0000000000..c35aada995
--- /dev/null
+++ b/project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md
@@ -0,0 +1,47 @@
+## Run Commands
+
+We need the full ImageNet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+
+Requirements: You need to have CUDA, cuDNN and TensorRT installed on your system.
+
+If CUDA is not detected, CM should download and install it automatically when you run the workflow.
+
+For x86 machines, you can download the tar files for cuDNN and TensorRT and install them using the following commands:
+```bash
+cm run script --tags=get,cudnn --tar_file=<path to the cuDNN tar file>
+```
+
+```bash
+cm run script --tags=get,tensorrt --tar_file=<path to the TensorRT tar file>
+```
+
+On other systems, you can install them via the package manager, and CM should pick up the installation automatically during the workflow run.
+
+Nvidia run configuration values for each model-scenario combination on known systems are stored in `__init__.py` files under the `configs` directory. For custom systems, these are stored in `custom.py` files. When custom config files are generated, they override the default config values with empty ones (not desirable), so you will probably need to open the custom config file and comment out the unneeded overrides. Typically, `gpu_batch_size` and `offline_expected_qps` are enough for an Offline scenario run on a typical single-GPU system.
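+
+As an illustration only, an edited `custom.py` entry might look roughly like the sketch below. The decorator, base-class and system names are assumptions here (they depend on what the config generator produced for your machine), so treat this as a pattern rather than a drop-in file:
+
+```python
+# Hypothetical sketch of an edited configs/<model>/Offline/custom.py entry.
+# Keep the class/decorator scaffolding exactly as generated for your system;
+# only the two overrides below are typically needed for an Offline run.
+from . import *
+
+@ConfigRegistry.register(HarnessType.LWIS, AccuracyTarget.k_99, PowerSetting.MaxP)
+class MY_CUSTOM_WORKSTATION(OfflineGPUBaseConfig):
+    system = KnownSystem.MY_CUSTOM_WORKSTATION
+
+    # Overrides we actually want to keep:
+    gpu_batch_size = 64
+    offline_expected_qps = 40000
+
+    # Auto-generated empty overrides commented out so the defaults apply:
+    # gpu_copy_streams = None
+    # gpu_inference_streams = None
+```
+
+With the empty overrides commented out, the remaining values are inherited from the benchmark defaults in `__init__.py`.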
+ + +## Build Nvidia Inference Server +``` +cm run script --tags=build,nvidia,inference,server +``` + +## Run ResNet50 + +### Find SUT performance + +``` +cm run script --tags=generate,run-cmds,inference,_find-performance --model=resnet50 --implementation=nvidia-original \ +--device=cuda --adr.nvidia-harness.gpu_batch_size=64 --results_dir=$HOME/nvidia_original_results +``` + +### Do a complete submission run + +``` +cm run script --tags=generate,run-cmds,inference,_submission,_full --execution_mode=valid --model=resnet50 \ +--implementation=nvidia-original --device=cuda --adr.nvidia-harness.gpu_batch_size=64 \ +--adr.nvidia-harness.skip_preprocess=yes --adr.nvidia-harness.make_cmd=run_harness \ +--results_dir=$HOME/nvidia_original_results --submission_dir=$HOME/nvidia_original_submissions \ +--division=open --submitter=cTuning --category=edge +``` + diff --git a/project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md b/project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md new file mode 100644 index 0000000000..cb74086b54 --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md @@ -0,0 +1,25 @@ +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + +## Create an AWS Graviton Instance + +``` +cd $HOME/CM/repos/mlcommon@ck/cm-mlops/script/run-terraform/aws/ +cp credentials.example credentials.sh +``` +Update `credentials.sh` with your AWS Key, Secret and Token + +``` +cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \ +--cminit --key_file=$HOME/cmuser.pem +``` + +The above command will output the IP of the created instance which will be having CM setup already done + +Copy the imagenet dataset to the created instance. For example, + +``` +rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134: +``` + diff --git a/project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd b/project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd new file mode 100644 index 0000000000..87fa4e9ba2 --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd @@ -0,0 +1,3 @@ +cm run script "get git repo _repo.https://github.com/ctuning/mlperf_inference_submissions_v3.0" --extra_cache_tags=mlperf-inference-results,version-3.0 +cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.1" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.1 +cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.0" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.0 diff --git a/report/mlperf-inference-v3.1-analysis-ctuning/README.md b/report/mlperf-inference-v3.1-analysis-ctuning/README.md new file mode 100644 index 0000000000..9d4b696949 --- /dev/null +++ b/report/mlperf-inference-v3.1-analysis-ctuning/README.md @@ -0,0 +1,93 @@ +On this page, we highlight some of the exciting submissions done by CTuning for the MLCommons Inference 3.1 round. 
+
+## Top Results in Edge Category
+
+In the edge category, the Rigel Supercomputer from One Stop Systems achieved the peak Offline performance for the four submitted benchmarks - Image classification (ResNet50), Object detection (RetinaNet), Language processing (Bert) and Speech Recognition (RNNT). The graph below compares the peak performance of the bert-99 model across the top 10 performing systems.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/9f8e3367-1ca4-4298-8545-285cdedfc991)
+
+
+The Nvidia RTX 4090 has the best performance per accelerator; this accelerator is assembled in a PC made by PCSPECIALIST UK. The graph below compares the performance per accelerator of the bert-99 model across the top 10 performing systems.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c02120cb-eda9-4eef-9e22-56fff4bf23a7)
+
+
+The Nvidia RTX 4090 also wins the latency metric for ResNet50, Bert and 3d-unet in the SingleStream scenario.
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/6d4b39a0-9f39-474a-ac16-5498e281ebad)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/8afb5609-581d-4ee8-be56-731af731f10f)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/5cb88f53-9255-4a0b-98df-a192ba87b125)
+
+
+## Best energy-efficient results in Edge category
+
+For the Speech Recognition model RNNT, CTuning submitted the most power-efficient result on the Nvidia Jetson Orin AGX.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d485aa50-a0d4-4a40-a805-cc2ddc3e0ca6)
+
+
+For the Medical Imaging model 3d-unet, where the samples per second are quite low, the four most energy-efficient results are from CTuning.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d15297fb-3eff-47c9-b188-68d438b7f248)
+
+For the Language Processing model bert-99, the Gloria high-end system from Qualcomm tops the energy-efficiency metric, and CTuning's Nvidia Jetson Orin AGX is in second place.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/22c85404-51f5-44b7-b128-8df4579c635c)
+
+
+
+## Benchmarking Rigel Supercomputer
+
+The Rigel Edge Supercomputer from One Stop Systems wins the peak performance for all four submitted models and comfortably beats the second-place system. It also achieves the best latency for the ResNet50 MultiStream scenario.
+
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/635f5f29-080f-4c7c-85a5-65fcf438f9e1)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c993c2f5-a8b7-4a11-b89f-35d96e357e42)
+
+
+
+
+
+## Benchmarking MLPerf Inference Reference Implementations
+
+We compared the performance of the reference implementation with that of the Nvidia-optimized implementation by running both on an Nvidia RTX 4090 GPU. The reference implementation uses fp32 models, whereas the Nvidia implementation uses quantized models.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/b46bc509-f242-4bc6-a9e8-ec318d09616b)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/404b54d2-a04e-4e5e-861d-43c7d940faf8)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/f5a04e85-269f-485a-8839-348dddcd5eb7)
+
+## Showcasing Apple Metal Performance
+
+We benchmarked the performance of Apple Metal using tensorflow-metal. The graphs below show the performance benefit of running inference on Apple Metal via tensorflow-metal versus onnxruntime running only on CPUs.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/87385e24-b3b5-4694-8106-2c30eeb393de)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c9a38dc9-0986-461e-b81d-988297e1771e)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/4b8565b4-7a23-4f29-b450-6eaf00d10f63)
+
+
+
+
+
+## Design Space Exploration For NeuralMagic Deepsparse Library
+
+Using the CM experiment automation, we performed a design space exploration to find the optimal batch size for the bert-99-compatible sparse models.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/a18088f2-c864-4c16-b714-5b375cf5fc94)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/8bd95c5f-344f-4d9f-9f94-c3024efbce13)
+
+
+## Comparing the performance of Modular MLPerf Inference C++ implementations
+
+Here we compare the performance of the MIL library used by CTuning and the KILT library used by KRAI, both on CPUs and GPUs. This is not an apples-to-apples comparison, as KILT used an Nvidia A1000 GPU while MIL was run on an Nvidia RTX 4090 GPU. For CPUs, KILT was run on a [24-core Dell server](https://github.com/mlcommons/inference_results_v3.1/blob/main/closed/Krai/systems/7920t-kilt-onnxruntime_cpu.json) with a peak frequency of 4000 MHz, whereas MIL was run on a [16-core PCSPECIALIST custom workstation](https://github.com/mlcommons/inference_results_v3.1/blob/main/closed/CTuning/systems/amd_ryzen_workstation-cpp-cpu-onnxruntime-vdefault-default_config.json) with a peak frequency of 5900 MHz.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/6d73360a-27ab-4158-b4cc-a5724d6d4c73)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d6b5516b-4861-4355-badf-65decbf8d3b0)
+
diff --git a/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json b/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json
new file mode 100644
index 0000000000..4860af17b2
--- /dev/null
+++ b/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json
@@ -0,0 +1,16 @@
+{
+  "alias": "mlperf-inference-v3.1-analysis-ctuning",
+  "automation_alias": "report",
+  "automation_uid": "6462ecdba2054467",
+  "date": "20230917",
+  "title": "cTuning's analysis of MLPerf inference v3.1 community results",
+  "tags": [
+    "mlperf",
+    "inference",
+    "mlperf-inference",
+    "v3.1",
+    "analysis",
+    "ctuning"
+  ],
+  "uid": "ebc483653dbc45b6"
+}
diff --git a/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json b/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json
new file mode 100644
index 0000000000..99d0370a50
--- /dev/null
+++ b/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json
@@ -0,0 +1,17 @@
+{
+  "alias": "mlperf-inference-v3.1-press-release-ctuning",
+  "automation_alias": "report",
+  "automation_uid": "6462ecdba2054467",
+  "date": "20230913",
+  "redirect": "https://www.linkedin.com/pulse/new-milestone-make-mlperf-benchmarks-accessible-everyone-fursin",
+  "tags": [
+    "mlperf",
+    "inference",
+    "mlperf-inference",
+    "v3.1",
+    "analysis",
+    "ctuning"
+  ],
+  "title": "cTuning press-release about making MLPerf inference accessible to everyone",
+  "uid": "85ff4a6ac203411e"
+}
diff --git a/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json b/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json
new file mode 100644
index 0000000000..159a986735
--- /dev/null
+++ b/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json
@@ -0,0 +1,17 @@
+{
+  "alias": "mlperf-inference-v3.1-press-release-hpcwire",
+  "automation_alias": "report",
+  "automation_uid": "6462ecdba2054467",
+  "date":
"20230913", + "tags": [ + "mlperf", + "inference", + "mlperf-inference", + "v3.1", + "analysis", + "ctuning" + ], + "redirect": "https://www.hpcwire.com/2023/09/13/mlperf-releases-latest-inference-results-and-new-storage-benchmark", + "title": "HPCWire about MLPerf inference v3.1 and storage results (with cTuning/cKnowledge coverage)", + "uid": "50960565640142d6" +} diff --git a/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json b/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json new file mode 100644 index 0000000000..15c3fa6c42 --- /dev/null +++ b/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json @@ -0,0 +1,17 @@ +{ + "alias": "mlperf-inference-v4.0-press-release-ctuning", + "automation_alias": "report", + "automation_uid": "6462ecdba2054467", + "date": "20230913", + "redirect": "https://www.linkedin.com/pulse/new-cm-mlperf-automation-helps-benchmark-commodity-hardware-fursin-61noe", + "tags": [ + "mlperf", + "inference", + "mlperf-inference", + "v4.0", + "analysis", + "ctuning" + ], + "title": "cTuning press-release about a new version of the CM workflow to automate MLPerf", + "uid": "acc35b8e9ed14c98" +} diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/README.md b/script/reproduce-ieee-acm-micro2023-paper-22/README.md new file mode 100644 index 0000000000..6b86e491da --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/README.md @@ -0,0 +1,42 @@ +# CM script to run and reproduce experiments + +Original repository: https://github.com/UofT-EcoSystem/Grape-MICRO56-Artifact/wiki#installation + +### Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install reusable MLCommons automations: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` + +### Install dependencies + +```bash +cmr "reproduce project micro-2023 22 _install_deps" +cmr "reproduce project micro-2023 22 _install_deps_driver" +cmr "reproduce project micro-2023 22 _install_deps_cuda" +cmr "reproduce project micro-2023 22 _install_deps_pytorch" +cmr "reproduce project micro-2023 22 _install_deps_transformers" +``` + +Please reboot the machine after the above installation steps for the GPU driver installation to take effect. This can be verified from the message `NVRM: loading customized kernel module from Grape` when running the command `sudo dmesg`. 
If the message does not appear, please repeat the command + +```bash +cmr "reproduce project micro-2023 22 _install_deps_driver" +``` + +### Run experiments + +```bash +cmr "reproduce project micro-2023 22 _run_figure13" +cmr "reproduce project micro-2023 22 _run_figure11" +cmr "reproduce project micro-2023 22 _run_figure12" +``` diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-22/_cm.yaml new file mode 100644 index 0000000000..8f309ca885 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/_cm.yaml @@ -0,0 +1,45 @@ +alias: reproduce-ieee-acm-micro2023-paper-22 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: get,git,repo,_repo.https://github.com/UofT-EcoSystem/Grape-MICRO56-Artifact + env: + CM_GIT_ENV_KEY: 'GRAPE_MICRO56' + extra_cache_tags: micro56,artifact,ae,grape +script_name: run +tags: +- reproduce +- project +- paper +- m +- micro +- micro-2023 +- '2023' +- '22' +uid: e26c9ce3e7b84526 +variations: + install_deps: + script_name: install_deps + install_deps_driver: + script_name: install_deps_driver + install_deps_cuda: + script_name: install_deps_cuda + install_deps_pytorch: + script_name: install_deps_pytorch + install_deps_transformers: + script_name: install_deps_transformers + run: + script_name: run + run_figure11: + script_name: run_figure11 + run_figure12: + script_name: run_figure12 + run_figure13: + script_name: run_figure13 diff --git a/script/reproduce-micro-paper-2023-victima/customize.py b/script/reproduce-ieee-acm-micro2023-paper-22/customize.py similarity index 100% rename from script/reproduce-micro-paper-2023-victima/customize.py rename to script/reproduce-ieee-acm-micro2023-paper-22/customize.py diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps.sh new file mode 100644 index 0000000000..c9d37d0ba6 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +. scripts/Installation/0-install_build_essentials.sh +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_cuda.sh b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_cuda.sh new file mode 100644 index 0000000000..f3a345ec90 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_cuda.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +. scripts/Installation/2-install_CUDA.sh +test $? 
-eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_driver.sh b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_driver.sh new file mode 100644 index 0000000000..3e6d33783c --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_driver.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +. scripts/Installation/1-install_NVIDIA_GPU_driver.sh +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_pytorch.sh b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_pytorch.sh new file mode 100644 index 0000000000..f961aaa009 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_pytorch.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +. scripts/Installation/3-build_PyTorch.sh +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_transformers.sh b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_transformers.sh new file mode 100644 index 0000000000..effe47e975 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_transformers.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +echo "git submodule update --init submodules/transformers" +git submodule update --init submodules/transformers + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/run.sh b/script/reproduce-ieee-acm-micro2023-paper-22/run.sh new file mode 100644 index 0000000000..6b50d1b811 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/run.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +source scripts/Installation/activate + +. ${CM_TMP_CURRENT_SCRIPT_PATH}/run_figure13.sh +. ${CM_TMP_CURRENT_SCRIPT_PATH}/run_figure11.sh +. ${CM_TMP_CURRENT_SCRIPT_PATH}/run_figure12.sh + + +test $? 
-eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/run_figure11.sh b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure11.sh new file mode 100644 index 0000000000..bf2c7b0fcf --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure11.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +source scripts/Installation/activate + +./scripts/Experiment_Workflow/2-test_runtime_performance.sh --model=gpt2 +./scripts/Experiment_Workflow/2-test_runtime_performance.sh --model=gptj +./scripts/Experiment_Workflow/2-test_runtime_performance.sh --model=wav2vec2 + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/run_figure12.sh b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure12.sh new file mode 100644 index 0000000000..1d9ea80270 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure12.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +source scripts/Installation/activate + +./scripts/Experiment_Workflow/3-test_runtime_breakdown.sh + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/run_figure13.sh b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure13.sh new file mode 100644 index 0000000000..6d2f05bf3c --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure13.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +source scripts/Installation/activate + +./scripts/Experiment_Workflow/1-test_metadata_compression.sh + + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/README.md b/script/reproduce-ieee-acm-micro2023-paper-28/README.md new file mode 100644 index 0000000000..c0b235ba21 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/README.md @@ -0,0 +1,61 @@ +# CM script to run and reproduce experiments + +Original repository: https://github.com/neel-patel-1/XFM_MICRO2023.git + +### Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install reusable MLCommons automations: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` + +### Regenerate Figures via CM interface + +1) Install deps: +```bash +cmr "reproduce project micro-2023 xfm _install_deps" +``` + +2) Run experiments: + +```bash +cmr "reproduce project micro-2023 xfm _run" +``` + +3) Plot results: + +```bash +cmr "reproduce project micro-2023 xfm _plot" +``` + +You should find `XFM_Access_Distribution.png` and `results.csv` in the `results` folder current directory. 
+ +### Regenerate SPEC Workloads Experiments via CM Interface + +* if hosted SPEC 2017 for artifact evaluation purposes is no longer available, provide path to a local install of SPEC: + +1) (Optional) Provide path to local SPEC2017 .iso file +```bash +# if local spec is available, run below to avoid fetching remote SPEC, otherwise skip this step +cmr "download file _url.https://spec2017iso.s3.us-east-2.amazonaws.com/cpu2017-1_0_5.iso" --local_path=/path/to/local/cpu2017-1_0_5.iso +``` + +1) Install deps: +```bash +cmr "reproduce project micro-2023 xfm _install_spec_deps" +``` + +2) run: +```bash +cmr "reproduce project micro-2023 xfm _run_spec" +``` + +You should find `results.txt` in the `results` folder of current directory. \ No newline at end of file diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-28/_cm.yaml new file mode 100644 index 0000000000..e2ed10c86c --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/_cm.yaml @@ -0,0 +1,40 @@ +alias: reproduce-ieee-acm-micro2023-paper-28 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: get,generic-python-lib,_pandas +- tags: get,generic-python-lib,_matplotlib +- tags: get,git,repo,_repo.https://github.com/neel-patel-1/XFM_MICRO2023 +- tags: download,file,url.https://spec2017iso.s3.us-east-2.amazonaws.com/cpu2017-1_0_5.iso + env: + CM_GIT_ENV_KEY: 'XFM' + extra_cache_tags: micro23,artifact,ae,xfm,spec +  force_cache: true +script_name: run +tags: +- reproduce +- project +- paper +- micro +- micro-2023 +- 28 +- xfm +uid: 72c44b58be0e4e16 +variations: + install_deps: + script_name: install_deps + plot: + script_name: plot + run: + script_name: run + install_spec_deps: + script_name: install_spec_deps.sh + run_spec: + script_name: run_spec.sh diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/customize.py b/script/reproduce-ieee-acm-micro2023-paper-28/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-28/install_deps.sh new file mode 100644 index 0000000000..aba23e8d48 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/install_deps.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to XFM repo: ${CM_GIT_REPO_XFM_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_XFM_CHECKOUT_PATH} + +echo "" + +# Done via _cm.yaml +#${CM_PYTHON_BIN_WITH_PATH} -m pip install pandas +#${CM_PYTHON_BIN_WITH_PATH} -m pip install matplotlib + +git submodule update --init --recursive . +test $? -eq 0 || exit 1 + +cd memory_channel_interleave_ratios +test $? -eq 0 || exit 1 + +./build_gzip.sh +test $? -eq 0 || exit 1 + +./fetch_corpus.sh +test $? 
-eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/install_spec_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-28/install_spec_deps.sh new file mode 100644 index 0000000000..46488b66be --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/install_spec_deps.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +CUR_DIR=${PWD} +SPEC_EXP_ROOT=${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment +SPEC_INSTALL=${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment/spec +SPEC_MNT=${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment/spec_mnt + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "" +echo "SPEC ISO PATH:${SPEC_ISO}" +echo "Installing to ${SPEC_INSTALL}" + +mkdir -p ${SPEC_MNT} +test $? -eq 0 || exit 1 + +mkdir -p ${SPEC_INSTALL} +test $? -eq 0 || exit 1 + +sudo mount -t iso9660 -o ro,exec,loop /path/to/cpu2017-1_0_5.iso ${CUR_DIR}/spec_mnt +test $? -eq 0 || exit 1 + +cd ${SPEC_MNT} +./install.sh -d ${SPEC_INSTALL} +test $? -eq 0 || exit 1 + +cp ${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment/config/default.cfg ${SPEC_INSTALL}/config +test $? -eq 0 || exit 1 + +cd ${SPEC_EXP_ROOT} +./fetch_corpus.sh +test $? -eq 0 || exit 1 +cd lzbench +make -j BUILD_STATIC=1 +test $? -eq 0 || exit 1 \ No newline at end of file diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-28/plot.sh new file mode 100644 index 0000000000..c79e247206 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/plot.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to XFM repo: ${CM_GIT_REPO_XFM_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_XFM_CHECKOUT_PATH} + +echo "" + +cd xfm_access_model + +${CM_PYTHON_BIN_WITH_PATH} xfm_access_model.py +test $? -eq 0 || exit 1 + +mkdir -p ${CUR_DIR}/results/XFM_Access_Results + +cp XFM_Access_Distribution.png ${CUR_DIR}/results/XFM_Access_Results diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/run.sh b/script/reproduce-ieee-acm-micro2023-paper-28/run.sh new file mode 100644 index 0000000000..49ca2bc6ff --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/run.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to XFM repo: ${CM_GIT_REPO_XFM_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_XFM_CHECKOUT_PATH} + +echo "" + +cd memory_channel_interleave_ratios + +./run.sh +test $? -eq 0 || exit 1 + +mkdir -p ${CUR_DIR}/results/memory_channel_interleave_ratios +test $? -eq 0 || exit 1 + +cp results.csv ${CUR_DIR}/results/memory_channel_interleave_ratios +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/run_spec.sh b/script/reproduce-ieee-acm-micro2023-paper-28/run_spec.sh new file mode 100644 index 0000000000..5de27e2325 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/run_spec.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to XFM repo's SPEC2017 Directory: ${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment" +cd ${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment + +./run.sh +test $? -eq 0 || exit 1 + +echo "" + +mkdir -p ${CUR_DIR}/results/spec +test $? 
-eq 0 || exit 1 + +./parse.sh | tee ${CUR_DIR}/results/spec/results.txt +test $? -eq 0 || exit 1 + + diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/README.md b/script/reproduce-ieee-acm-micro2023-paper-33/README.md new file mode 100644 index 0000000000..42d9809e9b --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/README.md @@ -0,0 +1,74 @@ +# CM script to run and reproduce experiments + +Original repository: https://github.com/filipmazurek/spa-artifact + +### Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install reusable MLCommons automations: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` + +### Set up and start Docker container + +```bash +cmr "reproduce project m 2023 33 _install_deps" +``` + +You should be within the Docker container now. + +The next step is not yet fully automated by CM and you need to do it manually to set up Conda environment: + +### Set up Conda + +```bash +cd /shared/ +bash ./in-docker-bash-scripts/set-up-conda.sh + +# Use conda with the bash shell +eval "$(/root/miniconda3/bin/conda shell.bash hook)" + +conda activate spa +``` + +### Install CM inside Conda to continue using CM interface + +```bash +python3 -m pip install cmind +cm pull repo mlcommons@ck +cm pull repo ctuning@cm4research +``` + +### Download Ubuntu Image and Kernel + +```bash +cmr "reproduce project m 2023 33 _install_deps_kernel" +``` + +### Copy gem5 PARSEC Binaries + +```bash +cmr "reproduce project m 2023 33 _install_deps_gem5" +``` + +### Run experiments Using gem5 + +```bash +cmr "reproduce project m 2023 33 _run" +``` + +### Collect data and reproduce results + +```bash +cmr "reproduce project m 2023 33 _plot" +``` + +All figures should be available in `/shared/paper-figures/`. 
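+
+If `/shared` is not a bind mount from your host, a `docker cp` call can pull the figures out of the running container. The container name `spa-ae` below is an assumption - substitute whatever name `docker ps` reports for the container started in the first step:
+
+```bash
+# Copy the generated figures from the container to the host (container name is an assumption)
+docker cp spa-ae:/shared/paper-figures ./paper-figures
+```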
diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-33/_cm.yaml new file mode 100644 index 0000000000..4db4f45391 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/_cm.yaml @@ -0,0 +1,48 @@ +alias: reproduce-ieee-acm-micro2023-paper-33 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: get,git,repo,_repo.https://github.com/filipmazurek/spa-artifact + env: + CM_GIT_ENV_KEY: 'SPA_ARTIFACT' + extra_cache_tags: micro23,artifact,ae,spa_artifact + skip_if_env: + CM_RUN_INSIDE_DOCKER: + - yes +script_name: run +tags: +- reproduce +- project +- paper +- m +- micro +- micro-2023 +- '2023' +- '33' +uid: 5dad99d41c0b422b +variations: + install_deps: + script_name: install_deps + install_deps_kernel: + script_name: install_deps_kernel + env: + CM_RUN_INSIDE_DOCKER: yes + install_deps_gem5: + script_name: install_deps_gem5 + env: + CM_RUN_INSIDE_DOCKER: yes + plot: + script_name: plot + env: + CM_RUN_INSIDE_DOCKER: yes + run: + script_name: run + env: + CM_RUN_INSIDE_DOCKER: yes diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/customize.py b/script/reproduce-ieee-acm-micro2023-paper-33/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps.sh new file mode 100644 index 0000000000..1fa6f8b86a --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to SPAM repo: ${CM_GIT_REPO_SPA_ARTIFACT_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_SPA_ARTIFACT_CHECKOUT_PATH} + +echo "" + +bash ./artifact-bash-scripts/set-up-docker.sh +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_gem5.sh b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_gem5.sh new file mode 100644 index 0000000000..667f6a7683 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_gem5.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +cd /shared/ +bash ./in-docker-bash-scripts/copy-parsec-binaries.sh + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_kernel.sh b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_kernel.sh new file mode 100644 index 0000000000..973589a921 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_kernel.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +cd /shared/ +bash ./in-docker-bash-scripts/download-disk.sh + +test $? 
-eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-33/plot.sh new file mode 100644 index 0000000000..89c33b4856 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/plot.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "" + +cd /shared/python-runners/ +${CM_PYTHON_BIN_WITH_PATH} convert-gem5-results-to-csv.py + +test $? -eq 0 || exit 1 + +cd /shared/paper-figures/ + +${CM_PYTHON_BIN_WITH_PATH} figure-1.py +${CM_PYTHON_BIN_WITH_PATH} figure-2.py +${CM_PYTHON_BIN_WITH_PATH} figure-4.py +${CM_PYTHON_BIN_WITH_PATH} figure-5.py +${CM_PYTHON_BIN_WITH_PATH} figure-6_7.py +${CM_PYTHON_BIN_WITH_PATH} figure-8_9.py +${CM_PYTHON_BIN_WITH_PATH} figure-10_11.py +${CM_PYTHON_BIN_WITH_PATH} figure-12.py +${CM_PYTHON_BIN_WITH_PATH} figure-13.py + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/run.sh b/script/reproduce-ieee-acm-micro2023-paper-33/run.sh new file mode 100644 index 0000000000..8e17e45444 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/run.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "" + +cd /shared/python-runners/ + +chmod 777 /shared/gem5/build/X86/gem5-mesi.fast +${CM_PYTHON_BIN_WITH_PATH} meta-runner.py + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/README.md b/script/reproduce-ieee-acm-micro2023-paper-38/README.md new file mode 100644 index 0000000000..34ea8ce602 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/README.md @@ -0,0 +1,50 @@ +# CM script to run and reproduce experiments + +Original repository: https://github.com/HieronZhang/G10-Artifact + + +### Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install reusable MLCommons automations: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` + +## Install Python virtual environment via CM + +```bash +cm run script "install python-venv" --name=reproducibility +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=reproducibility" +``` + +### Run G10 via CM interface + +Perform the following steps to evaluate G10 Artifact with MLCommons CM automation language: + +1) This command will install all the dependencies for G10 and requires sudo: + +```bash +cmr "reproduce project micro-2023 G10 _install_deps" +``` + +2) This command will prepare and run all experiments: + +```bash +cmr "reproduce project micro-2023 G10 _run" --max_process_num=[nthreads] +``` + +- The variable `max_process_num` is the maximum allowed number of parallel experiments in the script. Note that user need to specify the `max_process_num` based on their machine's main memory capacity. Each experiment process will need a peak memory of 28.5 GB. (We recommend reserving 30 GB for each process to ensure that the program won't crash. For example, if your machine has 128 GB of main memory, `max_process_num` can be set as 4). 
+ +3) In case of successful execution of a previous command, this command will generate plots to help you validate results from the paper: + +```bash +cmr "reproduce project micro-2023 G10 _plot" +``` diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-38/_cm.yaml new file mode 100644 index 0000000000..a7de67b4e0 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/_cm.yaml @@ -0,0 +1,36 @@ +alias: reproduce-ieee-acm-micro2023-paper-38 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +default_env: + max_process_num: 1 +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: get,git,repo,_repo.https://github.com/HieronZhang/G10-Artifact.git + env: + CM_GIT_ENV_KEY: 'G10' + extra_cache_tags: micro23,artifact,ae,G10 +input_mapping: + max_process_num: max_process_num +script_name: run +tags: +- reproduce +- project +- paper +- micro +- micro-2023 +- g10 +- G10 +uid: b6ec80696a364ff4 +variations: + install_deps: + script_name: install_deps + plot: + script_name: plot + run: + script_name: run diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.bat b/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.bat new file mode 100644 index 0000000000..47f7e7ce26 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.bat @@ -0,0 +1,18 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +if exist "%CM_TMP_CURRENT_SCRIPT_PATH%\requirements.txt" ( + + echo. + echo Installing requirements.txt ... + echo. + + %CM_PYTHON_BIN_WITH_PATH% -m pip install -r %CM_TMP_CURRENT_SCRIPT_PATH%\requirements.txt + IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% +) diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.sh new file mode 100644 index 0000000000..02b1446fca --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +sudo apt-get update +sudo apt install flex bison tmux python3-pip + +${CM_PYTHON_BIN_WITH_PATH} -m pip install matplotlib networkx pandas PyPDF2 diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/plot.bat b/script/reproduce-ieee-acm-micro2023-paper-38/plot.bat new file mode 100644 index 0000000000..7e786771ae --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/plot.bat @@ -0,0 +1,12 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +rem echo. 
+rem %CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\main.py +rem IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-38/plot.sh new file mode 100644 index 0000000000..6058cb5a32 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/plot.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to G10 repo: ${CM_GIT_REPO_G10_CHECKOUT_PATH}" +cd "${CM_GIT_REPO_G10_CHECKOUT_PATH}" + +cd src/resources + +# Collect all the numbers, store it in raw_output/data.json +${CM_PYTHON_BIN_WITH_PATH} gatherKernelInfo.py + +# Gather data for figure 11 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepOverallPerformance.py # The gathered data is stored in figure_drawing/overall_performance + +# Gather data for figure 12 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepBreakdown.py # The gathered data is stored in figure_drawing/overall_breakdown + +# Gather data for figure 13 +./figureDrawingDataPrepKernelCDF.sh # The gathered data is stored in figure_drawing/overall_slowdown_cdf + +# Gather data for figure 14 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepTraffic.py # The gathered data is stored in figure_drawing/overall_traffic + +# Gather data for figure 15 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrep.py # The gathered data is stored in figure_drawing/overall_batchsize + +# Gather data for figure 16 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepCPUsensitivity.py # The gathered data is stored in figure_drawing/sensitivity_cpumem + +# Gather data for figure 17 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepCPUSensitivityCombined.py # The gathered data is stored in figure_drawing/sensitivity_cpumem_combined + +# Gather data for figure 18 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepSSD.py # The gathered data is stored in figure_drawing/sensitivity_ssdbw + +# Gather data for figure 19 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepVariation.py # The gathered data is stored in figure_drawing/sensitivity_variation + +cd figure_drawing + +# Plot figures for Figure 2-4, and Figure 20-21 (Appendix) + +${CM_PYTHON_BIN_WITH_PATH} plot_mem_consumption.py # Figure 2 is output/dnn_memconsumption.pdf + +${CM_PYTHON_BIN_WITH_PATH} plot_tensor_time_cdf.py # Figure 3 is output/tensor_time_cdf.pdf + +${CM_PYTHON_BIN_WITH_PATH} plot_tensor_period_distribution.py # Figure 4 is output/tensor_periods_distribution.pdf + +${CM_PYTHON_BIN_WITH_PATH} plot_detail_mem_breakdown_live.py # Figure 20 is output/dnn_mem_consumption_breakdown_live.pdf + +${CM_PYTHON_BIN_WITH_PATH} plot_detail_mem_breakdown_active.py # Figure 21 is output/dnn_mem_consumption_breakdown_active.pdf + +# Draw Figure 11 +${CM_PYTHON_BIN_WITH_PATH} overallPerf.py # Figure 11 is output/OverallPerfNew.pdf + +# Draw Figure 12 +${CM_PYTHON_BIN_WITH_PATH} overallBreakdown.py # Figure 12 is output/Breakdown.pdf + +# Draw Figure 13 +${CM_PYTHON_BIN_WITH_PATH} overallSlowdownCDF.py # Figure 13 is output/KernelTimeCDF.pdf + +# Draw Figure 14 +${CM_PYTHON_BIN_WITH_PATH} overallTraffic.py # Figure 14 is output/OverallTraffic.pdf + +# Draw Figure 15 +${CM_PYTHON_BIN_WITH_PATH} overallBatchSize.py # Figure 15 is output/OverallPerfBatchSize.pdf + +# Draw Figure 16 +${CM_PYTHON_BIN_WITH_PATH} sensitivityCPUMem.py # Figure 16 is output/OverallPerfCPUMem.pdf + +# Draw Figure 17 +${CM_PYTHON_BIN_WITH_PATH} 
sensitivityCPUMemCombined.py # Figure 17 is output/OverallPerfCPUMemCombined.pdf + +# Draw Figure 18 +${CM_PYTHON_BIN_WITH_PATH} sensitivitySSDbw.py # Figure 18 is output/OverallPerfSSDBW.pdf + +# Draw Figure 19 +${CM_PYTHON_BIN_WITH_PATH} SensitivityKernelVariation.py # Figure 19 is output/SensitivityVariation.pdf diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/run.bat b/script/reproduce-ieee-acm-micro2023-paper-38/run.bat new file mode 100644 index 0000000000..6c1274ce64 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/run.bat @@ -0,0 +1,12 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +echo. +%CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\main.py +IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/run.sh b/script/reproduce-ieee-acm-micro2023-paper-38/run.sh new file mode 100644 index 0000000000..6475bf30fd --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/run.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +if [ -z "$max_process_num" ]; then + printf "\033[0;31m<--max_process_num> is not specified. Please specify it using --max_process_num=[nthreads]\033[0m\n" + exit 1 +fi +echo "Max number of processes: ${max_process_num}" + +echo "Changing to G10 repo: ${CM_GIT_REPO_G10_CHECKOUT_PATH}" +cd "${CM_GIT_REPO_G10_CHECKOUT_PATH}" + +cd src +make clean +make -j"$(nproc)" + +cd resources +${CM_PYTHON_BIN_WITH_PATH} genconfigs.py + +tmux kill-server > /dev/null 2> /dev/null + +# First run experiments for figure 11-14 +./run.sh -p "(BERT\/256|VIT\/1280|Inceptionv3\/1536|ResNet152\/1280|SENet154\/1024)-sim_(deepUM|prefetch_lru|FlashNeuron|G10GDSSSD|G10GDSFULL|lru)\.config" -dr -j $max_process_num +# The time for running this is about 104m33.975s (for max_process_num=6) + +# Then run experiments for figure 15 +./run.sh -p "(BERT\/(128|256|512|768|1024)|VIT\/(256|512|768|1024|1280)|Inceptionv3\/(512|768|1024|1280|1536|1792)|ResNet152\/(256|512|768|1024|1280)|SENet154\/(256|512|768|1024))-sim_(deepUM|prefetch_lru|FlashNeuron|lru)\.config" -dr -j $max_process_num +# The time for running this is about 155m11.104s (for max_process_num=6) + +# Then run experiments for figure 16 +./run.sh -p "(BERT\/(256|384|512|640)|VIT\/(768|1024|1280|1536)|Inceptionv3\/(512|1024|1280|1536)|ResNet152\/(768|1024|1280|1536)|SENet154\/(256|512|768|1024))-sim_prefetch_lru(-cpu(0|16|32|64|96|192|256))?\.config" -dr -j $max_process_num +# The time for running this is about 406m30.954s (for max_process_num=6) + +# Then run experiments for figure 17 +./run.sh -p "(VIT\/1024|Inceptionv3\/1280)-sim_(deepUM|prefetch_lru|FlashNeuron)-cpu(0|16|32|64|256)\.config" -dr -j $max_process_num +# The time for running this is about 24m8.144s (for max_process_num=6) + +# Then run experiments for figure 18 +./run.sh -p "(BERT\/512|VIT\/1280|Inceptionv3\/1536|ResNet152\/1280|SENet154\/1024)-sim_(deepUM|prefetch_lru|FlashNeuron|lru)-ssd(6_4|12_8|19_2|25_6|32)-.*\.config" -dr -j $max_process_num +# The time for running this is about 354m40.747s (for max_process_num=6) + +# Then run experiments for figure 19 +./run.sh -p "(BERT\/256|VIT\/1280|Inceptionv3\/1536|ResNet152\/1280|SENet154\/1024)-sim_prefetch_lru-var0_(05|10|15|20|25)\.config" -dr -j $max_process_num +# The time for running this is about 124m17.909s 
(for max_process_num=6)] diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/README.md b/script/reproduce-ieee-acm-micro2023-paper-5/README.md new file mode 100644 index 0000000000..637717712e --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/README.md @@ -0,0 +1,30 @@ +# CM script to run and reproduce experiments + +## Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install this repository with CM interface for reproduced experiments: + +```bash +cm pull repo ctuning@cm4research +``` + +## Install Python virtual environment via CM + +```bash +cm run script "install python-venv" --name=reproducibility +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=reproducibility" +``` + +## Install dependencies + +```bash +cmr "reproduce paper m2023 5 _install_deps" +``` + +## Run and create graphs + +```bash +cmr "reproduce paper m2023 5" +``` diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-5/_cm.yaml new file mode 100644 index 0000000000..65a520d013 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/_cm.yaml @@ -0,0 +1,20 @@ +alias: reproduce-ieee-acm-micro2023-paper-5 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +tags: +- reproduce +- paper +- project +- micro +- micro-2023 +- m2023 +- '5' +uid: e3a42d0dc64b4f8f +variations: + install_deps: + script_name: install_deps + run: + script_name: run +versions: {} diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/customize.py b/script/reproduce-ieee-acm-micro2023-paper-5/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.bat b/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.bat new file mode 100644 index 0000000000..834ec600df --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.bat @@ -0,0 +1,4 @@ +rem native script + +echo "Windows is not supported yet" +exit /b 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.sh new file mode 100644 index 0000000000..322d4671b9 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.sh @@ -0,0 +1,24 @@ +echo "================== Install Docker container (you can skip if already installed)==================" + +sudo apt-get update +sudo apt-get -y install \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + tar + +# Add Docker’s official GPG key +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + +sudo apt-get update +sudo apt-get -y install docker-ce docker-ce-cli containerd.io + +sudo 
usermod -aG docker $USER + +su - $USER diff --git a/script/reproduce-micro-paper-2023-victima/main.py b/script/reproduce-ieee-acm-micro2023-paper-5/main.py similarity index 100% rename from script/reproduce-micro-paper-2023-victima/main.py rename to script/reproduce-ieee-acm-micro2023-paper-5/main.py diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/run.bat b/script/reproduce-ieee-acm-micro2023-paper-5/run.bat new file mode 100644 index 0000000000..834ec600df --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/run.bat @@ -0,0 +1,4 @@ +rem native script + +echo "Windows is not supported yet" +exit /b 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/run.sh b/script/reproduce-ieee-acm-micro2023-paper-5/run.sh new file mode 100644 index 0000000000..071e755eb0 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/run.sh @@ -0,0 +1,41 @@ +echo "====================== Atifacts Evaluation for MICRO23 paper==========================" +echo "Sparse-DySta: Sparsity-Aware Dynamic and Static Scheduling for Sparse Multi-DNN Workloads" +container="docker" + +echo "================== Run a container test to make sure container works ==================" +${container} run docker.io/hello-world + +echo "=====================================================================================" + +echo "================== Pulling the Docker image to run the experiments ==================" +${container} pull hxfan/spar-dysta-micro23:ae + +echo "================== Creating Container to run the experiments ==================" +sudo ${container} run -it -d --name spar-dysta --gpus all hxfan/spar-dysta-micro23:ae /bin/bash # Create container + + +echo "================== Generate Figure-12, Attention ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/attnn/dysta_comparison_sanger_tradeoff_analysis.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/Sanger_Tradeoff_slo10.0.pdf . +echo "================== Generate Figure-12, CNN ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/cnn/dysta_comparison_eyerissv2_tradeoff_analysis.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/EyerissV2_Tradeoff_slo10.0.pdf . + +echo "================== Generate Figure-13, Attention ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/attnn/effect_sparsity_sanger.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/Sanger_Sparsity_Effect30_sample1000_across_slo10.0_prema.pdf . +echo "================== Generate Figure-13 CNN ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/cnn/effect_sparsity_eyerissv2.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/EyerissV2_Sparsity_Effect3_sample1000_across_slo10.0_prema.pdf . + + +echo "================== Generate Table5 & Figure-14, Attention ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/attnn/dysta_comparison_sanger_across_slo.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/Sanger_Metrics_rate30_sample1000_across_slo.pdf . +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/Sanger_Metrics_rate40_sample1000_across_slo.pdf . 
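+# Note: each figure/table above and below is produced the same way: `sudo docker exec` runs one of the
+# artifact's evaluation scripts inside the `spar-dysta` container created earlier, and `docker cp`
+# copies the generated PDF from the container back to the current host directory.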
+echo "================== Generate Table5 & Figure-14 CNN ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/cnn/dysta_comparison_eyerissv2_across_slo.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/EyerissV2_Metrics_rate3_sample1000_across_slo.pdf . +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/EyerissV2_Metrics_rate4_sample1000_across_slo.pdf . + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/.gitignore b/script/reproduce-ieee-acm-micro2023-paper-8/.gitignore new file mode 100644 index 0000000000..1377554ebe --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/.gitignore @@ -0,0 +1 @@ +*.swp diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/README.md b/script/reproduce-ieee-acm-micro2023-paper-8/README.md new file mode 100644 index 0000000000..c0f9d185c6 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/README.md @@ -0,0 +1,74 @@ +# CM script to run and reproduce experiments + +Original repository: [https://github.com/FPSG-UIUC/micro23-teaal-artifact](https://github.com/FPSG-UIUC/micro23-teaal-artifact) + +## Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install this repository with CM interface for reproduced experiments: + +```bash +cm pull repo ctuning@cm4research +``` + +## Install Python virtual environment via CM + +```bash +cm run script "install python-venv" --name=reproducibility +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=reproducibility" +``` + +## Run TeAAL via the CM interface + +To install dependencies, run: + +```bash +cmr "reproduce paper m 2023 8 _install_deps" +``` + +Note that the install script makes its best guess for the correct UID and GID +for the container to be using (the current user's UID and GID). If you would +like to change the UID and/or GID of the container, you can do so in the +artifact repository `/path/to//repo/docker-compose.yaml`. +Instructions for finding this repository are below. + +To check that the environment is correctly set up and evaluate each accelerator +configuration on a small example, run: + +```bash +cmr "reproduce paper m 2023 8 _check" +``` + +To run the real experiments, run: + +```bash +cmr "reproduce paper m 2023 8 _run" +``` + +To plot the results of the real experiments, run +```bash +cmr "reproduce paper m 2023 8 _plot" +``` + +The plots will be stored in the artifact repository at `/path/to//repo/data/plots`. Instructions for finding this repository are below. 
+ +To plot pregenerated results (e.g., if you don't want to run the experiments +yourself), run: + +```bash +cmr "reproduce paper m 2023 8 _plot_pregenerated" +``` + +### Finding the Artifact Repository + +You can also find this directory via CM as follows: +```bash +cm show cache --tags=git,artifact,fpsg,teaal +``` +or +```bash +cm find cache --tags=git,artifact,fpsg,teaal +``` + diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-8/_cm.yaml new file mode 100644 index 0000000000..79cdc1fa33 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/_cm.yaml @@ -0,0 +1,40 @@ +alias: reproduce-ieee-acm-micro2023-paper-8 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +default_env: + CM_EXPERIMENT: '1' +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: get,git,repo,_repo.https://github.com/FPSG-UIUC/micro23-teaal-artifact + env: + CM_GIT_ENV_KEY: 'FPSG_UIUC_TEAAL' + extra_cache_tags: artifact,fpsg,uiuc,teaal +input_mapping: + experiment: CM_EXPERIMENT +tags: +- reproduce +- project +- paper +- m +- micro +- micro-2023 +- '2023' +- '8' +uid: 1f15f5f53c6d469a +variations: + install_deps: + script_name: install_deps + check: + script_name: check + run: + script_name: run + plot: + script_name: plot + plot_pregenerated: + script_name: plot_pregenerated diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/check.sh b/script/reproduce-ieee-acm-micro2023-paper-8/check.sh new file mode 100644 index 0000000000..edec77ffe1 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/check.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH} + +docker-compose run cl scripts/check.sh + +test $? -eq 0 || exit 1 + diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/customize.py b/script/reproduce-ieee-acm-micro2023-paper-8/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.bat b/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.bat new file mode 100644 index 0000000000..47f7e7ce26 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.bat @@ -0,0 +1,18 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +if exist "%CM_TMP_CURRENT_SCRIPT_PATH%\requirements.txt" ( + + echo. + echo Installing requirements.txt ... + echo. 
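+  rem Use the Python interpreter resolved by CM (CM_PYTHON_BIN_WITH_PATH) so that the
+  rem requirements are installed into the same environment the rest of this CM script uses.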
+ + %CM_PYTHON_BIN_WITH_PATH% -m pip install -r %CM_TMP_CURRENT_SCRIPT_PATH%\requirements.txt + IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% +) diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.sh new file mode 100644 index 0000000000..15c20da89d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH} + +# We install python venv via CM and cache it inside CM cache +# Path to python from venv will be in ${CM_PYTHON_BIN_WITH_PATH} +#python3 -m venv env +#source env/bin/activate + +${CM_PYTHON_BIN_WITH_PATH} -m pip install -r scripts/cm-requirements.txt + +cd scripts + +${CM_PYTHON_BIN_WITH_PATH} install_deps.py + +docker-compose > /dev/null 2> /dev/null +if [ $? -ne 0 ] +then + sh install_docker.sh +fi + +test $? -eq 0 || exit 1 + diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/main.py b/script/reproduce-ieee-acm-micro2023-paper-8/main.py new file mode 100644 index 0000000000..d851f1450f --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/main.py @@ -0,0 +1,10 @@ +import os + +if __name__ == "__main__": + + print ('') + print ('Main script:') + print ('Experiment: {}'.format(os.environ.get('CM_EXPERIMENT',''))) + print ('') + + exit(0) diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/plot.bat b/script/reproduce-ieee-acm-micro2023-paper-8/plot.bat new file mode 100644 index 0000000000..7e786771ae --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/plot.bat @@ -0,0 +1,12 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +rem echo. +rem %CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\main.py +rem IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-8/plot.sh new file mode 100644 index 0000000000..8c11c44a29 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/plot.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" +echo "ENV CM_EXPERIMENT: ${CM_EXPERIMENT}" + +echo "${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH}" + +docker-compose run cl scripts/plot.sh + +test $? -eq 0 || exit 1 + diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/plot_pregenerated.sh b/script/reproduce-ieee-acm-micro2023-paper-8/plot_pregenerated.sh new file mode 100644 index 0000000000..9980e7ea43 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/plot_pregenerated.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH} + +docker-compose run cl scripts/plot_pregenerated.sh + +test $? 
-eq 0 || exit 1 + diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/run.bat b/script/reproduce-ieee-acm-micro2023-paper-8/run.bat new file mode 100644 index 0000000000..6c1274ce64 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/run.bat @@ -0,0 +1,12 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +echo. +%CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\main.py +IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/run.sh b/script/reproduce-ieee-acm-micro2023-paper-8/run.sh new file mode 100644 index 0000000000..b2c7c1e3c8 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/run.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH} + +docker-compose run cl scripts/run.sh + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/Dockerfile b/script/reproduce-ieee-acm-micro2023-paper-85/Dockerfile new file mode 100644 index 0000000000..62c2dcdae5 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/Dockerfile @@ -0,0 +1,28 @@ +#Bootstrap: docker +From ubuntu:20.04 + +#%post + RUN mkdir /root/artifact_evaluation + RUN apt-get -y clean + RUN apt-get -y update + RUN apt-get -y install python3 build-essential + RUN apt-get -y install git + RUN apt-get -y install vim pip + RUN pip install numpy + WORKDIR /root/artifact_evaluation + RUN git clone https://github.com/lchangxii/sampled-mgpu-sim.git + RUN git clone https://github.com/lchangxii/akita.git + RUN git clone https://github.com/lchangxii/dnn.git + RUN apt-get -y install wget + RUN wget https://go.dev/dl/go1.20.1.linux-amd64.tar.gz + RUN tar -xvzf go1.20.1.linux-amd64.tar.gz + ENV PATH="/root/artifact_evaluation/go/bin:$PATH" + ENV HOME /root + RUN git clone https://github.com/lchangxii/micro2023_figures.git + RUN pip install pandas + RUN pip install matplotlib + RUN pip install openpyxl +#%environment +#export PATH=/opt/riscv/:$PATH + + diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/README.md b/script/reproduce-ieee-acm-micro2023-paper-85/README.md new file mode 100644 index 0000000000..05954766f3 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/README.md @@ -0,0 +1,40 @@ +# CM script to run and reproduce experiments + +Original repository: https://github.com/lchangxii/photon + + +### Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). 
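In most environments this boils down to installing the `cmind` package, e.g. (a sketch; see the guide above for OS-specific details and virtual-environment setup):

```bash
python3 -m pip install cmind
```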
+ +Install reusable MLCommons automations: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` + +### Run Photon via CM interface + +Perform the following steps to evaluate Photon with MLCommons CM automation language: + +1) This command will install system dependencies for Docker and require sudo (skip it if you have Docker installed): +```bash +cmr "reproduce project m 2023 photon _install_deps" +``` + +2) This command will prepare and run all experiments via Docker: + +```bash +cmr "reproduce project m 2023 photon _run" +``` + +3) In case of successful execution of a previous command, this command will generate plots to help you validate results from the article: + +```bash +cmr "reproduce project m 2023 photon _plot" +``` diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-85/_cm.yaml new file mode 100644 index 0000000000..392e396b7e --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/_cm.yaml @@ -0,0 +1,30 @@ +alias: reproduce-ieee-acm-micro2023-paper-85 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +script_name: run +tags: +- reproduce +- project +- paper +- micro +- micro-2023 +- m +- '2023' +- '85' +- photon +uid: 9e0b8254b62c4349 +variations: + install_deps: + script_name: install_deps + plot: + script_name: plot + run: + script_name: run diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/customize.py b/script/reproduce-ieee-acm-micro2023-paper-85/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-85/install_deps.sh new file mode 100644 index 0000000000..04998192fd --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/install_deps.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + + + +container="docker" + + +if [ "${container}" = "docker" ]; then + + echo "================== Install Docker container (you can skip if already installed)==================" + + sudo apt-get update + sudo apt-get -y install \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + tar + + # Add Docker’s official GPG key + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + + sudo apt-get update + sudo apt-get -y install docker-ce docker-ce-cli containerd.io + + sudo usermod -aG docker $USER + + su - $USER + +else + +echo "================== Install Podman container (you can skip if already 
installed)==================" + +sudo apt-get update +sudo apt-get -y install podman +su - $USER + +fi diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-85/plot.sh new file mode 100644 index 0000000000..b3c8f18d1e --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/plot.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + + + + + + + + +print_colorful_text() { + local text="$1" + local color_code="$2" + echo "\e[${color_code}m${text}\e[0m" +} + +container="docker" +image="micro2023-photon" + +echo "================== Run a container test to make sure container works ==================" + +#${container} run docker.io/hello-world + + +echo "================== Build the Docker image to run the experiments ==================" + +#${container} build -t ${image} -f "${CM_TMP_CURRENT_SCRIPT_PATH}/Dockerfile" . + +echo "================== Get All Results ==================" + +mkdir figures +##get all benchmarks +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testallbench.py -check;cd /root/artifact_evaluation/micro2023_figures/r9nano;./r9nano.py;./r9nanolevels.py;mv *.png /root/figures/;mv *.pdf /root/figures/" + +##get all benchmarks with architecture mi100 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testallbench.py -arch=mi100 -check;cd /root/artifact_evaluation/micro2023_figures/mi100;./mi100.py;mv *.pdf /root/figures/;mv *.png /root/figures" +# +###vgg16 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=vgg16 -check;cd /root/artifact_evaluation/micro2023_figures/vgg16;./vgg16.py;./vgg16speedup.py;mv *.pdf /root/figures/;mv *.png /root/figures" +###vgg19 +echo "Benchmarks MGPUSim-Simtime MGPUSim-Walltime Photon-Simtime Photon-Walltime" +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=vgg19 -check |grep Sum |awk -F Sum '{ printf \"vgg19\";print \$2}' " +###resnet18 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet18 -check |grep Sum|awk -F Sum '{printf \"resnet18\";print \$2}'" +####resnet32 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet32 -check |grep Sum|awk -F Sum '{printf \"resnet32\";print \$2}'" +####resnet50 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet50 -check|grep Sum |awk -F Sum '{printf \"resnet50\";print \$2}'" +####resnet101 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd 
/root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet101 -check|grep Sum|awk -F Sum '{printf \"resnet101\";print \$2}'" +####resnet152 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet152 -check|grep Sum|awk -F Sum '{printf \"resnet152\";print \$2}'" +## +#### +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testpagerank.py -check|grep pagerank|grep -v __pagerank" diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/run.sh b/script/reproduce-ieee-acm-micro2023-paper-85/run.sh new file mode 100644 index 0000000000..885b63322a --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/run.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + + + +print_colorful_text() { + local text="$1" + local color_code="$2" + echo "\e[${color_code}m${text}\e[0m" +} + +container="docker" +image="micro2023-photon" + +echo "================== Run a container test to make sure container works ==================" + +${container} run docker.io/hello-world + + +echo "================== Build the Docker image to run the experiments ==================" + +${container} build -t ${image} -f "${CM_TMP_CURRENT_SCRIPT_PATH}/Dockerfile" . + +echo "================== Execute all benchmarks ==================" +mkdir gpudata +##run all benchmarks +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testallbench.py" + +##run all benchmarks with architecture mi100 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testallbench.py -arch=mi100" + +##vgg16 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=vgg16" +##vgg19 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=vgg19" +##resnet18 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet18" +##resnet32 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet32" +##resnet50 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet50" +##resnet101 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet101" +##resnet152 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet152" +##pagerank +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd 
/root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testpagerank.py" + + diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_build_onikiri.sh b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_build_onikiri.sh new file mode 100644 index 0000000000..1cb9d45d60 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_build_onikiri.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsPreliminaryExperiments/ + +cd onikiri2/project/gcc/ +make -j$(nproc) +cd ../../../ diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_create_binary.sh b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_create_binary.sh new file mode 100644 index 0000000000..0a6d2af25e --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_create_binary.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsPreliminaryExperiments/ + +cd raytracing.github.io/build_micro2023_ae/ +sed s@~@../../../../ClockhandsEvaluation/A-riscv@ -i common.mk +make +cd ../../ +cp raytracing.github.io/build_micro2023_ae/InOneWeekend/a.out onikiri2/benchmark/RayTracing/riscv64/bin/InOneWeekend +cp raytracing.github.io/build_micro2023_ae/TheNextWeek/a.out onikiri2/benchmark/RayTracing/riscv64/bin/TheNextWeek +cp raytracing.github.io/build_micro2023_ae/TheRestOfYourLife/a.out onikiri2/benchmark/RayTracing/riscv64/bin/TheRestOfYourLife diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment.sh b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment.sh new file mode 100644 index 0000000000..f4b7c0d2ff --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsPreliminaryExperiments/ + +cd onikiri2/tool/AutoRunTools/ +sed s@/path/to@$(realpath ../../../)@ -i cfg.xml + +# You can change this! +GigaInsns=1 + +echo "Register lifetimes experiment for $GigaInsns giga instructions." +echo "It will take $(echo $GigaInsns \* 4 | bc) minutes." 
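+# What follows: the sed call rewrites the simulated instruction count (line 115 of cfg.xml) to "${GigaInsns}G",
+# enqueue.pl generates the per-run shell scripts under result/001/sh/exec/, those scripts are launched in
+# parallel and waited on, and summary.pl aggregates the results.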
+echo "You can change the number of instructions to evaluate by modifying $BASH_SOURCE" +sed '115 s@".*"@"'"$GigaInsns"'G"@' -i cfg.xml + +perl enqueue.pl -t +cd result/001/sh/exec/ +for i in *.sh; do sh $i & PID="$PID $!"; done +wait $PID +cd ../../../../ +perl summary.pl +cd ../../../ diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment_setup.sh b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment_setup.sh new file mode 100644 index 0000000000..9f70db2ee1 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment_setup.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsPreliminaryExperiments/ + +sed '59,74d' -i onikiri2/tool/AutoRunTools/cfg.xml diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_plot.sh b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_plot.sh new file mode 100644 index 0000000000..cf0ee26fab --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_plot.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsPreliminaryExperiments/ + +echo "" +echo "Please go to $(pwd) and check ClockhandsPreliminaryExperiments*.xlsx ." +echo "The procedure of generating charts are described on them." diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/README.md b/script/reproduce-ieee-acm-micro2023-paper-87/README.md new file mode 100644 index 0000000000..787326bc82 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/README.md @@ -0,0 +1,49 @@ +# CM script to run and reproduce experiments + +Archived artifact: https://zenodo.org/record/8218698 + +## Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). 
+Note that you need run the following command to install CM automation scripts: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: + +```bash +cm pull repo ctuning@cm4research +``` + +## Install deps + +To install dependencies, run: + +```bash +cmr "reproduce paper micro-2023 clockhands _install_deps" +``` + +## Run + +```bash +cmr "reproduce paper micro-2023 clockhands _build_compiler" +cmr "reproduce paper micro-2023 clockhands _create_binary" +cmr "reproduce paper micro-2023 clockhands _build_onikiri" +cmr "reproduce paper micro-2023 clockhands _experiment_setup" +cmr "reproduce paper micro-2023 clockhands _experiment" +cmr "reproduce paper micro-2023 clockhands _Preliminary_build_onikiri" +cmr "reproduce paper micro-2023 clockhands _Preliminary_create_binary" +cmr "reproduce paper micro-2023 clockhands _Preliminary_experiment_setup" +cmr "reproduce paper micro-2023 clockhands _Preliminary_experiment" +``` + +## Plot + +To plot the results of the real experiments, run + +```bash +cmr "reproduce paper micro-2023 clockhands _plot" +cmr "reproduce paper micro-2023 clockhands _Preliminary_plot" +``` diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-87/_cm.yaml new file mode 100644 index 0000000000..869258b3e8 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/_cm.yaml @@ -0,0 +1,55 @@ +alias: reproduce-ieee-acm-micro2023-paper-87 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: download-and-extract,_extract,_url.https://zenodo.org/record/8218698/files/Clockhands_Artifact_MICRO2023.tar?download=1 + env: + CM_DOWNLOAD_FINAL_ENV_NAME: CM_ARTIFACT_CLOCKHANDS + CM_EXTRACT_FINAL_ENV_NAME: CM_ARTIFACT_CLOCKHANDS_EXTRACTED +# CM_DOWNLOAD_CHECKSUM: + force_cache: true + extra_cache_tags: reproduce,paper,artifact,micro,clockhands +tags: +- reproduce +- project +- paper +- m +- micro +- micro-2023 +- '2023' +- '87' +- clockhands +- Clockhands +uid: bd56037bf32c4b71 +variations: + install_deps: + script_name: install_deps + build_compiler: + script_name: build_compiler + create_binary: + script_name: create_binary + build_onikiri: + script_name: build_onikiri + experiment_setup: + script_name: experiment_setup + experiment: + script_name: experiment + plot: + script_name: plot + Preliminary_build_onikiri: + script_name: Preliminary_build_onikiri + Preliminary_create_binary: + script_name: Preliminary_create_binary + Preliminary_experiment_setup: + script_name: Preliminary_experiment_setup + Preliminary_experiment: + script_name: Preliminary_experiment + Preliminary_plot: + script_name: Preliminary_plot diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/build_compiler.sh b/script/reproduce-ieee-acm-micro2023-paper-87/build_compiler.sh new file mode 100644 index 0000000000..4a43299a05 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/build_compiler.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ + + +cd A-riscv/ + +git clone https://github.com/riscv-collab/riscv-gnu-toolchain +cd riscv-gnu-toolchain/ +git checkout 2022.01.17 +CFLAGS="-O2 
-static" ./configure --prefix=$(realpath ../riscv_gcc111) --with-arch=rv64g +make linux -j$(nproc) +make -j$(nproc) +cd ../ + +cd musl/ +CC=../riscv_gcc111/bin/riscv64-unknown-linux-gnu-gcc CROSS_COMPILE=../riscv_gcc111/bin/riscv64-unknown-linux-gnu- ./configure --prefix=$(realpath ../musl-gcc) --target=riscv64 +make -j$(nproc) +make install +cd ../../ + +wget https://github.com/llvm/llvm-project/releases/download/llvmorg-12.0.1/clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz +tar xf clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz +mv clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu- clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu-16.04 diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/build_onikiri.sh b/script/reproduce-ieee-acm-micro2023-paper-87/build_onikiri.sh new file mode 100644 index 0000000000..cb0de224b8 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/build_onikiri.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ +cd onikiri2/project/gcc/ +make -j$(nproc) +cd ../../../ diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/create_binary.sh b/script/reproduce-ieee-acm-micro2023-paper-87/create_binary.sh new file mode 100644 index 0000000000..aaf0ebb50d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/create_binary.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ + +sed s@~@..@ -i A-riscv/stuff/make.inc +cd A-riscv/coremark/ +make +cd ../../ + +cd B-straight/toolchain/Test/coremark/ +make +cd ../../../../ + +cd C-clockhands/coremark/ +make +cd ../../ diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/experiment.sh b/script/reproduce-ieee-acm-micro2023-paper-87/experiment.sh new file mode 100644 index 0000000000..669eaa0d2b --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/experiment.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ + +cd evaluation/ +make -j$(nproc) diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/experiment_setup.sh b/script/reproduce-ieee-acm-micro2023-paper-87/experiment_setup.sh new file mode 100644 index 0000000000..c112258a9a --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/experiment_setup.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ + +cp A-riscv/coremark/rvbin/coremark.rvbin evaluation/0.coremark +cp B-straight/toolchain/Test/coremark/stbin/coremark.stbin evaluation/0.coremark +cp C-clockhands/coremark/chbin/coremark.chbin evaluation/0.coremark +cp onikiri2/project/gcc/onikiri2/a.out evaluation/onikiri2 diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/install_deps.sh 
b/script/reproduce-ieee-acm-micro2023-paper-87/install_deps.sh new file mode 100644 index 0000000000..2a8c9c7162 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/install_deps.sh @@ -0,0 +1,4 @@ +echo "Install dependencies to build riscv-gcc." +sudo apt install autoconf automake autotools-dev curl python3 python3-pip libmpc-dev libmpfr-dev libgmp-dev gawk build-essential bison flex texinfo gperf libtool patchutils bc zlib1g-dev libexpat-dev ninja-build git cmake libglib2.0-dev +echo "Install dependencies to make figures." +sudo apt install gnuplot diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-87/plot.sh new file mode 100644 index 0000000000..c6f2910a3e --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/plot.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ + +cd evaluation/ + +grep ExecutedCycles way*/*.xml | grep -v way[^v]*chbin | sort -V | sed -e 's/\(way[0-9]*\)-.*coremark./\1 /g' -e 's/bin.xml.*"\(.*\)"/ \1/' | awk 'NR==1{a=$3}NR%3==1{printf($1)}{printf(" "a/$3)}NR%3==0{print""}' > PerformanceImprovement.dat +echo 'set terminal png; set out "PerformanceImprovement.png"; set style histogram clustered; plot [] [0:2] "PerformanceImprovement.dat" using 2:xtic(1) with histogram title "R", "PerformanceImprovement.dat" using 3 with histogram title "S", "PerformanceImprovement.dat" using 4 with histogram title "C";' | gnuplot + +grep Retirer -B3 way8-*/*.xml | grep NumOpCode | grep -v way[^v]*chbin | sed 'y/",/ /' | awk 'NR==1{for(i=3;i<37;++i){a+=$(i)}}{for(i=3;i<37;++i){$(i)/=a}}{print (NR==1?"R":NR==2?"S":"C"),$4+$5,$9,$7,$10+$20,$11+$21,$14+$15,$16+$17,$22+$23+$24+$25+$26+$27+$28+$29,$13,$33,$30+$31}' > InstructionBreakdown.dat +echo 'set terminal png; set out "InstructionBreakdown.png"; set style histogram rowstacked; set key invert; plot "InstructionBreakdown.dat" using 2:xtic(1) with histogram title "Call+Ret", "InstructionBreakdown.dat" using 3 with histogram title "Jump", "InstructionBreakdown.dat" using 4 with histogram title "CondBr", "InstructionBreakdown.dat" using 5 with histogram title "Load", "InstructionBreakdown.dat" using 6 with histogram title "Store", "InstructionBreakdown.dat" using 7 with histogram title "ALU", "InstructionBreakdown.dat" using 8 with histogram title "Mul+Div", "InstructionBreakdown.dat" using 9 with histogram title "FLOPs", "InstructionBreakdown.dat" using 10 with histogram title "Move", "InstructionBreakdown.dat" using 11 with histogram title "NOP", "InstructionBreakdown.dat" using 12 with histogram title "Others";' | gnuplot + +cat <(grep SkippedInsns skip-result/*.chbin.xml) <(grep 'Register.*Frequency' skip-result/*.chbin.xml) | sed 'y/",/ /' | awk 'NR==1{insns=$2}NR!=1{for(s=t=u=v=i=0;i<16;++i){s+=$(2+i);t+=$(18+i);u+=$(34+i);v+=$(50+i)}print (NR==2?"Write":"Read"),s/insns,t/insns,u/insns,v/insns,(NR==2?(insns-s-t-u-v)/insns:0)}' > HandBreakdown.dat +echo 'set terminal png; set out "HandBreakdown.png"; set style histogram rowstacked; set key invert; plot "HandBreakdown.dat" using 2:xtic(1) with histogram title "s hand", "HandBreakdown.dat" using 3 with histogram title "t hand", "HandBreakdown.dat" using 4 with histogram title "u hand", "HandBreakdown.dat" using 5 with histogram title "v hand", "HandBreakdown.dat" 
using 6 with histogram title "no dst hand";' | gnuplot + +cat <(grep SkippedInsns skip-result/*.chbin.xml) <(grep LifetimeDistributionKey skip-result/*.chbin.xml) <(grep LifetimeDistributionCount skip-result/*.chbin.xml) | sed 'y/",/ /' | awk 'NR==1{insns=$2}NR==2{for(i=2;i<700;++i){a[i]=$(i)}}NR>2{sum=1e-300;for(i=699;i>1;--i){sum+=$(i);b[NR][i]=sum/insns}}END{for(i=2;i<700;++i){print a[i],b[3][i],b[4][i],b[5][i],b[6][i]}}' > LifetimeByHand.dat +echo 'set terminal png; set out "LifetimeByHand.png"; set logscale x; set logscale y; plot [1:1e6] [1e-6:1] "LifetimeByHand.dat" using 1:2 with line title "v", "LifetimeByHand.dat" using 1:3 with line title "u", "LifetimeByHand.dat" using 1:4 with line title "t", "LifetimeByHand.dat" using 1:5 with line title "s";' | gnuplot + +cat <(grep SkippedInsns skip-result/*.rvbin.xml) <(grep LifetimeDistributionKey skip-result/*.rvbin.xml) <(grep LifetimeDistributionCountAll skip-result/*.rvbin.xml) | sed 'y/",/ /' | awk 'NR==1{insns=$2}NR==2{for(i=2;i<700;++i){a[i]=$(i)}}NR==3{for(i=699;i>1;--i){sum+=$(i);print a[i],sum/insns}}' > Lifetime-RV.dat +echo 'set terminal png; set out "Lifetime-RV.png"; set logscale x; set logscale y; plot [1:1e6] [1e-6:1] "Lifetime-RV.dat" using 1:2 with line title "RV";' | gnuplot +cat <(grep SkippedInsns skip-result/*.stbin.xml) <(grep LifetimeDistributionKey skip-result/*.stbin.xml) <(grep LifetimeDistributionCountAll skip-result/*.stbin.xml) | sed 'y/",/ /' | awk 'NR==1{insns=$2}NR==2{for(i=2;i<700;++i){a[i]=$(i)}}NR==3{for(i=699;i>1;--i){sum+=$(i);print a[i],sum/insns}}' > Lifetime-ST.dat +echo 'set terminal png; set out "Lifetime-ST.png"; set logscale x; set logscale y; plot [1:1e6] [1e-6:1] "Lifetime-ST.dat" using 1:2 with line title "ST";' | gnuplot +cat <(grep SkippedInsns skip-result/*.chbin.xml) <(grep LifetimeDistributionKey skip-result/*.chbin.xml) <(grep LifetimeDistributionCountAll skip-result/*.chbin.xml) | sed 'y/",/ /' | awk 'NR==1{insns=$2}NR==2{for(i=2;i<700;++i){a[i]=$(i)}}NR==3{for(i=699;i>1;--i){sum+=$(i);print a[i],sum/insns}}' > Lifetime-CH.dat +echo 'set terminal png; set out "Lifetime-CH.png"; set logscale x; set logscale y; plot [1:1e6] [1e-6:1] "Lifetime-CH.dat" using 1:2 with line title "CH";' | gnuplot + +echo "see $(pwd)/*.png!" diff --git a/script/reproduce-micro-paper-2023-victima/README-extra.md b/script/reproduce-ieee-acm-micro2023-paper-96/README.md similarity index 64% rename from script/reproduce-micro-paper-2023-victima/README-extra.md rename to script/reproduce-ieee-acm-micro2023-paper-96/README.md index b4c01e1338..68c190378a 100644 --- a/script/reproduce-micro-paper-2023-victima/README-extra.md +++ b/script/reproduce-ieee-acm-micro2023-paper-96/README.md @@ -10,35 +10,36 @@ Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/ma Install reusable MLCommons automations: ```bash -cm pull repo mlcommons@cm4mlops --checkout=dev +cm pull repo mlcommons@ck ``` -### Run Victima via CM interface - -The core CM script for Victima will be available under ```/CM/repos/mlcommons@cm4mlops/script/reproduce-micro-2023-paper-victima``` +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` -It is described by `_cm.yaml` and several native scripts. 
+ +### Run Victima via CM interface Perform the following steps to evaluate Victima with MLCommons CM automation language: 1) This command will install system dependencies for Docker and require sudo (skip it if you have Docker installed): ```bash -cmr "reproduce paper micro 2023 victima _install_deps" +cmr "reproduce project m 2023 victima _install_deps" ``` 2) This command will prepare and run all experiments via Docker: ```bash -cmr "reproduce paper micro 2023 victima _run" +cmr "reproduce project m 2023 victima _run" ``` You can specify --job_manager and --container if needed: ```bash -cmr "reproduce paper micro 2023 victima _run" --job_manager=native|slurm --contianer=docker|podman +cmr "reproduce project m 2023 victima _run" --job_manager=native|slurm --container=docker|podman ``` 3) In case of successful execution of a previous command, this command will generate plots to help you validate results from the article: ```bash -cmr "reproduce paper micro 2023 victima _plot" +cmr "reproduce project m 2023 victima _plot" ``` diff --git a/script/reproduce-micro-paper-2023-victima/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-96/_cm.yaml similarity index 84% rename from script/reproduce-micro-paper-2023-victima/_cm.yaml rename to script/reproduce-ieee-acm-micro2023-paper-96/_cm.yaml index d20e5436a4..7daa9e6635 100644 --- a/script/reproduce-micro-paper-2023-victima/_cm.yaml +++ b/script/reproduce-ieee-acm-micro2023-paper-96/_cm.yaml @@ -1,4 +1,4 @@ -alias: reproduce-micro-paper-2023-victima +alias: reproduce-ieee-acm-micro2023-paper-96 automation_alias: script automation_uid: 5b4e0237da074764 cache: false @@ -15,7 +15,7 @@ deps: - tags: get,git,repo,_repo.https://github.com/CMU-SAFARI/Victima env: CM_GIT_ENV_KEY: 'CMU_SAFARI_VICTIMA' - extra_cache_tags: micro23,artifact,ae,cmu,safari,victima + extra_cache_tags: artifact,cmu,safari,victima input_mapping: job_manager: CM_VICTIMA_JOB_MANAGER container: CM_VICTIMA_CONTAINER @@ -24,8 +24,13 @@ tags: - reproduce - project - paper +- m - micro - micro-2023 +- '2023' +- '96' +- cmu +- safari - victima uid: fc5bee3426174e7b variations: diff --git a/script/reproduce-ieee-acm-micro2023-paper-96/customize.py b/script/reproduce-ieee-acm-micro2023-paper-96/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-96/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-micro-paper-2023-victima/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-96/install_deps.sh similarity index 100% rename from script/reproduce-micro-paper-2023-victima/install_deps.sh rename to script/reproduce-ieee-acm-micro2023-paper-96/install_deps.sh diff --git a/script/reproduce-ieee-acm-micro2023-paper-96/main.py b/script/reproduce-ieee-acm-micro2023-paper-96/main.py new file mode 100644 index 0000000000..d851f1450f --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-96/main.py @@ -0,0 +1,10 @@ +import os + +if __name__ == "__main__": + + print ('') + print ('Main script:') + print ('Experiment: {}'.format(os.environ.get('CM_EXPERIMENT',''))) + print ('') + + exit(0) diff --git a/script/reproduce-micro-paper-2023-victima/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-96/plot.sh similarity 
index 100% rename from script/reproduce-micro-paper-2023-victima/plot.sh rename to script/reproduce-ieee-acm-micro2023-paper-96/plot.sh diff --git a/script/reproduce-micro-paper-2023-victima/run.sh b/script/reproduce-ieee-acm-micro2023-paper-96/run.sh similarity index 100% rename from script/reproduce-micro-paper-2023-victima/run.sh rename to script/reproduce-ieee-acm-micro2023-paper-96/run.sh diff --git a/script/reproduce-micro-paper-2023-victima/README.md b/script/reproduce-micro-paper-2023-victima/README.md deleted file mode 100644 index fa2adfdf7a..0000000000 --- a/script/reproduce-micro-paper-2023-victima/README.md +++ /dev/null @@ -1 +0,0 @@ -Please see [https://docs.mlcommons.org/cm4mlops/scripts/Reproducibility-and-artifact-evaluation/reproduce-micro-paper-2023-victima](https://docs.mlcommons.org/cm4mlops/scripts/Reproducibility-and-artifact-evaluation/reproduce-micro-paper-2023-victima) for the documentation of this CM script. diff --git a/script/reproduce-micro-paper-2023-xyz/README.md b/script/reproduce-micro-paper-2023-xyz/README.md deleted file mode 100644 index 0b4f7dcaad..0000000000 --- a/script/reproduce-micro-paper-2023-xyz/README.md +++ /dev/null @@ -1,178 +0,0 @@ -
-Click here to see the table of contents. - -* [About](#about) -* [Summary](#summary) -* [Reuse this script in your project](#reuse-this-script-in-your-project) - * [ Install CM automation language](#install-cm-automation-language) - * [ Check CM script flags](#check-cm-script-flags) - * [ Run this script from command line](#run-this-script-from-command-line) - * [ Run this script from Python](#run-this-script-from-python) - * [ Run this script via GUI](#run-this-script-via-gui) - * [ Run this script via Docker (beta)](#run-this-script-via-docker-(beta)) -* [Customization](#customization) - * [ Variations](#variations) - * [ Script flags mapped to environment](#script-flags-mapped-to-environment) - * [ Default environment](#default-environment) -* [Script workflow, dependencies and native scripts](#script-workflow-dependencies-and-native-scripts) -* [Script output](#script-output) -* [New environment keys (filter)](#new-environment-keys-(filter)) -* [New environment keys auto-detected from customize](#new-environment-keys-auto-detected-from-customize) -* [Maintainers](#maintainers) - -
- -*Note that this README is automatically generated - don't edit!* - -### About - - -See extra [notes](README-extra.md) from the authors and contributors. - -#### Summary - -* CM GitHub repository: *[mlcommons@cm4mlops](https://github.com/mlcommons/cm4mlops)* -* GitHub directory for this script: *[GitHub](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz)* -* CM meta description for this script: *[_cm.yaml](_cm.yaml)* -* CM "database" tags to find this script: *reproduce,paper,micro,micro-2023,victima* -* Output cached? *False* -___ -### Reuse this script in your project - -#### Install CM automation language - -* [Installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) -* [CM intro](https://doi.org/10.5281/zenodo.8105339) - -#### Pull CM repository with this automation - -```cm pull repo mlcommons@cm4mlops --checkout=dev``` - - -#### Run this script from command line - -1. `cm run script --tags=reproduce,paper,micro,micro-2023,victima[,variations] [--input_flags]` - -2. `cmr "reproduce paper micro micro-2023 victima[ variations]" [--input_flags]` - -* `variations` can be seen [here](#variations) - -* `input_flags` can be seen [here](#script-flags-mapped-to-environment) - -#### Run this script from Python - -
-Click here to expand this section. - -```python - -import cmind - -r = cmind.access({'action':'run' - 'automation':'script', - 'tags':'reproduce,paper,micro,micro-2023,victima' - 'out':'con', - ... - (other input keys for this script) - ... - }) - -if r['return']>0: - print (r['error']) - -``` - -
- - -#### Run this script via GUI - -```cmr "cm gui" --script="reproduce,paper,micro,micro-2023,victima"``` - -Use this [online GUI](https://cKnowledge.org/cm-gui/?tags=reproduce,paper,micro,micro-2023,victima) to generate CM CMD. - -#### Run this script via Docker (beta) - -`cm docker script "reproduce paper micro micro-2023 victima[ variations]" [--input_flags]` - -___ -### Customization - - -#### Variations - - * *No group (any variation can be selected)* -
- Click here to expand this section. - - * `_install_deps` - - Workflow: - * `_plot` - - Workflow: - * `_run` - - Workflow: - -
- - -#### Script flags mapped to environment -
-Click here to expand this section. - -* `--container=value` → `CM_VICTIMA_CONTAINER=value` -* `--job_manager=value` → `CM_VICTIMA_JOB_MANAGER=value` - -**Above CLI flags can be used in the Python CM API as follows:** - -```python -r=cm.access({... , "container":...} -``` - -
- -#### Default environment - -
-Click here to expand this section. - -These keys can be updated via `--env.KEY=VALUE` or `env` dictionary in `@input.json` or using script flags. - -* CM_VICTIMA_JOB_MANAGER: `native` -* CM_VICTIMA_CONTAINER: `docker` - -
- -___ -### Script workflow, dependencies and native scripts - -
-Click here to expand this section. - - 1. ***Read "deps" on other CM scripts from [meta](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/_cm.yaml)*** - * detect,os - - CM script: [detect-os](https://github.com/mlcommons/cm4mlops/tree/main/script/detect-os) - * get,python - * CM names: `--adr.['python', 'python3']...` - - CM script: [get-python3](https://github.com/mlcommons/cm4mlops/tree/main/script/get-python3) - * get,git,repo,_repo.https://github.com/CMU-SAFARI/Victima - - CM script: [get-git-repo](https://github.com/mlcommons/cm4mlops/tree/main/script/get-git-repo) - 1. ***Run "preprocess" function from [customize.py](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/customize.py)*** - 1. Read "prehook_deps" on other CM scripts from [meta](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/_cm.yaml) - 1. ***Run native script if exists*** - * [run.sh](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/run.sh) - 1. Read "posthook_deps" on other CM scripts from [meta](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/_cm.yaml) - 1. ***Run "postrocess" function from [customize.py](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/customize.py)*** - 1. Read "post_deps" on other CM scripts from [meta](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/_cm.yaml) -
- -___ -### Script output -`cmr "reproduce paper micro micro-2023 victima[,variations]" [--input_flags] -j` -#### New environment keys (filter) - -#### New environment keys auto-detected from customize - -___ -### Maintainers - -* [Open MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) \ No newline at end of file
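For reference, all of the `reproduce-*` scripts added or renamed in this patch share the same CM usage pattern. The sketch below uses a placeholder tag set (`<paper-tags>`); substitute the tags listed in the README of the script you want to reproduce:

```bash
# One-time setup: pull the CM automations and this repository
cm pull repo mlcommons@ck
cm pull repo ctuning@cm4research

# Typical per-paper workflow (exact tags and variations differ per script)
cmr "reproduce paper micro-2023 <paper-tags> _install_deps"
cmr "reproduce paper micro-2023 <paper-tags> _run"
cmr "reproduce paper micro-2023 <paper-tags> _plot"
```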