From 8d36a7c747b4ba232abe99a572a5b4eb87eb6170 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Mon, 15 Jul 2024 21:42:15 +0530 Subject: [PATCH 01/29] updated script for redhat implementation --- script/get-dataset-openorca/_cm.json | 12 ++++++++++++ script/get-dataset-openorca/customize.py | 15 ++++++++++++--- script/get-dataset-openorca/run.sh | 7 +++++++ 3 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 script/get-dataset-openorca/run.sh diff --git a/script/get-dataset-openorca/_cm.json b/script/get-dataset-openorca/_cm.json index 82d5f4dfdc..3808008a49 100644 --- a/script/get-dataset-openorca/_cm.json +++ b/script/get-dataset-openorca/_cm.json @@ -13,6 +13,11 @@ "force_env_keys": [ "CM_GIT_*" ], + "skip_if_env": { + "CM_MLPERF_IMPLEMENTATION": [ + "redhat" + ] + }, "names": [ "openorca-src" ], @@ -75,6 +80,13 @@ "CM_DATASET_CALIBRATION": "no" }, "group": "dataset-type" + }, + "redhat": { + "deps":[ + { + "tags":"get,rclone" + } + ] } } } diff --git a/script/get-dataset-openorca/customize.py b/script/get-dataset-openorca/customize.py index 059c83826d..8e4fa81b9f 100644 --- a/script/get-dataset-openorca/customize.py +++ b/script/get-dataset-openorca/customize.py @@ -6,14 +6,23 @@ def preprocess(i): env = i['env'] + if env.get("CM_MLPERF_IMPLEMENTATION", "") == "redhat": + auth_s3_bucket = "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com" + download_cmd = "rclone copy mlc-inference:mlcommons-inference-wg-public/open_orca . -P" + run_cmd = f"{auth_s3_bucket} && {download_cmd}" + + print(run_cmd) + env['CM_RUN_CMD'] = run_cmd + return {'return': 0} def postprocess(i): env = i['env'] if env.get('CM_DATASET_CALIBRATION','') == "no": - env['CM_DATASET_PATH_ROOT'] = env['CM_DATASET_OPENORCA_PATH'] - env['CM_DATASET_PATH'] = env['CM_DATASET_OPENORCA_PATH'] - env['CM_DATASET_OPENORCA_PARQUET'] = os.path.join(env['CM_DATASET_OPENORCA_PATH'], '1M-GPT4-Augmented.parquet') + if env.get("CM_MLPERF_IMPLEMENTATION", "") != "redhat": + env['CM_DATASET_PATH_ROOT'] = env['CM_DATASET_OPENORCA_PATH'] + env['CM_DATASET_PATH'] = env['CM_DATASET_OPENORCA_PATH'] + env['CM_DATASET_OPENORCA_PARQUET'] = os.path.join(env['CM_DATASET_OPENORCA_PATH'], '1M-GPT4-Augmented.parquet') else: env['CM_CALIBRATION_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'calibration', 'data') diff --git a/script/get-dataset-openorca/run.sh b/script/get-dataset-openorca/run.sh new file mode 100644 index 0000000000..fbcbb42fd2 --- /dev/null +++ b/script/get-dataset-openorca/run.sh @@ -0,0 +1,7 @@ +cmd=${CM_RUN_CMD} +echo "${cmd}" +eval "${cmd}" +test $? -eq 0 || exit $? + +echo "CM_DATASET_OPENORCA_PATH=${PWD}/open_orca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl.gz" > temp_ver.out +test $? -eq 0 || exit $? 
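A minimal usage sketch for the change above, assuming the standard `cmind` Python API and the variation/env names introduced in `_cm.json`; the exact invocation is an illustration, not part of the patch:

```python
# Hypothetical sketch: trigger the redhat download path added in this patch.
# Assumes the `cmind` package is installed and this repository is registered with CM.
import cmind

r = cmind.access({
    'action': 'run',
    'automation': 'script',
    'tags': 'get,dataset,openorca,_redhat',            # "redhat" variation added above
    'env': {'CM_MLPERF_IMPLEMENTATION': 'redhat'},     # selects the rclone branch in customize.py
    'quiet': True,
})
if r['return'] > 0:
    raise RuntimeError(r.get('error', 'get-dataset-openorca failed'))

# CM_DATASET_OPENORCA_PATH is exported by run.sh via temp_ver.out in this patch.
print(r.get('new_env', {}).get('CM_DATASET_OPENORCA_PATH'))
```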
\ No newline at end of file From 9ce68269fe18370bb2438033b5becfa690060163 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Mon, 15 Jul 2024 21:43:08 +0530 Subject: [PATCH 02/29] added llama2 --- script/app-mlperf-inference-redhat/_cm.yaml | 6 +++++- .../app-mlperf-inference-redhat/customize.py | 20 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/script/app-mlperf-inference-redhat/_cm.yaml b/script/app-mlperf-inference-redhat/_cm.yaml index 43363d8c48..e9473ebd78 100644 --- a/script/app-mlperf-inference-redhat/_cm.yaml +++ b/script/app-mlperf-inference-redhat/_cm.yaml @@ -241,7 +241,10 @@ variations: CM_MODEL: gptj-99.9 llama2-70b_: - {} + deps: + - tags: get,dataset,"openorca","language-processing","original" + env: + CM_MLPERF_IMPLEMENTATION: redhat llama2-70b-99: group: model @@ -256,6 +259,7 @@ variations: - llama2-70b_ env: CM_MODEL: llama2-70b-99.9 + CM_VLLM_SERVER_MODEL_NAME: NousResearch/Meta-Llama-3-8B-Instruct # assigned just for testing purpose singlestream: group: loadgen-scenario diff --git a/script/app-mlperf-inference-redhat/customize.py b/script/app-mlperf-inference-redhat/customize.py index 36d0bafb88..243fe41424 100644 --- a/script/app-mlperf-inference-redhat/customize.py +++ b/script/app-mlperf-inference-redhat/customize.py @@ -52,6 +52,26 @@ def get_run_cmd(model, i): run_dir = os.path.join(env['CM_MLPERF_INFERENCE_IMPLEMENTATION_REPO'], "open", submitter, "code", "gptj-99") return {'return': 0, 'run_cmd': run_cmd, 'run_dir': run_dir} + + if "llama2" in model: + scenario = env['CM_MLPERF_LOADGEN_SCENARIO'] + device = env['CM_MLPERF_DEVICE'] + mode = env['CM_MLPERF_LOADGEN_MODE'] + outdir = env['CM_MLPERF_OUTPUT_DIR'] + mlperf_conf_path = env['CM_MLPERF_CONF'] + user_conf_path = env['CM_MLPERF_USER_CONF'] + api_server = env.get('CM_MLPERF_INFERENCE_API_SERVER', 'localhost:8000/v1') + api_model_name = env['CM_VLLM_SERVER_MODEL_NAME'] + dataset_path = env['CM_DATASET_OPENORCA_PATH'] + precision = env['CM_MLPERF_MODEL_PRECISION'] + if mode == "accuracy": + accuracy_string = " --accuracy " + else: + accuracy_string = "" + + run_cmd = f"python3 -u main.py --scenario {scenario} --api-model-name {api_model_name} --api-server {api_server} --mlperf-conf {mlperf_conf_path} {accuracy_string} --vllm --user-conf {user_conf_path} --dataset-path {dataset_path} --output-log-dir {outdir} --dtype float32 --device {device} " + submitter = "RedHat-Supermicro" + run_dir = os.path.join(env['CM_MLPERF_INFERENCE_IMPLEMENTATION_REPO'], "open", submitter, "code", model) def postprocess(i): From 1fb7e8536cbab1b12a9ac7a13486d84a645dc725 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 15 Jul 2024 17:46:00 +0100 Subject: [PATCH 03/29] Fixes for redhat-openshift --- script/app-mlperf-inference-redhat/_cm.yaml | 2 +- script/app-mlperf-inference-redhat/customize.py | 2 ++ script/get-dataset-openorca/customize.py | 5 +++-- script/get-dataset-openorca/run.sh | 3 --- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/script/app-mlperf-inference-redhat/_cm.yaml b/script/app-mlperf-inference-redhat/_cm.yaml index e9473ebd78..308759bde4 100644 --- a/script/app-mlperf-inference-redhat/_cm.yaml +++ b/script/app-mlperf-inference-redhat/_cm.yaml @@ -242,7 +242,7 @@ variations: llama2-70b_: deps: - - tags: get,dataset,"openorca","language-processing","original" + - tags: get,dataset,openorca,language-processing,original env: CM_MLPERF_IMPLEMENTATION: redhat diff --git a/script/app-mlperf-inference-redhat/customize.py 
b/script/app-mlperf-inference-redhat/customize.py index 243fe41424..948cc3c21c 100644 --- a/script/app-mlperf-inference-redhat/customize.py +++ b/script/app-mlperf-inference-redhat/customize.py @@ -73,6 +73,8 @@ def get_run_cmd(model, i): submitter = "RedHat-Supermicro" run_dir = os.path.join(env['CM_MLPERF_INFERENCE_IMPLEMENTATION_REPO'], "open", submitter, "code", model) + return {'return': 0, 'run_cmd': run_cmd, 'run_dir': run_dir} + def postprocess(i): env = i['env'] diff --git a/script/get-dataset-openorca/customize.py b/script/get-dataset-openorca/customize.py index 8e4fa81b9f..f23870b130 100644 --- a/script/get-dataset-openorca/customize.py +++ b/script/get-dataset-openorca/customize.py @@ -11,7 +11,6 @@ def preprocess(i): download_cmd = "rclone copy mlc-inference:mlcommons-inference-wg-public/open_orca . -P" run_cmd = f"{auth_s3_bucket} && {download_cmd}" - print(run_cmd) env['CM_RUN_CMD'] = run_cmd return {'return': 0} @@ -19,7 +18,9 @@ def preprocess(i): def postprocess(i): env = i['env'] if env.get('CM_DATASET_CALIBRATION','') == "no": - if env.get("CM_MLPERF_IMPLEMENTATION", "") != "redhat": + if env.get("CM_MLPERF_IMPLEMENTATION", "") == "redhat": + env['CM_DATASET_OPENORCA_PATH'] = os.path.join(os.getcwd(), 'open_orca', 'open_orca_gpt4_tokenized_llama.sampled_24576.pkl.gz') + else: env['CM_DATASET_PATH_ROOT'] = env['CM_DATASET_OPENORCA_PATH'] env['CM_DATASET_PATH'] = env['CM_DATASET_OPENORCA_PATH'] env['CM_DATASET_OPENORCA_PARQUET'] = os.path.join(env['CM_DATASET_OPENORCA_PATH'], '1M-GPT4-Augmented.parquet') diff --git a/script/get-dataset-openorca/run.sh b/script/get-dataset-openorca/run.sh index fbcbb42fd2..a797ea9e4e 100644 --- a/script/get-dataset-openorca/run.sh +++ b/script/get-dataset-openorca/run.sh @@ -2,6 +2,3 @@ cmd=${CM_RUN_CMD} echo "${cmd}" eval "${cmd}" test $? -eq 0 || exit $? - -echo "CM_DATASET_OPENORCA_PATH=${PWD}/open_orca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl.gz" > temp_ver.out -test $? -eq 0 || exit $? 
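Since the llama2 path above talks to an external OpenAI-compatible vLLM endpoint (default `localhost:8000/v1` in `get_run_cmd`), a quick sanity check of that endpoint before launching the benchmark could look like the sketch below; the endpoint URL and model name are assumptions taken from the patch defaults:

```python
# Hypothetical pre-flight check for the --api-server endpoint used by get_run_cmd().
# vLLM's OpenAI-compatible server exposes GET <base>/models listing the served models.
import requests

api_server = "http://localhost:8000/v1"                    # default assumed in customize.py
expected_model = "NousResearch/Meta-Llama-3-8B-Instruct"   # test value from _cm.yaml

resp = requests.get(f"{api_server}/models", timeout=10)
resp.raise_for_status()
served = [m["id"] for m in resp.json().get("data", [])]
if expected_model not in served:
    raise RuntimeError(f"{expected_model} not served; found: {served}")
print("vLLM endpoint is up and serving the expected model")
```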
\ No newline at end of file From 2990f18ae2d537b4b838c94fbcf82bf6dc6932ce Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 15 Jul 2024 18:01:00 +0100 Subject: [PATCH 04/29] Fixes for redhat-openshift --- script/app-mlperf-inference-redhat/_cm.yaml | 14 ++++++++------ script/app-mlperf-inference-redhat/customize.py | 7 ++++++- script/run-mlperf-inference-app/_cm.yaml | 1 + 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/script/app-mlperf-inference-redhat/_cm.yaml b/script/app-mlperf-inference-redhat/_cm.yaml index 308759bde4..4353379b6e 100644 --- a/script/app-mlperf-inference-redhat/_cm.yaml +++ b/script/app-mlperf-inference-redhat/_cm.yaml @@ -107,13 +107,14 @@ deps: - tags: get,git,repo names: - inference-results - inference-code - updats_tags_from_env_with_prefix: - _repo.: CM_MLPERF_INFERENCE_RESULTS_REPO + - inference-results + - inference-code + update_tags_from_env_with_prefix: + _repo.: + - CM_MLPERF_INFERENCE_RESULTS_REPO env: CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_MLPERF_INFERENCE_IMPLEMENTATION_REPO - extra_cache_tags: inference-implementation,mlperf + extra_cache_tags: results,repo,mlperf # Post dependencies to run this app including for power measurement post_deps: @@ -296,10 +297,11 @@ variations: fp32: group: precision - r4.0_default: + r4.1-dev_default: group: version default: true env: CM_MLPERF_INFERENCE_RESULTS_REPO: https://github.com/mlcommons/inference_results_v4.0 + docker: real_run: False diff --git a/script/app-mlperf-inference-redhat/customize.py b/script/app-mlperf-inference-redhat/customize.py index 948cc3c21c..31cbe08789 100644 --- a/script/app-mlperf-inference-redhat/customize.py +++ b/script/app-mlperf-inference-redhat/customize.py @@ -27,7 +27,12 @@ def preprocess(i): run_dir = r ['run_dir'] print(run_cmd) print(run_dir) - return {'return':1, 'error': 'Run command needs to be tested'} + env['CM_MLPERF_RUN_CMD'] = run_cmd + env['CM_RUN_DIR'] = run_dir + env['CM_RUN_CMD'] = run_cmd + + return {'return':0} + #return {'return':1, 'error': 'Run command needs to be tested'} def get_run_cmd(model, i): env = i['env'] diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 7ab19fa8f4..8f3c7e140d 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -36,6 +36,7 @@ default_env: CM_MLPERF_RUN_STYLE: test input_mapping: + api_server: CM_MLPERF_INFERENCE_API_SERVER backend: CM_MLPERF_BACKEND batch_size: CM_MLPERF_LOADGEN_MAX_BATCHSIZE beam_size: GPTJ_BEAM_SIZE From 3f30d2bfea6d04a525bea782ca4ed4e2c399b673 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 15 Jul 2024 22:50:54 +0530 Subject: [PATCH 05/29] Fix openorca dataset meta --- script/get-dataset-openorca/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/get-dataset-openorca/customize.py b/script/get-dataset-openorca/customize.py index f23870b130..60aa986672 100644 --- a/script/get-dataset-openorca/customize.py +++ b/script/get-dataset-openorca/customize.py @@ -19,7 +19,7 @@ def postprocess(i): env = i['env'] if env.get('CM_DATASET_CALIBRATION','') == "no": if env.get("CM_MLPERF_IMPLEMENTATION", "") == "redhat": - env['CM_DATASET_OPENORCA_PATH'] = os.path.join(os.getcwd(), 'open_orca', 'open_orca_gpt4_tokenized_llama.sampled_24576.pkl.gz') + env['CM_DATASET_OPENORCA_PATH'] = os.path.join(os.getcwd(), 'open_orca_gpt4_tokenized_llama.sampled_24576.pkl.gz') else: env['CM_DATASET_PATH_ROOT'] = env['CM_DATASET_OPENORCA_PATH'] env['CM_DATASET_PATH'] = 
env['CM_DATASET_OPENORCA_PATH'] From d5e6217cbbffb2a68cbda3e8e6f109d821bd277a Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Tue, 16 Jul 2024 17:42:58 +0530 Subject: [PATCH 06/29] relinked to inference repo --- script/app-mlperf-inference-redhat/_cm.yaml | 5 +++-- script/app-mlperf-inference-redhat/customize.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/script/app-mlperf-inference-redhat/_cm.yaml b/script/app-mlperf-inference-redhat/_cm.yaml index 4353379b6e..75f460f370 100644 --- a/script/app-mlperf-inference-redhat/_cm.yaml +++ b/script/app-mlperf-inference-redhat/_cm.yaml @@ -243,9 +243,11 @@ variations: llama2-70b_: deps: - - tags: get,dataset,openorca,language-processing,original + - tags: get,dataset,openorca,language-processing,original,_redhat env: CM_MLPERF_IMPLEMENTATION: redhat + env: + CM_VLLM_SERVER_MODEL_NAME: NousResearch/Meta-Llama-3-8B-Instruct # assigned just for testing purpose llama2-70b-99: group: model @@ -260,7 +262,6 @@ variations: - llama2-70b_ env: CM_MODEL: llama2-70b-99.9 - CM_VLLM_SERVER_MODEL_NAME: NousResearch/Meta-Llama-3-8B-Instruct # assigned just for testing purpose singlestream: group: loadgen-scenario diff --git a/script/app-mlperf-inference-redhat/customize.py b/script/app-mlperf-inference-redhat/customize.py index 31cbe08789..d40536dcbf 100644 --- a/script/app-mlperf-inference-redhat/customize.py +++ b/script/app-mlperf-inference-redhat/customize.py @@ -74,7 +74,7 @@ def get_run_cmd(model, i): else: accuracy_string = "" - run_cmd = f"python3 -u main.py --scenario {scenario} --api-model-name {api_model_name} --api-server {api_server} --mlperf-conf {mlperf_conf_path} {accuracy_string} --vllm --user-conf {user_conf_path} --dataset-path {dataset_path} --output-log-dir {outdir} --dtype float32 --device {device} " + run_cmd = f"python3 -u {os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], 'language', 'llama2-70b', 'main.py')} --scenario {scenario} --model-path {api_model_name} --api-model-name {api_model_name} --api-server {api_server} --mlperf-conf {mlperf_conf_path} {accuracy_string} --vllm --user-conf {user_conf_path} --dataset-path {dataset_path} --output-log-dir {outdir} --dtype float32 --device {device} " submitter = "RedHat-Supermicro" run_dir = os.path.join(env['CM_MLPERF_INFERENCE_IMPLEMENTATION_REPO'], "open", submitter, "code", model) From 2b3ec12222bb2206173218ec051bd0bddbd30aa5 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 16 Jul 2024 14:04:08 +0100 Subject: [PATCH 07/29] Fix run script for redhat mlperf inference --- script/app-mlperf-inference-redhat/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/app-mlperf-inference-redhat/customize.py b/script/app-mlperf-inference-redhat/customize.py index d40536dcbf..3737101114 100644 --- a/script/app-mlperf-inference-redhat/customize.py +++ b/script/app-mlperf-inference-redhat/customize.py @@ -74,7 +74,7 @@ def get_run_cmd(model, i): else: accuracy_string = "" - run_cmd = f"python3 -u {os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], 'language', 'llama2-70b', 'main.py')} --scenario {scenario} --model-path {api_model_name} --api-model-name {api_model_name} --api-server {api_server} --mlperf-conf {mlperf_conf_path} {accuracy_string} --vllm --user-conf {user_conf_path} --dataset-path {dataset_path} --output-log-dir {outdir} --dtype float32 --device {device} " + run_cmd = f"python3 -u 'main.py' --scenario {scenario} --model-path {api_model_name} --api-model-name {api_model_name} --api-server {api_server} --mlperf-conf {mlperf_conf_path} 
{accuracy_string} --vllm --user-conf {user_conf_path} --dataset-path {dataset_path} --output-log-dir {outdir} --dtype float32 --device {device} " submitter = "RedHat-Supermicro" run_dir = os.path.join(env['CM_MLPERF_INFERENCE_IMPLEMENTATION_REPO'], "open", submitter, "code", model) From fb8d23d6283757fb99402f8d9e9a30fcea6a65e2 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 16 Jul 2024 14:52:17 +0100 Subject: [PATCH 08/29] Use mlc preprocessed dataset for openorca by default --- .../_cm.yaml | 2 +- .../customize.py | 6 +++ .../_cm.json | 54 +++++++++++++++++-- .../customize.py | 23 +++++--- 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 2cb9367dd9..7c14cf3495 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -517,7 +517,7 @@ deps: - stable-diffusion-xl ## OpenOrca for LLAMA2-70b - - tags: get,preprocessed,dataset,openorca,_validation + - tags: get,preprocessed,dataset,openorca,_validation,_mlcommons names: - openorca-preprocessed enable_if_env: diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 70415fc484..fbd603f403 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -306,6 +306,12 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ " --model-path " + env['MODEL_DIR'] + if env.get('CM_API_SERVER', '') != '': + env['CM_VLLM_SERVER_MODEL_NAME'] = "NousResearch/Meta-Llama-3-8B-Instruct" + if env.get('CM_MLPERF_INFERENCE_API_SERVER') == '': + env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" + cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " + cmd = cmd.replace("--count", "--total-sample-count") elif "mixtral-8x7b" in env['CM_MODEL']: diff --git a/script/get-preprocessed-dataset-openorca/_cm.json b/script/get-preprocessed-dataset-openorca/_cm.json index 94a1336a05..56c8b6cf8c 100644 --- a/script/get-preprocessed-dataset-openorca/_cm.json +++ b/script/get-preprocessed-dataset-openorca/_cm.json @@ -24,7 +24,13 @@ "openorca-original", "dataset-original" ], - "tags": "get,dataset,original,openorca" + "tags": "get,dataset,original,openorca", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_BY_MLC": [ + "on", + "yes" + ] + } }, { "force_env_keys": [ @@ -33,7 +39,13 @@ "names": [ "inference-src" ], - "tags": "mlperf,inference,source" + "tags": "mlperf,inference,source", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_BY_MLC": [ + "on", + "yes" + ] + } }, { "tags": "get,generic-python-lib,_package.pyarrow", @@ -54,7 +66,14 @@ ] }, { - "tags": "get,ml-model,llama2" + "tags": "get,ml-model,llama2", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_BY_MLC": [ + "on", + "yes" + ] + } + } ], "env": { @@ -73,7 +92,6 @@ "uid": "5614c39cb1564d72", "variations": { "60": { - "default": true, "ad": { "dataset-original": { "tags": "_60" @@ -88,6 +106,7 @@ "group": "dataset-type" }, "full": { + "default": true, "ad": { "dataset-original": { "tags": "_full" @@ -109,6 +128,33 @@ "CM_DATASET_CALIBRATION": "no" }, "group": "dataset-type" + }, + "mlcommons": { + "env": { + "CM_DATASET_PREPROCESSED_BY_MLC": "yes", + 
"CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com", + "CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/open_orca" + }, + "deps": [ + { + "env": { + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_OPENORCA_PREPROCESSED_ROOT", + "CM_EXTRACT_FINAL_ENV_NAME": "CM_OPENORCA_PREPROCESSED_ROOT", + "CM_EXTRACT_TO_FOLDER": "openorca-preprocessed" + }, + "tags": "download-and-extract,_rclone", + "update_tags_from_env_with_prefix": { + "_url.": [ + "CM_RCLONE_URL" + ] + }, + "force_cache": true, + "names": [ + "dae" + ], + "extra_cache_tags": "openorca,preprocessed,dataset" + } + ] } }, "docker": { diff --git a/script/get-preprocessed-dataset-openorca/customize.py b/script/get-preprocessed-dataset-openorca/customize.py index b5a3219dad..c432c3b34c 100644 --- a/script/get-preprocessed-dataset-openorca/customize.py +++ b/script/get-preprocessed-dataset-openorca/customize.py @@ -5,21 +5,30 @@ def preprocess(i): env = i['env'] - inference_src = env['CM_MLPERF_INFERENCE_SOURCE'] - run_dir = os.path.join(inference_src, 'language', 'llama2-70b') - model_dir = env['CM_ML_MODEL_PATH'] - run_cmd = env['CM_PYTHON_BIN_WITH_PATH'] + ' processorca.py --dataset_pq_path=' + env['CM_DATASET_OPENORCA_PARQUET'] + ' --model_dir=' + model_dir +' --seqlen_limit=2048 --export_dir=' + os.path.join(os.getcwd(), "processed-openorca") + ' --num_total_samples=' + env['CM_DATASET_SIZE'] + if str(env.get('CM_DATASET_PREPROCESSED_BY_MLC','')).lower() in [ "yes", "1", "true" ]: + run_dir = os.getcwd() + env['CM_DATASET_PREPROCESSED_PATH'] = os.path.join(env['CM_OPENORCA_PREPROCESSED_ROOT'], "open_orca_gpt4_tokenized_llama.sampled_24576.pkl.gz") + #run_cmd = f"gunzip -k {env['CM_DATASET_PREPROCESSED_PATH']}" + run_cmd = '' + else: + inference_src = env['CM_MLPERF_INFERENCE_SOURCE'] + run_dir = os.path.join(inference_src, 'language', 'llama2-70b') + model_dir = env['CM_ML_MODEL_PATH'] + run_cmd = env['CM_PYTHON_BIN_WITH_PATH'] + ' processorca.py --dataset_pq_path=' + env['CM_DATASET_OPENORCA_PARQUET'] + ' --model_dir=' + model_dir +' --seqlen_limit=2048 --export_dir=' + os.path.join(os.getcwd(), "processed-openorca") + ' --num_total_samples=' + env['CM_DATASET_SIZE'] env['CM_RUN_DIR'] = run_dir env['CM_RUN_CMD'] = run_cmd - - return {'return': 0} def postprocess(i): env = i['env'] - env['CM_DATASET_PREPROCESSED_PATH'] = os.path.join(os.path.join(os.getcwd(), "processed-openorca", 'open_orca_gpt4_tokenized_llama.sampled_'+env['CM_DATASET_SIZE']+'.pkl')) + if str(env.get('CM_DATASET_PREPROCESSED_BY_MLC','')).lower() in [ "yes", "1", "true", "on" ]: + pass #set in preprocess + else: + env['CM_DATASET_PREPROCESSED_PATH'] = os.path.join(os.path.join(os.getcwd(), "processed-openorca", 'open_orca_gpt4_tokenized_llama.sampled_'+env['CM_DATASET_SIZE']+'.pkl')) + + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_DATASET_PREPROCESSED_PATH'] return {'return': 0} From d10b63669b15060294ecf070d03224a1b1a9a44e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 16 Jul 2024 15:04:54 +0100 Subject: [PATCH 09/29] Fix api_server in app-mlperf-inference-reference --- script/app-mlperf-inference-mlcommons-python/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 
fbd603f403..352c0d8d6e 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -306,7 +306,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ " --model-path " + env['MODEL_DIR'] - if env.get('CM_API_SERVER', '') != '': + if env.get('CM_MLPERF_INFERENCE_API_SERVER', '') != '': env['CM_VLLM_SERVER_MODEL_NAME'] = "NousResearch/Meta-Llama-3-8B-Instruct" if env.get('CM_MLPERF_INFERENCE_API_SERVER') == '': env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" From a9da06e98457a01b95a265c3bfa4ad84e18694ee Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 01:39:32 +0530 Subject: [PATCH 10/29] added support for vllm server --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 8 +++++--- .../app-mlperf-inference-mlcommons-python/customize.py | 10 +++++----- script/app-mlperf-inference/_cm.yaml | 6 ++++++ script/process-mlperf-accuracy/_cm.json | 7 ++++++- script/process-mlperf-accuracy/customize.py | 6 +++++- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 7c14cf3495..82804d846b 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -82,7 +82,7 @@ new_env_keys: - CM_HW_NAME - CM_ML_MODEL_* - CM_MAX_EXAMPLES - + - CM_VLLM_* new_state_keys: - mlperf-inference-implementation - CM_SUT_* @@ -403,9 +403,11 @@ deps: CM_MODEL: - llama2-70b-99 - llama2-70b-99.9 - skip_if_env: + skip_if_any_env: CM_MLPERF_CUSTOM_MODEL_PATH: - - "on" + - "on" + CM_MLPERF_INFERENCE_API_SERVER: + - "on" ## mixtral-8x7b - tags: get,ml-model,mixtral diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 352c0d8d6e..8c9f0e7ec0 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -75,7 +75,7 @@ def preprocess(i): else: env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf "+ x + env['CM_MLPERF_CONF'] + x - if env.get('CM_NETWORK_LOADGEN', '') != "lon": + if env.get('CM_NETWORK_LOADGEN', '') != "lon" and env.get('CM_MLPERF_INFERENCE_API_SERVER','')=='': env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH') if not env['MODEL_DIR']: env['MODEL_DIR'] = os.path.dirname(env.get('CM_MLPERF_CUSTOM_MODEL_PATH', env.get('CM_ML_MODEL_FILE_WITH_PATH'))) @@ -304,14 +304,14 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ scenario_extra_options + mode_extra_options + \ " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \ - ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ - " --model-path " + env['MODEL_DIR'] + ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] if env.get('CM_MLPERF_INFERENCE_API_SERVER', '') != '': env['CM_VLLM_SERVER_MODEL_NAME'] = "NousResearch/Meta-Llama-3-8B-Instruct" if env.get('CM_MLPERF_INFERENCE_API_SERVER') == '': env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" - cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " - + cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name 
{env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " + else: + cmd += f" --model-path {env['MODEL_DIR']}" cmd = cmd.replace("--count", "--total-sample-count") elif "mixtral-8x7b" in env['CM_MODEL']: diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 0c12f798f7..d3041ea3a5 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -280,6 +280,9 @@ variations: - CM_NVIDIA_TP_SIZE env: BUILD_TRTLLM: 1 + skip_if_env: + CM_MLPERF_INFERENCE_API_SERVER: + - "*" nvidia-original,r4.1_default,llama2-70b_: docker: @@ -290,6 +293,9 @@ variations: - CM_NVIDIA_TP_SIZE env: BUILD_TRTLLM: 1 + skip_if_env: + CM_MLPERF_INFERENCE_API_SERVER: + - "*" nvidia-original: docker: diff --git a/script/process-mlperf-accuracy/_cm.json b/script/process-mlperf-accuracy/_cm.json index 17a0594830..9af1a4f997 100644 --- a/script/process-mlperf-accuracy/_cm.json +++ b/script/process-mlperf-accuracy/_cm.json @@ -323,7 +323,12 @@ "names": [ "llama2-model" ], - "tags": "get,ml-model,llama2" + "tags": "get,ml-model,llama2", + "skip_if_env": { + "CM_MLPERF_INFERENCE_API_SERVER": [ + "on" + ] + } } ], "env": { diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index beeab9204d..d6536e13fd 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -77,7 +77,11 @@ def preprocess(i): elif dataset == "openorca": accuracy_checker_file = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b", "evaluate-accuracy.py") - CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + accuracy_checker_file + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA2_FILE_WITH_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \ + if env.get('CM_VLLM_SERVER_MODEL_NAME','') == '': + checkpoint_path = env['CM_ML_MODEL_LLAMA2_FILE_WITH_PATH'] + else: + checkpoint_path = env['CM_VLLM_SERVER_MODEL_NAME'] + CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + accuracy_checker_file + "' --checkpoint-path '" + checkpoint_path + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \ "' --dataset-file '" + env['CM_DATASET_PREPROCESSED_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "int32") +" > '" + out_file + "'" elif dataset == "openorca-gsm8k-mbxp-combined": From 108b596b9b14d86ea1abc0c8af32ac342966ac04 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 01:49:49 +0530 Subject: [PATCH 11/29] reverted changes made for redhat api --- script/get-dataset-openorca/_cm.json | 12 ------------ script/get-dataset-openorca/customize.py | 16 +++------------- script/get-dataset-openorca/run.sh | 4 ---- 3 files changed, 3 insertions(+), 29 deletions(-) delete mode 100644 script/get-dataset-openorca/run.sh diff --git a/script/get-dataset-openorca/_cm.json b/script/get-dataset-openorca/_cm.json index 3808008a49..82d5f4dfdc 100644 --- a/script/get-dataset-openorca/_cm.json +++ b/script/get-dataset-openorca/_cm.json @@ -13,11 +13,6 @@ "force_env_keys": [ "CM_GIT_*" ], - "skip_if_env": { - "CM_MLPERF_IMPLEMENTATION": [ - "redhat" - ] - }, "names": [ "openorca-src" ], @@ -80,13 +75,6 @@ "CM_DATASET_CALIBRATION": "no" }, "group": "dataset-type" - }, - "redhat": { - "deps":[ - { - "tags":"get,rclone" - } - ] } } } diff --git a/script/get-dataset-openorca/customize.py b/script/get-dataset-openorca/customize.py index 60aa986672..059c83826d 100644 --- a/script/get-dataset-openorca/customize.py +++ 
b/script/get-dataset-openorca/customize.py @@ -6,24 +6,14 @@ def preprocess(i): env = i['env'] - if env.get("CM_MLPERF_IMPLEMENTATION", "") == "redhat": - auth_s3_bucket = "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com" - download_cmd = "rclone copy mlc-inference:mlcommons-inference-wg-public/open_orca . -P" - run_cmd = f"{auth_s3_bucket} && {download_cmd}" - - env['CM_RUN_CMD'] = run_cmd - return {'return': 0} def postprocess(i): env = i['env'] if env.get('CM_DATASET_CALIBRATION','') == "no": - if env.get("CM_MLPERF_IMPLEMENTATION", "") == "redhat": - env['CM_DATASET_OPENORCA_PATH'] = os.path.join(os.getcwd(), 'open_orca_gpt4_tokenized_llama.sampled_24576.pkl.gz') - else: - env['CM_DATASET_PATH_ROOT'] = env['CM_DATASET_OPENORCA_PATH'] - env['CM_DATASET_PATH'] = env['CM_DATASET_OPENORCA_PATH'] - env['CM_DATASET_OPENORCA_PARQUET'] = os.path.join(env['CM_DATASET_OPENORCA_PATH'], '1M-GPT4-Augmented.parquet') + env['CM_DATASET_PATH_ROOT'] = env['CM_DATASET_OPENORCA_PATH'] + env['CM_DATASET_PATH'] = env['CM_DATASET_OPENORCA_PATH'] + env['CM_DATASET_OPENORCA_PARQUET'] = os.path.join(env['CM_DATASET_OPENORCA_PATH'], '1M-GPT4-Augmented.parquet') else: env['CM_CALIBRATION_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'calibration', 'data') diff --git a/script/get-dataset-openorca/run.sh b/script/get-dataset-openorca/run.sh deleted file mode 100644 index a797ea9e4e..0000000000 --- a/script/get-dataset-openorca/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -cmd=${CM_RUN_CMD} -echo "${cmd}" -eval "${cmd}" -test $? -eq 0 || exit $? From 239c03a0626cdc1e92d9f8db4b8ea31d3f2a82b2 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 16 Jul 2024 21:40:22 +0100 Subject: [PATCH 12/29] Minor fixes --- script/app-mlperf-inference-mlcommons-python/customize.py | 3 +-- script/app-mlperf-inference/_cm.yaml | 6 ------ 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 8c9f0e7ec0..76a26eef00 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -307,8 +307,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] if env.get('CM_MLPERF_INFERENCE_API_SERVER', '') != '': env['CM_VLLM_SERVER_MODEL_NAME'] = "NousResearch/Meta-Llama-3-8B-Instruct" - if env.get('CM_MLPERF_INFERENCE_API_SERVER') == '': - env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" + #env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " else: cmd += f" --model-path {env['MODEL_DIR']}" diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index d3041ea3a5..0c12f798f7 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -280,9 +280,6 @@ variations: - CM_NVIDIA_TP_SIZE env: BUILD_TRTLLM: 1 - skip_if_env: - CM_MLPERF_INFERENCE_API_SERVER: - - "*" nvidia-original,r4.1_default,llama2-70b_: docker: @@ -293,9 +290,6 @@ variations: - CM_NVIDIA_TP_SIZE env: BUILD_TRTLLM: 1 - skip_if_env: - 
CM_MLPERF_INFERENCE_API_SERVER: - - "*" nvidia-original: docker: From 021e7e45b952949cc92bb9a7b85038472fe60626 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 16 Jul 2024 22:01:00 +0100 Subject: [PATCH 13/29] vllm fixes --- script/app-mlperf-inference-mlcommons-python/customize.py | 2 +- script/run-mlperf-inference-app/_cm.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 76a26eef00..c00ba02883 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -306,7 +306,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] if env.get('CM_MLPERF_INFERENCE_API_SERVER', '') != '': - env['CM_VLLM_SERVER_MODEL_NAME'] = "NousResearch/Meta-Llama-3-8B-Instruct" + env['CM_VLLM_SERVER_MODEL_NAME'] = env.get("CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct" #env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " else: diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 8f3c7e140d..7f9427bc16 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -99,6 +99,7 @@ input_mapping: sut: CM_MLPERF_INFERENCE_SUT_VARIATION nvidia_llama2_dataset_file_path: CM_NVIDIA_LLAMA_DATASET_FILE_PATH tp_size: CM_NVIDIA_TP_SIZE + vllm_model_name: CM_VLLM_SERVER_MODEL_NAME new_state_keys: - app_mlperf_inference_* From 7880a418e0c94b105a70ef1a3e6f86a219d4e05a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 17 Jul 2024 12:58:53 +0100 Subject: [PATCH 14/29] support num_workers in LLAMA2 --- script/app-mlperf-inference-mlcommons-python/customize.py | 6 ++++++ script/run-mlperf-inference-app/_cm.yaml | 1 + 2 files changed, 7 insertions(+) diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index c00ba02883..b56896d6ce 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -297,6 +297,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b") backend = env['CM_MLPERF_BACKEND'] device = env['CM_MLPERF_DEVICE'] if env['CM_MLPERF_DEVICE'] != "gpu" else "cuda" + cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ " --dataset-path " + env['CM_DATASET_PREPROCESSED_PATH'] + \ @@ -305,12 +306,17 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio scenario_extra_options + mode_extra_options + \ " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + if env.get('CM_MLPERF_INFERENCE_API_SERVER', '') != '': env['CM_VLLM_SERVER_MODEL_NAME'] = env.get("CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct" #env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path 
{env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " else: cmd += f" --model-path {env['MODEL_DIR']}" + + if env.get('CM_MLPERF_INFERENCE_NUM_WORKERS', '') != '': + cmd += f" --num-workers {env['CM_MLPERF_INFERENCE_NUM_WORKERS']}" + cmd = cmd.replace("--count", "--total-sample-count") elif "mixtral-8x7b" in env['CM_MODEL']: diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 7f9427bc16..706fd1dd2d 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -100,6 +100,7 @@ input_mapping: nvidia_llama2_dataset_file_path: CM_NVIDIA_LLAMA_DATASET_FILE_PATH tp_size: CM_NVIDIA_TP_SIZE vllm_model_name: CM_VLLM_SERVER_MODEL_NAME + num_workers: CM_MLPERF_INFERENCE_NUM_WORKERS new_state_keys: - app_mlperf_inference_* From c271fdebdcb2347e139bf5460e423b5df5bf84ea Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 17:54:23 +0530 Subject: [PATCH 15/29] changes for docker --- script/run-vllm-server/_cm.yaml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/script/run-vllm-server/_cm.yaml b/script/run-vllm-server/_cm.yaml index 5a4485e016..d00417d5f8 100644 --- a/script/run-vllm-server/_cm.yaml +++ b/script/run-vllm-server/_cm.yaml @@ -20,16 +20,31 @@ input_mapping: pp_size: CM_VLLM_SERVER_PP_SIZE distributed-executor-backend: CM_VLLM_SERVER_DIST_EXEC_BACKEND api_key: CM_VLLM_SERVER_API_KEY + skip_docker_model_download: CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD deps: - tags: get,python3,get-python3 version_max: "3.11.999" version_max_usable: "3.11.0" - + + - tags: get,cuda,_cudnn + names: + - cuda + - tags: get,ml-model,huggingface,zoo,_clone-repo update_tags_from_env_with_prefix: _model-stub.: - CM_VLLM_SERVER_MODEL_NAME enable_if_env: CM_VLLM_SERVER_MODEL_NAME: [ on ] + skip_if_env: + CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD: [ on ] + - tags: get,generic-python-lib,_package.vllm + +docker: + port_maps: + - "8000:8000" + base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + interactive: True + extra_run_args: ' --ulimit memlock=-1' From aedd72a502368c25caac2ef024142a66f4cce164 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 18:33:54 +0530 Subject: [PATCH 16/29] mapped host gpus to docker --- script/run-vllm-server/_cm.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/script/run-vllm-server/_cm.yaml b/script/run-vllm-server/_cm.yaml index d00417d5f8..0a87b31059 100644 --- a/script/run-vllm-server/_cm.yaml +++ b/script/run-vllm-server/_cm.yaml @@ -48,3 +48,4 @@ docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public interactive: True extra_run_args: ' --ulimit memlock=-1' + all_gpus: 'yes' From 1cb8235774dea386bb86f2e9bffeff5e2d5ec7d2 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 20:51:40 +0530 Subject: [PATCH 17/29] ubuntu+pytorch base image upgrade for vllm --- script/run-vllm-server/_cm.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/script/run-vllm-server/_cm.yaml b/script/run-vllm-server/_cm.yaml index 0a87b31059..b99b18add5 100644 --- a/script/run-vllm-server/_cm.yaml +++ b/script/run-vllm-server/_cm.yaml @@ -45,7 +45,9 @@ deps: docker: port_maps: - "8000:8000" - base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + base_image: nvcr.io/nvidia/pytorch:24.06-py3 interactive: True extra_run_args: 
' --ulimit memlock=-1' all_gpus: 'yes' + os: "ubuntu" + os_version: "22.04" From b5082ff6bd14ffed1055471a7cf8a995381277ab Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 23:10:34 +0530 Subject: [PATCH 18/29] added all vllm server api arguments --- script/run-vllm-server/_cm.yaml | 90 +++++ script/run-vllm-server/customize.py | 356 ++++++++++++++++++ .../run-vllm-server/dockerfiles/.dockerignore | 1 + ...n8.9-aarch64-ubuntu22.04-public.Dockerfile | 45 +++ ...64-ubuntu22.04-public.Dockerfile.build.bat | 5 + ...h64-ubuntu22.04-public.Dockerfile.build.sh | 5 + ...nn8.9-x86_64-ubuntu20.04-public.Dockerfile | 45 +++ ...64-ubuntu20.04-public.Dockerfile.build.bat | 5 + ..._64-ubuntu20.04-public.Dockerfile.build.sh | 5 + ...6_64-ubuntu20.04-public.Dockerfile.run.bat | 1 + ...86_64-ubuntu20.04-public.Dockerfile.run.sh | 1 + .../dockerfiles/pytorch:24.06-py3.Dockerfile | 45 +++ .../pytorch:24.06-py3.Dockerfile.build.bat | 5 + .../pytorch:24.06-py3.Dockerfile.build.sh | 5 + .../pytorch:24.06-py3.Dockerfile.run.bat | 1 + .../pytorch:24.06-py3.Dockerfile.run.sh | 1 + script/run-vllm-server/dockerfiles/tmp-run.sh | 42 +++ .../dockerfiles/tmp-state.json | 3 + .../dockerfiles/ubuntu_22.04.Dockerfile | 45 +++ .../ubuntu_22.04.Dockerfile.build.bat | 5 + .../ubuntu_22.04.Dockerfile.build.sh | 5 + .../ubuntu_22.04.Dockerfile.run.bat | 1 + .../ubuntu_22.04.Dockerfile.run.sh | 1 + script/run-vllm-server/tmp-state.json | 3 + 24 files changed, 721 insertions(+) create mode 100644 script/run-vllm-server/dockerfiles/.dockerignore create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat create mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh create mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile create mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat create mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh create mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat create mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh create mode 100755 script/run-vllm-server/dockerfiles/tmp-run.sh create mode 100644 script/run-vllm-server/dockerfiles/tmp-state.json create mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile create mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat create mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh create mode 
100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat create mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh create mode 100644 script/run-vllm-server/tmp-state.json diff --git a/script/run-vllm-server/_cm.yaml b/script/run-vllm-server/_cm.yaml index b99b18add5..f75a3d9fec 100644 --- a/script/run-vllm-server/_cm.yaml +++ b/script/run-vllm-server/_cm.yaml @@ -21,6 +21,96 @@ input_mapping: distributed-executor-backend: CM_VLLM_SERVER_DIST_EXEC_BACKEND api_key: CM_VLLM_SERVER_API_KEY skip_docker_model_download: CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD + host: CM_VLLM_SERVER_HOST + port: CM_VLLM_SERVER_PORT + uvicorn_log_level: CM_VLLM_SERVER_UVICORN_LOG_LEVEL + allow_credentials: CM_VLLM_SERVER_ALLOW_CREDENTIALS + allowed_origins: CM_VLLM_SERVER_ALLOWED_ORIGINS + allowed_methods: CM_VLLM_SERVER_ALLOWED_METHODS + allowed_headers: CM_VLLM_SERVER_ALLOWED_HEADERS + lora_modules: CM_VLLM_SERVER_LORA_MODULES + prompt_adapters: CM_VLLM_SERVER_PROMPT_ADAPTERS + chat_template: CM_VLLM_SERVER_CHAT_TEMPLATE + response_role: CM_VLLM_SERVER_RESPONSE_ROLE + ssl_keyfile: CM_VLLM_SERVER_SSL_KEYFILE + ssl_certfile: CM_VLLM_SERVER_SSL_CERTFILE + ssl_ca_certs: CM_VLLM_SERVER_SSL_CA_CERTS + ssl_cert_reqs: CM_VLLM_SERVER_SSL_CERT_REQS + root_path: CM_VLLM_SERVER_ROOT_PATH + middleware: CM_VLLM_SERVER_MIDDLEWARE + tokenizer: CM_VLLM_SERVER_TOKENIZER + skip_tokenizer_init: CM_VLLM_SERVER_SKIP_TOKENIZER_INIT + revision: CM_VLLM_SERVER_REVISION + code_revision: CM_VLLM_SERVER_CODE_REVISION + tokenizer_revision: CM_VLLM_SERVER_TOKENIZER_REVISION + tokenizer_mode: CM_VLLM_SERVER_TOKENIZER_MODE + trust_remote_code: CM_VLLM_SERVER_TRUST_REMOTE_CODE + download_dir: CM_VLLM_SERVER_DOWNLOAD_DIR + load_format: CM_VLLM_SERVER_LOAD_FORMAT + dtype: CM_VLLM_SERVER_DTYPE + kv_cache_dtype: CM_VLLM_SERVER_KV_CACHE_DTYPE + quantization_param_path: CM_VLLM_SERVER_QUANTIZATION_PARAM_PATH + max_model_len: CM_VLLM_SERVER_MAX_MODEL_LEN + guided_decoding_backend: CM_VLLM_SERVER_GUIDED_DECODING_BACKEND + worker_use_ray: CM_VLLM_SERVER_WORKER_USE_RAY + pipeline_parallel_size: CM_VLLM_SERVER_PIPELINE_PARALLEL_SIZE + max_parallel_loading_workers: CM_VLLM_SERVER_MAX_PARALLEL_LOADING_WORKERS + ray_workers_use_nsight: CM_VLLM_SERVER_RAY_WORKERS_USE_NSIGHT + block_size: CM_VLLM_SERVER_BLOCK_SIZE + enable_prefix_caching: CM_VLLM_SERVER_ENABLE_PREFIX_CACHING + disable_sliding_window: CM_VLLM_SERVER_DISABLE_SLIDING_WINDOW + use_v2_block_manager: CM_VLLM_SERVER_USE_V2_BLOCK_MANAGER + num_lookahead_slots: CM_VLLM_SERVER_NUM_LOOKAHEAD_SLOTS + seed: CM_VLLM_SERVER_SEED + swap_space: CM_VLLM_SERVER_SWAP_SPACE + gpu_memory_utilization: CM_VLLM_SERVER_GPU_MEMORY_UTILIZATION + num_gpu_blocks_override: CM_VLLM_SERVER_NUM_GPU_BLOCKS_OVERRIDE + max_num_batched_tokens: CM_VLLM_SERVER_MAX_NUM_BATCHED_TOKENS + max_num_seqs: CM_VLLM_SERVER_MAX_NUM_SEQS + max_logprobs: CM_VLLM_SERVER_MAX_LOGPROBS + disable_log_stats: CM_VLLM_SERVER_DISABLE_LOG_STATS + quantization: CM_VLLM_SERVER_QUANTIZATION + rope_scaling: CM_VLLM_SERVER_ROPE_SCALING + rope_theta: CM_VLLM_SERVER_ROPE_THETA + enforce_eager: CM_VLLM_SERVER_ENFORCE_EAGER + max_context_len_to_capture: CM_VLLM_SERVER_MAX_CONTEXT_LEN_TO_CAPTURE + max_seq_len_to_capture: CM_VLLM_SERVER_MAX_SEQ_LEN_TO_CAPTURE + disable_custom_all_reduce: CM_VLLM_SERVER_DISABLE_CUSTOM_ALL_REDUCE + tokenizer_pool_size: CM_VLLM_SERVER_TOKENIZER_POOL_SIZE + tokenizer_pool_type: CM_VLLM_SERVER_TOKENIZER_POOL_TYPE + tokenizer_pool_extra_config: CM_VLLM_SERVER_TOKENIZER_POOL_EXTRA_CONFIG + 
enable_lora: CM_VLLM_SERVER_ENABLE_LORA + max_loras: CM_VLLM_SERVER_MAX_LORAS + max_lora_rank: CM_VLLM_SERVER_MAX_LORA_RANK + lora_extra_vocab_size: CM_VLLM_SERVER_LORA_EXTRA_VOCAB_SIZE + lora_dtype: CM_VLLM_SERVER_LORA_DTYPE + long_lora_scaling_factors: CM_VLLM_SERVER_LONG_LORA_SCALING_FACTORS + max_cpu_loras: CM_VLLM_SERVER_MAX_CPU_LORAS + fully_sharded_loras: CM_VLLM_SERVER_FULLY_SHARDED_LORAS + enable_prompt_adapter: CM_VLLM_SERVER_ENABLE_PROMPT_ADAPTER + max_prompt_adapters: CM_VLLM_SERVER_MAX_PROMPT_ADAPTERS + max_prompt_adapter_token: CM_VLLM_SERVER_MAX_PROMPT_ADAPTER_TOKEN + device: CM_VLLM_SERVER_DEVICE + scheduler_delay_factor: CM_VLLM_SERVER_SCHEDULER_DELAY_FACTOR + enable_chunked_prefill: CM_VLLM_SERVER_ENABLE_CHUNKED_PREFILL + speculative_model: CM_VLLM_SERVER_SPECULATIVE_MODEL + num_speculative_tokens: CM_VLLM_SERVER_NUM_SPECULATIVE_TOKENS + speculative_draft_tensor_parallel_size: CM_VLLM_SERVER_SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE + speculative_max_model_len: CM_VLLM_SERVER_SPECULATIVE_MAX_MODEL_LEN + speculative_disable_by_batch_size: CM_VLLM_SERVER_SPECULATIVE_DISABLE_BY_BATCH_SIZE + ngram_prompt_lookup_max: CM_VLLM_SERVER_NGRAM_PROMPT_LOOKUP_MAX + ngram_prompt_lookup_min: CM_VLLM_SERVER_NGRAM_PROMPT_LOOKUP_MIN + spec_decoding_acceptance_method: CM_VLLM_SERVER_SPEC_DECODING_ACCEPTANCE_METHOD + typical_acceptance_sampler_posterior_threshold: CM_VLLM_SERVER_TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD + typical_acceptance_sampler_posterior_alpha: CM_VLLM_SERVER_TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA + model_loader_extra_config: CM_VLLM_SERVER_MODEL_LOADER_EXTRA_CONFIG + preemption_mode: CM_VLLM_SERVER_PREEMPTION_MODE + served_model_name: CM_VLLM_SERVER_SERVED_MODEL_NAME + qlora_adapter_name_or_path: CM_VLLM_SERVER_QLORA_ADAPTER_NAME_OR_PATH + otlp_traces_endpoint: CM_VLLM_SERVER_OTLP_TRACES_ENDPOINT + engine_use_ray: CM_VLLM_SERVER_ENGINE_USE_RAY + disable_log_requests: CM_VLLM_SERVER_DISABLE_LOG_REQUESTS + max_log_len: CM_VLLM_SERVER_MAX_LOG_LEN deps: - tags: get,python3,get-python3 diff --git a/script/run-vllm-server/customize.py b/script/run-vllm-server/customize.py index 65288f42e5..aeffdc2002 100644 --- a/script/run-vllm-server/customize.py +++ b/script/run-vllm-server/customize.py @@ -35,6 +35,362 @@ def preprocess(i): if distributed_executor_backend: cmd_args += f" --distributed-executor-backend {distributed_executor_backend}" + host = env.get("CM_VLLM_SERVER_HOST", False) + if host: + cmd_args += f" --host {host}" + + port = env.get("CM_VLLM_SERVER_PORT", False) + if port: + cmd_args += f" --port {port}" + + uvicorn_log_level = env.get("CM_VLLM_SERVER_UVICORN_LOG_LEVEL", False) + if uvicorn_log_level: + cmd_args += f" --uvicorn-log-level {uvicorn_log_level}" + + allow_credentials = env.get("CM_VLLM_SERVER_ALLOW_CREDENTIALS", False) + if allow_credentials: + cmd_args += f" --allow-credentials" + + allowed_origins = env.get("CM_VLLM_SERVER_ALLOWED_ORIGINS", False) + if allowed_origins: + cmd_args += f" --allowed-origins {allowed_origins}" + + allowed_methods = env.get("CM_VLLM_SERVER_ALLOWED_METHODS", False) + if allowed_methods: + cmd_args += f" --allowed-methods {allowed_methods}" + + allowed_headers = env.get("CM_VLLM_SERVER_ALLOWED_HEADERS", False) + if allowed_headers: + cmd_args += f" --allowed-headers {allowed_headers}" + + lora_modules = env.get("CM_VLLM_SERVER_LORA_MODULES", False) + if lora_modules: + cmd_args += f" --lora-modules {lora_modules}" + + prompt_adapters = env.get("CM_VLLM_SERVER_PROMPT_ADAPTERS", False) + if prompt_adapters: + cmd_args += 
f" --prompt-adapters {prompt_adapters}" + + chat_template = env.get("CM_VLLM_SERVER_CHAT_TEMPLATE", False) + if chat_template: + cmd_args += f" --chat-template {chat_template}" + + response_role = env.get("CM_VLLM_SERVER_RESPONSE_ROLE", False) + if response_role: + cmd_args += f" --response-role {response_role}" + + ssl_keyfile = env.get("CM_VLLM_SERVER_SSL_KEYFILE", False) + if ssl_keyfile: + cmd_args += f" --ssl-keyfile {ssl_keyfile}" + + ssl_certfile = env.get("CM_VLLM_SERVER_SSL_CERTFILE", False) + if ssl_certfile: + cmd_args += f" --ssl-certfile {ssl_certfile}" + + ssl_ca_certs = env.get("CM_VLLM_SERVER_SSL_CA_CERTS", False) + if ssl_ca_certs: + cmd_args += f" --ssl-ca-certs {ssl_ca_certs}" + + ssl_cert_reqs = env.get("CM_VLLM_SERVER_SSL_CERT_REQS", False) + if ssl_cert_reqs: + cmd_args += f" --ssl-cert-reqs {ssl_cert_reqs}" + + root_path = env.get("CM_VLLM_SERVER_ROOT_PATH", False) + if root_path: + cmd_args += f" --root-path {root_path}" + + middleware = env.get("CM_VLLM_SERVER_MIDDLEWARE", False) + if middleware: + cmd_args += f" --middleware {middleware}" + + tokenizer = env.get("CM_VLLM_SERVER_TOKENIZER", False) + if tokenizer: + cmd_args += f" --tokenizer {tokenizer}" + + skip_tokenizer_init = env.get("CM_VLLM_SERVER_SKIP_TOKENIZER_INIT", False) + if skip_tokenizer_init: + cmd_args += f" --skip-tokenizer-init" + + revision = env.get("CM_VLLM_SERVER_REVISION", False) + if revision: + cmd_args += f" --revision {revision}" + + code_revision = env.get("CM_VLLM_SERVER_CODE_REVISION", False) + if code_revision: + cmd_args += f" --code-revision {code_revision}" + + tokenizer_revision = env.get("CM_VLLM_SERVER_TOKENIZER_REVISION", False) + if tokenizer_revision: + cmd_args += f" --tokenizer-revision {tokenizer_revision}" + + tokenizer_mode = env.get("CM_VLLM_SERVER_TOKENIZER_MODE", False) + if tokenizer_mode: + cmd_args += f" --tokenizer-mode {tokenizer_mode}" + + trust_remote_code = env.get("CM_VLLM_SERVER_TRUST_REMOTE_CODE", False) + if trust_remote_code: + cmd_args += f" --trust-remote-code" + + download_dir = env.get("CM_VLLM_SERVER_DOWNLOAD_DIR", False) + if download_dir: + cmd_args += f" --download-dir {download_dir}" + + load_format = env.get("CM_VLLM_SERVER_LOAD_FORMAT", False) + if load_format: + cmd_args += f" --load-format {load_format}" + + dtype = env.get("CM_VLLM_SERVER_DTYPE", False) + if dtype: + cmd_args += f" --dtype {dtype}" + + kv_cache_dtype = env.get("CM_VLLM_SERVER_KV_CACHE_DTYPE", False) + if kv_cache_dtype: + cmd_args += f" --kv-cache-dtype {kv_cache_dtype}" + + quantization_param_path = env.get("CM_VLLM_SERVER_QUANTIZATION_PARAM_PATH", False) + if quantization_param_path: + cmd_args += f" --quantization-param-path {quantization_param_path}" + + max_model_len = env.get("CM_VLLM_SERVER_MAX_MODEL_LEN", False) + if max_model_len: + cmd_args += f" --max-model-len {max_model_len}" + + guided_decoding_backend = env.get("CM_VLLM_SERVER_GUIDED_DECODING_BACKEND", False) + if guided_decoding_backend: + cmd_args += f" --guided-decoding-backend {guided_decoding_backend}" + + worker_use_ray = env.get("CM_VLLM_SERVER_WORKER_USE_RAY", False) + if worker_use_ray: + cmd_args += f" --worker-use-ray" + + max_parallel_loading_workers = env.get("CM_VLLM_SERVER_MAX_PARALLEL_LOADING_WORKERS", False) + if max_parallel_loading_workers: + cmd_args += f" --max-parallel-loading-workers {max_parallel_loading_workers}" + + ray_workers_use_nsight = env.get("CM_VLLM_SERVER_RAY_WORKERS_USE_NSIGHT", False) + if ray_workers_use_nsight: + cmd_args += f" --ray-workers-use-nsight" + + block_size = 
env.get("CM_VLLM_SERVER_BLOCK_SIZE", False) + if block_size: + cmd_args += f" --block-size {block_size}" + + enable_prefix_caching = env.get("CM_VLLM_SERVER_ENABLE_PREFIX_CACHING", False) + if enable_prefix_caching: + cmd_args += f" --enable-prefix-caching" + + disable_sliding_window = env.get("CM_VLLM_SERVER_DISABLE_SLIDING_WINDOW", False) + if disable_sliding_window: + cmd_args += f" --disable-sliding-window" + + use_v2_block_manager = env.get("CM_VLLM_SERVER_USE_V2_BLOCK_MANAGER", False) + if use_v2_block_manager: + cmd_args += f" --use-v2-block-manager" + + num_lookahead_slots = env.get("CM_VLLM_SERVER_NUM_LOOKAHEAD_SLOTS", False) + if num_lookahead_slots: + cmd_args += f" --num-lookahead-slots {num_lookahead_slots}" + + seed = env.get("CM_VLLM_SERVER_SEED", False) + if seed: + cmd_args += f" --seed {seed}" + + swap_space = env.get("CM_VLLM_SERVER_SWAP_SPACE", False) + if swap_space: + cmd_args += f" --swap-space {swap_space}" + + gpu_memory_utilization = env.get("CM_VLLM_SERVER_GPU_MEMORY_UTILIZATION", False) + if gpu_memory_utilization: + cmd_args += f" --gpu-memory-utilization {gpu_memory_utilization}" + + num_gpu_blocks_override = env.get("CM_VLLM_SERVER_NUM_GPU_BLOCKS_OVERRIDE", False) + if num_gpu_blocks_override: + cmd_args += f" --num-gpu-blocks-override {num_gpu_blocks_override}" + + max_num_batched_tokens = env.get("CM_VLLM_SERVER_MAX_NUM_BATCHED_TOKENS", False) + if max_num_batched_tokens: + cmd_args += f" --max-num-batched-tokens {max_num_batched_tokens}" + + max_num_seqs = env.get("CM_VLLM_SERVER_MAX_NUM_SEQS", False) + if max_num_seqs: + cmd_args += f" --max-num-seqs {max_num_seqs}" + + max_logprobs = env.get("CM_VLLM_SERVER_MAX_LOGPROBS", False) + if max_logprobs: + cmd_args += f" --max-logprobs {max_logprobs}" + + disable_log_stats = env.get("CM_VLLM_SERVER_DISABLE_LOG_STATS", False) + if disable_log_stats: + cmd_args += f" --disable-log-stats" + + quantization = env.get("CM_VLLM_SERVER_QUANTIZATION", False) + if quantization: + cmd_args += f" --quantization {quantization}" + + rope_scaling = env.get("CM_VLLM_SERVER_ROPE_SCALING", False) + if rope_scaling: + cmd_args += f" --rope-scaling {rope_scaling}" + + rope_theta = env.get("CM_VLLM_SERVER_ROPE_THETA", False) + if rope_theta: + cmd_args += f" --rope-theta {rope_theta}" + + enforce_eager = env.get("CM_VLLM_SERVER_ENFORCE_EAGER", False) + if enforce_eager: + cmd_args += f" --enforce-eager" + + max_context_len_to_capture = env.get("CM_VLLM_SERVER_MAX_CONTEXT_LEN_TO_CAPTURE", False) + if max_context_len_to_capture: + cmd_args += f" --max-context-len-to-capture {max_context_len_to_capture}" + + max_seq_len_to_capture = env.get("CM_VLLM_SERVER_MAX_SEQ_LEN_TO_CAPTURE", False) + if max_seq_len_to_capture: + cmd_args += f" --max-seq-len-to-capture {max_seq_len_to_capture}" + + disable_custom_all_reduce = env.get("CM_VLLM_SERVER_DISABLE_CUSTOM_ALL_REDUCE", False) + if disable_custom_all_reduce: + cmd_args += f" --disable-custom-all-reduce" + + tokenizer_pool_size = env.get("CM_VLLM_SERVER_TOKENIZER_POOL_SIZE", False) + if tokenizer_pool_size: + cmd_args += f" --tokenizer-pool-size {tokenizer_pool_size}" + + tokenizer_pool_type = env.get("CM_VLLM_SERVER_TOKENIZER_POOL_TYPE", False) + if tokenizer_pool_type: + cmd_args += f" --tokenizer-pool-type {tokenizer_pool_type}" + + tokenizer_pool_extra_config = env.get("CM_VLLM_SERVER_TOKENIZER_POOL_EXTRA_CONFIG", False) + if tokenizer_pool_extra_config: + cmd_args += f" --tokenizer-pool-extra-config {tokenizer_pool_extra_config}" + + enable_lora = env.get("CM_VLLM_SERVER_ENABLE_LORA", 
False) + if enable_lora: + cmd_args += f" --enable-lora" + + max_loras = env.get("CM_VLLM_SERVER_MAX_LORAS", False) + if max_loras: + cmd_args += f" --max-loras {max_loras}" + + max_lora_rank = env.get("CM_VLLM_SERVER_MAX_LORA_RANK", False) + if max_lora_rank: + cmd_args += f" --max-lora-rank {max_lora_rank}" + + lora_extra_vocab_size = env.get("CM_VLLM_SERVER_LORA_EXTRA_VOCAB_SIZE", False) + if lora_extra_vocab_size: + cmd_args += f" --lora-extra-vocab-size {lora_extra_vocab_size}" + + lora_dtype = env.get("CM_VLLM_SERVER_LORA_DTYPE", False) + if lora_dtype: + cmd_args += f" --lora-dtype {lora_dtype}" + + long_lora_scaling_factors = env.get("CM_VLLM_SERVER_LONG_LORA_SCALING_FACTORS", False) + if long_lora_scaling_factors: + cmd_args += f" --long-lora-scaling-factors {long_lora_scaling_factors}" + + max_cpu_loras = env.get("CM_VLLM_SERVER_MAX_CPU_LORAS", False) + if max_cpu_loras: + cmd_args += f" --max-cpu-loras {max_cpu_loras}" + + fully_sharded_loras = env.get("CM_VLLM_SERVER_FULLY_SHARDED_LORAS", False) + if fully_sharded_loras: + cmd_args += f" --fully-sharded-loras" + + enable_prompt_adapter = env.get("CM_VLLM_SERVER_ENABLE_PROMPT_ADAPTER", False) + if enable_prompt_adapter: + cmd_args += f" --enable-prompt-adapter" + + max_prompt_adapters = env.get("CM_VLLM_SERVER_MAX_PROMPT_ADAPTERS", False) + if max_prompt_adapters: + cmd_args += f" --max-prompt-adapters {max_prompt_adapters}" + + max_prompt_adapter_token = env.get("CM_VLLM_SERVER_MAX_PROMPT_ADAPTER_TOKEN", False) + if max_prompt_adapter_token: + cmd_args += f" --max-prompt-adapter-token {max_prompt_adapter_token}" + + device = env.get("CM_VLLM_SERVER_DEVICE", False) + if device: + cmd_args += f" --device {device}" + + scheduler_delay_factor = env.get("CM_VLLM_SERVER_SCHEDULER_DELAY_FACTOR", False) + if scheduler_delay_factor: + cmd_args += f" --scheduler-delay-factor {scheduler_delay_factor}" + + enable_chunked_prefill = env.get("CM_VLLM_SERVER_ENABLE_CHUNKED_PREFILL", False) + if enable_chunked_prefill: + cmd_args += f" --enable-chunked-prefill" + + speculative_model = env.get("CM_VLLM_SERVER_SPECULATIVE_MODEL", False) + if speculative_model: + cmd_args += f" --speculative-model {speculative_model}" + + num_speculative_tokens = env.get("CM_VLLM_SERVER_NUM_SPECULATIVE_TOKENS", False) + if num_speculative_tokens: + cmd_args += f" --num-speculative-tokens {num_speculative_tokens}" + + speculative_draft_tensor_parallel_size = env.get("CM_VLLM_SERVER_SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE", False) + if speculative_draft_tensor_parallel_size: + cmd_args += f" --speculative-draft-tensor-parallel-size {speculative_draft_tensor_parallel_size}" + + speculative_max_model_len = env.get("CM_VLLM_SERVER_SPECULATIVE_MAX_MODEL_LEN", False) + if speculative_max_model_len: + cmd_args += f" --speculative-max-model-len {speculative_max_model_len}" + + speculative_disable_by_batch_size = env.get("CM_VLLM_SERVER_SPECULATIVE_DISABLE_BY_BATCH_SIZE", False) + if speculative_disable_by_batch_size: + cmd_args += f" --speculative-disable-by-batch-size {speculative_disable_by_batch_size}" + + ngram_prompt_lookup_max = env.get("CM_VLLM_SERVER_NGRAM_PROMPT_LOOKUP_MAX", False) + if ngram_prompt_lookup_max: + cmd_args += f" --ngram-prompt-lookup-max {ngram_prompt_lookup_max}" + + ngram_prompt_lookup_min = env.get("CM_VLLM_SERVER_NGRAM_PROMPT_LOOKUP_MIN", False) + if ngram_prompt_lookup_min: + cmd_args += f" --ngram-prompt-lookup-min {ngram_prompt_lookup_min}" + + spec_decoding_acceptance_method = env.get("CM_VLLM_SERVER_SPEC_DECODING_ACCEPTANCE_METHOD", False) 
+ if spec_decoding_acceptance_method: + cmd_args += f" --spec-decoding-acceptance-method {spec_decoding_acceptance_method}" + + typical_acceptance_sampler_posterior_threshold = env.get("CM_VLLM_SERVER_TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD", False) + if typical_acceptance_sampler_posterior_threshold: + cmd_args += f" --typical-acceptance-sampler-posterior-threshold {typical_acceptance_sampler_posterior_threshold}" + + typical_acceptance_sampler_posterior_alpha = env.get("CM_VLLM_SERVER_TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA", False) + if typical_acceptance_sampler_posterior_alpha: + cmd_args += f" --typical-acceptance-sampler-posterior-alpha {typical_acceptance_sampler_posterior_alpha}" + + model_loader_extra_config = env.get("CM_VLLM_SERVER_MODEL_LOADER_EXTRA_CONFIG", False) + if model_loader_extra_config: + cmd_args += f" --model-loader-extra-config {model_loader_extra_config}" + + preemption_mode = env.get("CM_VLLM_SERVER_PREEMPTION_MODE", False) + if preemption_mode: + cmd_args += f" --preemption-mode {preemption_mode}" + + served_model_name = env.get("CM_VLLM_SERVER_SERVED_MODEL_NAME", False) + if served_model_name: + cmd_args += f" --served-model-name {served_model_name}" + + qlora_adapter_name_or_path = env.get("CM_VLLM_SERVER_QLORA_ADAPTER_NAME_OR_PATH", False) + if qlora_adapter_name_or_path: + cmd_args += f" --qlora-adapter-name-or-path {qlora_adapter_name_or_path}" + + otlp_traces_endpoint = env.get("CM_VLLM_SERVER_OTLP_TRACES_ENDPOINT", False) + if otlp_traces_endpoint: + cmd_args += f" --otlp-traces-endpoint {otlp_traces_endpoint}" + + engine_use_ray = env.get("CM_VLLM_SERVER_ENGINE_USE_RAY", False) + if engine_use_ray: + cmd_args += f" --engine-use-ray" + + disable_log_requests = env.get("CM_VLLM_SERVER_DISABLE_LOG_REQUESTS", False) + if disable_log_requests: + cmd_args += f" --disable-log-requests" + + max_log_len = env.get("CM_VLLM_SERVER_MAX_LOG_LEN", False) + if max_log_len: + cmd_args += f" --max-log-len {max_log_len}" + cmd = f"{env['CM_PYTHON_BIN_WITH_PATH']} -m vllm.entrypoints.openai.api_server {cmd_args}" print(cmd) diff --git a/script/run-vllm-server/dockerfiles/.dockerignore b/script/run-vllm-server/dockerfiles/.dockerignore new file mode 100644 index 0000000000..6b8710a711 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/.dockerignore @@ -0,0 +1 @@ +.git diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile new file mode 100644 index 0000000000..61cb8bb4c9 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile @@ -0,0 +1,45 @@ +FROM nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public + +# Automatically generated by the CM workflow automation meta-framework +# https://github.com/mlcommons/ck + +LABEL github="" +LABEL maintainer="" +LABEL license="" + +SHELL ["/bin/bash", "-c"] + +ARG UID=1000 +ARG GID=1000 +ARG CM_GH_TOKEN + + +# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes +# Install system dependencies +RUN apt-get update -y +RUN apt-get install -y python3 python3-pip git sudo wget python3-venv + +# Setup docker environment +ENTRYPOINT ["/bin/bash", "-c"] +ENV TZ="US/Pacific" +ENV PATH="${PATH}:/home/cmuser/.local/bin" +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone + +# 
Setup docker user +RUN groupadd -g $GID -o cm +RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser +RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +USER cmuser:cm +WORKDIR /home/cmuser + +# Install python packages +RUN python3 -m pip install --user cmind requests giturlparse tabulate + +# Download CM repo for scripts +RUN cm pull repo anandhu-eng@cm4mlops + +# Install all system dependencies +RUN cm run script --tags=get,sys-utils-cm --quiet + +# Run commands +RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat new file mode 100644 index 0000000000..998a0d9230 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat @@ -0,0 +1,5 @@ +docker build --no-cache ^ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile" ^ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ + . diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh new file mode 100644 index 0000000000..4420c7341b --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh @@ -0,0 +1,5 @@ +docker build --no-cache \ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile" \ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ + . 
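For reference, the CM_VLLM_SERVER_* handling added to script/run-vllm-server/customize.py earlier in this patch is a one-to-one mapping: each variable that is set contributes a single vLLM CLI flag to cmd_args. A minimal sketch of the command that logic would assemble, assuming CM_PYTHON_BIN_WITH_PATH resolves to python3 and using illustrative values (only the variable and flag names come from the patch):

# Hypothetical values -- only the variable/flag names appear in customize.py above.
export CM_VLLM_SERVER_GPU_MEMORY_UTILIZATION=0.9
export CM_VLLM_SERVER_SWAP_SPACE=16
export CM_VLLM_SERVER_SEED=42
export CM_VLLM_SERVER_ENFORCE_EAGER=yes

# customize.py would then build and print roughly:
python3 -m vllm.entrypoints.openai.api_server \
    --gpu-memory-utilization 0.9 \
    --swap-space 16 \
    --seed 42 \
    --enforce-eager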
diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile new file mode 100644 index 0000000000..cbf91bdaea --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile @@ -0,0 +1,45 @@ +FROM nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + +# Automatically generated by the CM workflow automation meta-framework +# https://github.com/mlcommons/ck + +LABEL github="" +LABEL maintainer="" +LABEL license="" + +SHELL ["/bin/bash", "-c"] + +ARG UID=1000 +ARG GID=1000 +ARG CM_GH_TOKEN + + +# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes +# Install system dependencies +RUN apt-get update -y +RUN apt-get install -y python3 python3-pip git sudo wget python3-venv + +# Setup docker environment +ENTRYPOINT ["/bin/bash", "-c"] +ENV TZ="US/Pacific" +ENV PATH="${PATH}:/home/cmuser/.local/bin" +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone + +# Setup docker user +RUN groupadd -g $GID -o cm +RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser +RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +USER cmuser:cm +WORKDIR /home/cmuser + +# Install python packages +RUN python3 -m pip install --user cmind requests giturlparse tabulate + +# Download CM repo for scripts +RUN cm pull repo anandhu-eng@cm4mlops + +# Install all system dependencies +RUN cm run script --tags=get,sys-utils-cm --quiet + +# Run commands +RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat new file mode 100644 index 0000000000..4b28e3c6c7 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat @@ -0,0 +1,5 @@ +docker build ^ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile" ^ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ + . diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh new file mode 100644 index 0000000000..17521deb81 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh @@ -0,0 +1,5 @@ +docker build \ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile" \ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ + . 
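Once one of these generated images is built and the run scripts that follow start the vLLM server (the containers map port 8000), the endpoint can be smoke-tested with any OpenAI-compatible client. A minimal sketch, assuming the server is reachable on localhost:8000, no --api-key was configured, and the served model name matches the one used throughout these Dockerfiles:

# Hypothetical smoke test -- the /v1/completions path and payload fields follow the
# OpenAI-compatible API exposed by vllm.entrypoints.openai.api_server.
curl http://localhost:8000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "NousResearch/Hermes-2-Theta-Llama-3-8B", "prompt": "Hello, vLLM!", "max_tokens": 16}'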
diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat new file mode 100644 index 0000000000..e09e482494 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat @@ -0,0 +1 @@ +docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh new file mode 100644 index 0000000000..e09e482494 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh @@ -0,0 +1 @@ +docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile new file mode 100644 index 0000000000..2db21f4ae4 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile @@ -0,0 +1,45 @@ +FROM nvcr.io/nvidia/pytorch:24.06-py3 + +# Automatically generated by the CM workflow automation meta-framework +# https://github.com/mlcommons/ck + +LABEL github="" +LABEL maintainer="" +LABEL license="" + +SHELL ["/bin/bash", "-c"] + +ARG UID=1000 +ARG GID=1000 +ARG CM_GH_TOKEN + + +# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes +# Install system dependencies +RUN apt-get update -y +RUN apt-get install -y python3 python3-pip git sudo wget python3-venv + +# Setup docker environment +ENTRYPOINT ["/bin/bash", "-c"] +ENV TZ="US/Pacific" +ENV PATH="${PATH}:/home/cmuser/.local/bin" +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone + +# Setup docker user +RUN groupadd -g $GID -o cm +RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser +RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +USER cmuser:cm +WORKDIR /home/cmuser + +# Install python packages +RUN python3 -m pip install --user cmind requests giturlparse tabulate + +# Download CM repo for scripts +RUN cm pull repo anandhu-eng@cm4mlops + +# Install all system dependencies +RUN cm run script --tags=get,sys-utils-cm --quiet + +# Run commands +RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat new file mode 100644 index 0000000000..b13a780050 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat @@ -0,0 +1,5 @@ +docker build 
--no-cache ^ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" ^ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ + . diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh new file mode 100644 index 0000000000..71754a9cf1 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh @@ -0,0 +1,5 @@ +docker build --no-cache \ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" \ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ + . diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat new file mode 100644 index 0000000000..24e904e9f1 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat @@ -0,0 +1 @@ +docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh new file mode 100644 index 0000000000..24e904e9f1 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh @@ -0,0 +1 @@ +docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/tmp-run.sh b/script/run-vllm-server/dockerfiles/tmp-run.sh new file mode 100755 index 0000000000..65c5270f0e --- /dev/null +++ b/script/run-vllm-server/dockerfiles/tmp-run.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +export CM_DOCKER_BUILD_ARGS="GID=\" $(id -g $USER) \" UID=\" $(id -u $USER) \" ${CM_DOCKER_BUILD_ARGS}" +export CM_BUILD_DOCKERFILE="no" +export CM_DOCKERFILE_WITH_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" +export CM_DOCKER_ADD_ALL_GPUS="True" +export CM_DOCKER_BUILD_ARGS="--build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \"" +export CM_DOCKER_BUILD_CMD="docker build --no-cache --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ." 
+export CM_DOCKER_CACHE="no" +export CM_DOCKER_CACHE_ARG=" --no-cache" +export CM_DOCKER_DETACHED_MODE="no" +export CM_DOCKER_EXTRA_RUN_ARGS=" --ulimit memlock=-1" +export CM_DOCKER_IMAGE_BASE="ubuntu:22.04" +export CM_DOCKER_IMAGE_NAME="cm-script-run-vllm-server" +export CM_DOCKER_IMAGE_RECREATE="yes" +export CM_DOCKER_IMAGE_REPO="cknowledge" +export CM_DOCKER_IMAGE_TAG="ubuntu-22.04-latest" +export CM_DOCKER_IMAGE_TAG_EXTRA="-latest" +export CM_DOCKER_INTERACTIVE_MODE="True" +export CM_DOCKER_OS="ubuntu" +export CM_DOCKER_OS_VERSION="22.04" +export CM_DOCKER_PORT_MAPS="['8000:8000']" +export CM_DOCKER_PRE_RUN_COMMANDS="[]" +export CM_DOCKER_RUN_CMD="cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True --docker_run_deps " +export CM_DOCKER_RUN_SCRIPT_TAGS="run,server,vllm,vllm-server" +export CM_DOCKER_VOLUME_MOUNTS="[]" +export CM_MLOPS_REPO="anandhu-eng@cm4mlops" +export CM_QUIET="yes" +export CM_REAL_RUN="True" +export CM_RUN_STATE_DOCKER="True" +export CM_TMP_CURRENT_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server" +export CM_TMP_CURRENT_SCRIPT_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/build-docker-image" +export CM_TMP_CURRENT_SCRIPT_REPO_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops" +export CM_TMP_CURRENT_SCRIPT_REPO_PATH_WITH_PREFIX="/home/anandhu/CM/repos/anandhu-eng@cm4mlops" +export CM_TMP_CURRENT_SCRIPT_WORK_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles" +export CM_TMP_PIP_VERSION_STRING="" +export CM_VLLM_SERVER_API_KEY="" +export CM_VLLM_SERVER_MODEL_NAME="NousResearch/Hermes-2-Theta-Llama-3-8B" +export CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD="True" + + +. "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/build-docker-image/run.sh" diff --git a/script/run-vllm-server/dockerfiles/tmp-state.json b/script/run-vllm-server/dockerfiles/tmp-state.json new file mode 100644 index 0000000000..e03109d44a --- /dev/null +++ b/script/run-vllm-server/dockerfiles/tmp-state.json @@ -0,0 +1,3 @@ +{ + "docker": {} +} diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile new file mode 100644 index 0000000000..29c6ef0775 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile @@ -0,0 +1,45 @@ +FROM ubuntu:22.04 + +# Automatically generated by the CM workflow automation meta-framework +# https://github.com/mlcommons/ck + +LABEL github="" +LABEL maintainer="" +LABEL license="" + +SHELL ["/bin/bash", "-c"] + +ARG UID=1000 +ARG GID=1000 +ARG CM_GH_TOKEN + + +# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes +# Install system dependencies +RUN apt-get update -y +RUN apt-get install -y python3 python3-pip git sudo wget python3-venv + +# Setup docker environment +ENTRYPOINT ["/bin/bash", "-c"] +ENV TZ="US/Pacific" +ENV PATH="${PATH}:/home/cmuser/.local/bin" +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone + +# Setup docker user +RUN groupadd -g $GID -o cm +RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser +RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +USER cmuser:cm +WORKDIR /home/cmuser + +# Install python packages +RUN python3 -m pip install --user cmind requests giturlparse tabulate + +# Download CM repo for scripts +RUN cm pull repo anandhu-eng@cm4mlops + +# Install all system dependencies +RUN cm run script --tags=get,sys-utils-cm --quiet + 
+# Run commands +RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat new file mode 100644 index 0000000000..84acbf7833 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat @@ -0,0 +1,5 @@ +docker build --no-cache ^ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile" ^ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ + . diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh new file mode 100644 index 0000000000..29217c2769 --- /dev/null +++ b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh @@ -0,0 +1,5 @@ +docker build --no-cache \ + --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ + -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile" \ + -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ + . diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat new file mode 100644 index 0000000000..4ef7c0806b --- /dev/null +++ b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat @@ -0,0 +1 @@ +docker run --entrypoint '' -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c 'cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= ' diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh new file mode 100644 index 0000000000..4ef7c0806b --- /dev/null +++ b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh @@ -0,0 +1 @@ +docker run --entrypoint '' -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c 'cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= ' diff --git a/script/run-vllm-server/tmp-state.json b/script/run-vllm-server/tmp-state.json new file mode 100644 index 0000000000..e03109d44a --- /dev/null +++ b/script/run-vllm-server/tmp-state.json @@ -0,0 +1,3 @@ +{ + "docker": {} +} From 78c1452a716c83551fe963934039596b909e8bf5 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 23:12:52 +0530 Subject: [PATCH 19/29] clean temp files --- .../run-vllm-server/dockerfiles/.dockerignore | 1 - ...n8.9-aarch64-ubuntu22.04-public.Dockerfile | 45 ------------------- ...64-ubuntu22.04-public.Dockerfile.build.bat | 5 --- ...h64-ubuntu22.04-public.Dockerfile.build.sh | 5 --- ...nn8.9-x86_64-ubuntu20.04-public.Dockerfile | 45 ------------------- ...64-ubuntu20.04-public.Dockerfile.build.bat | 5 --- ..._64-ubuntu20.04-public.Dockerfile.build.sh | 5 --- ...6_64-ubuntu20.04-public.Dockerfile.run.bat | 1 - ...86_64-ubuntu20.04-public.Dockerfile.run.sh | 1 - .../dockerfiles/pytorch:24.06-py3.Dockerfile | 45 ------------------- .../pytorch:24.06-py3.Dockerfile.build.bat | 5 --- .../pytorch:24.06-py3.Dockerfile.build.sh | 5 --- .../pytorch:24.06-py3.Dockerfile.run.bat | 1 - .../pytorch:24.06-py3.Dockerfile.run.sh | 1 - 
script/run-vllm-server/dockerfiles/tmp-run.sh | 42 ----------------- .../dockerfiles/tmp-state.json | 3 -- .../dockerfiles/ubuntu_22.04.Dockerfile | 45 ------------------- .../ubuntu_22.04.Dockerfile.build.bat | 5 --- .../ubuntu_22.04.Dockerfile.build.sh | 5 --- .../ubuntu_22.04.Dockerfile.run.bat | 1 - .../ubuntu_22.04.Dockerfile.run.sh | 1 - 21 files changed, 272 deletions(-) delete mode 100644 script/run-vllm-server/dockerfiles/.dockerignore delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat delete mode 100644 script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh delete mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile delete mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat delete mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh delete mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat delete mode 100644 script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh delete mode 100755 script/run-vllm-server/dockerfiles/tmp-run.sh delete mode 100644 script/run-vllm-server/dockerfiles/tmp-state.json delete mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile delete mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat delete mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh delete mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat delete mode 100644 script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh diff --git a/script/run-vllm-server/dockerfiles/.dockerignore b/script/run-vllm-server/dockerfiles/.dockerignore deleted file mode 100644 index 6b8710a711..0000000000 --- a/script/run-vllm-server/dockerfiles/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -.git diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile deleted file mode 100644 index 61cb8bb4c9..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -FROM nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public - -# Automatically generated by the CM workflow automation meta-framework -# https://github.com/mlcommons/ck - 
-LABEL github="" -LABEL maintainer="" -LABEL license="" - -SHELL ["/bin/bash", "-c"] - -ARG UID=1000 -ARG GID=1000 -ARG CM_GH_TOKEN - - -# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes -# Install system dependencies -RUN apt-get update -y -RUN apt-get install -y python3 python3-pip git sudo wget python3-venv - -# Setup docker environment -ENTRYPOINT ["/bin/bash", "-c"] -ENV TZ="US/Pacific" -ENV PATH="${PATH}:/home/cmuser/.local/bin" -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone - -# Setup docker user -RUN groupadd -g $GID -o cm -RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser -RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers -USER cmuser:cm -WORKDIR /home/cmuser - -# Install python packages -RUN python3 -m pip install --user cmind requests giturlparse tabulate - -# Download CM repo for scripts -RUN cm pull repo anandhu-eng@cm4mlops - -# Install all system dependencies -RUN cm run script --tags=get,sys-utils-cm --quiet - -# Run commands -RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat deleted file mode 100644 index 998a0d9230..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.bat +++ /dev/null @@ -1,5 +0,0 @@ -docker build --no-cache ^ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile" ^ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ - . diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh deleted file mode 100644 index 4420c7341b..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile.build.sh +++ /dev/null @@ -1,5 +0,0 @@ -docker build --no-cache \ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-aarch64-ubuntu22.04-public.Dockerfile" \ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ - . 
diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile deleted file mode 100644 index cbf91bdaea..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -FROM nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public - -# Automatically generated by the CM workflow automation meta-framework -# https://github.com/mlcommons/ck - -LABEL github="" -LABEL maintainer="" -LABEL license="" - -SHELL ["/bin/bash", "-c"] - -ARG UID=1000 -ARG GID=1000 -ARG CM_GH_TOKEN - - -# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes -# Install system dependencies -RUN apt-get update -y -RUN apt-get install -y python3 python3-pip git sudo wget python3-venv - -# Setup docker environment -ENTRYPOINT ["/bin/bash", "-c"] -ENV TZ="US/Pacific" -ENV PATH="${PATH}:/home/cmuser/.local/bin" -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone - -# Setup docker user -RUN groupadd -g $GID -o cm -RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser -RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers -USER cmuser:cm -WORKDIR /home/cmuser - -# Install python packages -RUN python3 -m pip install --user cmind requests giturlparse tabulate - -# Download CM repo for scripts -RUN cm pull repo anandhu-eng@cm4mlops - -# Install all system dependencies -RUN cm run script --tags=get,sys-utils-cm --quiet - -# Run commands -RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat deleted file mode 100644 index 4b28e3c6c7..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.bat +++ /dev/null @@ -1,5 +0,0 @@ -docker build ^ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile" ^ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ - . diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh deleted file mode 100644 index 17521deb81..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.build.sh +++ /dev/null @@ -1,5 +0,0 @@ -docker build \ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile" \ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ - . 
diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat deleted file mode 100644 index e09e482494..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.bat +++ /dev/null @@ -1 +0,0 @@ -docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh deleted file mode 100644 index e09e482494..0000000000 --- a/script/run-vllm-server/dockerfiles/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public.Dockerfile.run.sh +++ /dev/null @@ -1 +0,0 @@ -docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile deleted file mode 100644 index 2db21f4ae4..0000000000 --- a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -FROM nvcr.io/nvidia/pytorch:24.06-py3 - -# Automatically generated by the CM workflow automation meta-framework -# https://github.com/mlcommons/ck - -LABEL github="" -LABEL maintainer="" -LABEL license="" - -SHELL ["/bin/bash", "-c"] - -ARG UID=1000 -ARG GID=1000 -ARG CM_GH_TOKEN - - -# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes -# Install system dependencies -RUN apt-get update -y -RUN apt-get install -y python3 python3-pip git sudo wget python3-venv - -# Setup docker environment -ENTRYPOINT ["/bin/bash", "-c"] -ENV TZ="US/Pacific" -ENV PATH="${PATH}:/home/cmuser/.local/bin" -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone - -# Setup docker user -RUN groupadd -g $GID -o cm -RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser -RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers -USER cmuser:cm -WORKDIR /home/cmuser - -# Install python packages -RUN python3 -m pip install --user cmind requests giturlparse tabulate - -# Download CM repo for scripts -RUN cm pull repo anandhu-eng@cm4mlops - -# Install all system dependencies -RUN cm run script --tags=get,sys-utils-cm --quiet - -# Run commands -RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat deleted file mode 100644 index b13a780050..0000000000 --- a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.bat +++ /dev/null @@ -1,5 +0,0 @@ 
-docker build --no-cache ^ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" ^ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ - . diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh deleted file mode 100644 index 71754a9cf1..0000000000 --- a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.build.sh +++ /dev/null @@ -1,5 +0,0 @@ -docker build --no-cache \ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" \ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ - . diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat deleted file mode 100644 index 24e904e9f1..0000000000 --- a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.bat +++ /dev/null @@ -1 +0,0 @@ -docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh deleted file mode 100644 index 24e904e9f1..0000000000 --- a/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile.run.sh +++ /dev/null @@ -1 +0,0 @@ -docker run -it --entrypoint '' --gpus=all --ulimit memlock=-1 -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c '(cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True && bash ) || bash' diff --git a/script/run-vllm-server/dockerfiles/tmp-run.sh b/script/run-vllm-server/dockerfiles/tmp-run.sh deleted file mode 100755 index 65c5270f0e..0000000000 --- a/script/run-vllm-server/dockerfiles/tmp-run.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -export CM_DOCKER_BUILD_ARGS="GID=\" $(id -g $USER) \" UID=\" $(id -u $USER) \" ${CM_DOCKER_BUILD_ARGS}" -export CM_BUILD_DOCKERFILE="no" -export CM_DOCKERFILE_WITH_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" -export CM_DOCKER_ADD_ALL_GPUS="True" -export CM_DOCKER_BUILD_ARGS="--build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \"" -export CM_DOCKER_BUILD_CMD="docker build --no-cache --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/pytorch:24.06-py3.Dockerfile" -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ." 
-export CM_DOCKER_CACHE="no" -export CM_DOCKER_CACHE_ARG=" --no-cache" -export CM_DOCKER_DETACHED_MODE="no" -export CM_DOCKER_EXTRA_RUN_ARGS=" --ulimit memlock=-1" -export CM_DOCKER_IMAGE_BASE="ubuntu:22.04" -export CM_DOCKER_IMAGE_NAME="cm-script-run-vllm-server" -export CM_DOCKER_IMAGE_RECREATE="yes" -export CM_DOCKER_IMAGE_REPO="cknowledge" -export CM_DOCKER_IMAGE_TAG="ubuntu-22.04-latest" -export CM_DOCKER_IMAGE_TAG_EXTRA="-latest" -export CM_DOCKER_INTERACTIVE_MODE="True" -export CM_DOCKER_OS="ubuntu" -export CM_DOCKER_OS_VERSION="22.04" -export CM_DOCKER_PORT_MAPS="['8000:8000']" -export CM_DOCKER_PRE_RUN_COMMANDS="[]" -export CM_DOCKER_RUN_CMD="cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --adr.cuda.version=12.4.1 --skip_docker_model_download=True --docker_run_deps " -export CM_DOCKER_RUN_SCRIPT_TAGS="run,server,vllm,vllm-server" -export CM_DOCKER_VOLUME_MOUNTS="[]" -export CM_MLOPS_REPO="anandhu-eng@cm4mlops" -export CM_QUIET="yes" -export CM_REAL_RUN="True" -export CM_RUN_STATE_DOCKER="True" -export CM_TMP_CURRENT_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server" -export CM_TMP_CURRENT_SCRIPT_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/build-docker-image" -export CM_TMP_CURRENT_SCRIPT_REPO_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops" -export CM_TMP_CURRENT_SCRIPT_REPO_PATH_WITH_PREFIX="/home/anandhu/CM/repos/anandhu-eng@cm4mlops" -export CM_TMP_CURRENT_SCRIPT_WORK_PATH="/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles" -export CM_TMP_PIP_VERSION_STRING="" -export CM_VLLM_SERVER_API_KEY="" -export CM_VLLM_SERVER_MODEL_NAME="NousResearch/Hermes-2-Theta-Llama-3-8B" -export CM_VLLM_SKIP_DOCKER_MODEL_DOWNLOAD="True" - - -. "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/build-docker-image/run.sh" diff --git a/script/run-vllm-server/dockerfiles/tmp-state.json b/script/run-vllm-server/dockerfiles/tmp-state.json deleted file mode 100644 index e03109d44a..0000000000 --- a/script/run-vllm-server/dockerfiles/tmp-state.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "docker": {} -} diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile deleted file mode 100644 index 29c6ef0775..0000000000 --- a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -FROM ubuntu:22.04 - -# Automatically generated by the CM workflow automation meta-framework -# https://github.com/mlcommons/ck - -LABEL github="" -LABEL maintainer="" -LABEL license="" - -SHELL ["/bin/bash", "-c"] - -ARG UID=1000 -ARG GID=1000 -ARG CM_GH_TOKEN - - -# Notes: https://runnable.com/blog/9-common-dockerfile-mistakes -# Install system dependencies -RUN apt-get update -y -RUN apt-get install -y python3 python3-pip git sudo wget python3-venv - -# Setup docker environment -ENTRYPOINT ["/bin/bash", "-c"] -ENV TZ="US/Pacific" -ENV PATH="${PATH}:/home/cmuser/.local/bin" -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ >/etc/timezone - -# Setup docker user -RUN groupadd -g $GID -o cm -RUN useradd -m -u $UID -g $GID -o --create-home --shell /bin/bash cmuser -RUN echo "cmuser ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers -USER cmuser:cm -WORKDIR /home/cmuser - -# Install python packages -RUN python3 -m pip install --user cmind requests giturlparse tabulate - -# Download CM repo for scripts -RUN cm pull repo anandhu-eng@cm4mlops - -# Install all system dependencies -RUN cm run script --tags=get,sys-utils-cm 
--quiet - -# Run commands -RUN cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= --quiet --fake_run --env.CM_RUN_STATE_DOCKER=True diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat deleted file mode 100644 index 84acbf7833..0000000000 --- a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.bat +++ /dev/null @@ -1,5 +0,0 @@ -docker build --no-cache ^ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" ^ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile" ^ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" ^ - . diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh deleted file mode 100644 index 29217c2769..0000000000 --- a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.build.sh +++ /dev/null @@ -1,5 +0,0 @@ -docker build --no-cache \ - --build-arg GID=\" $(id -g $USER) \" --build-arg UID=\" $(id -u $USER) \" \ - -f "/home/anandhu/CM/repos/anandhu-eng@cm4mlops/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile" \ - -t "cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest" \ - . diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat deleted file mode 100644 index 4ef7c0806b..0000000000 --- a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.bat +++ /dev/null @@ -1 +0,0 @@ -docker run --entrypoint '' -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c 'cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= ' diff --git a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh b/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh deleted file mode 100644 index 4ef7c0806b..0000000000 --- a/script/run-vllm-server/dockerfiles/ubuntu_22.04.Dockerfile.run.sh +++ /dev/null @@ -1 +0,0 @@ -docker run --entrypoint '' -p 8000:8000 cknowledge/cm-script-run-vllm-server:ubuntu-22.04-latest bash -c 'cm run script --tags=run,vllm-server --model=NousResearch/Hermes-2-Theta-Llama-3-8B --api_key= ' From 5ad392405dfd4bdaefbc7b569f7500f5e4016aee Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 17 Jul 2024 23:23:37 +0530 Subject: [PATCH 20/29] delete temp file --- script/run-vllm-server/tmp-state.json | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 script/run-vllm-server/tmp-state.json diff --git a/script/run-vllm-server/tmp-state.json b/script/run-vllm-server/tmp-state.json deleted file mode 100644 index e03109d44a..0000000000 --- a/script/run-vllm-server/tmp-state.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "docker": {} -} From 87a8bb58b98bc27f1dff53a3c00d01e8a28d2536 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 17 Jul 2024 21:11:40 +0100 Subject: [PATCH 21/29] Support criteo preprocessed dataset from MLC --- .../get-preprocessed-dataset-criteo/_cm.json | 146 ++++++++++++++++-- .../customize.py | 9 ++ 2 files changed, 138 insertions(+), 17 deletions(-) diff --git a/script/get-preprocessed-dataset-criteo/_cm.json b/script/get-preprocessed-dataset-criteo/_cm.json index 38af6754d0..ff22891bb4 100644 --- a/script/get-preprocessed-dataset-criteo/_cm.json +++ b/script/get-preprocessed-dataset-criteo/_cm.json @@ -31,37 +31,87 @@ 
"names": [ "dlrm-src" ], - "tags": "get,dlrm,src" + "tags": "get,dlrm,src", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { "names": [ "inference-src" ], - "tags": "mlperf,mlcommons,inference,source,src" + "tags": "mlperf,mlcommons,inference,source,src", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_scikit-learn" + "tags": "get,generic-python-lib,_scikit-learn", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_torch" + "tags": "get,generic-python-lib,_torch", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_opencv-python" + "tags": "get,generic-python-lib,_opencv-python", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_decorator" + "tags": "get,generic-python-lib,_decorator", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_psutil" + "tags": "get,generic-python-lib,_psutil", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_onnx" + "tags": "get,generic-python-lib,_onnx", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_tqdm" + "tags": "get,generic-python-lib,_tqdm", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } }, { - "tags": "get,generic-python-lib,_mlperf_logging" + "tags": "get,generic-python-lib,_mlperf_logging", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_PATH": [ + "on" + ] + } } ], "input_mapping": { @@ -70,7 +120,8 @@ "threads": "CM_NUM_PREPROCESS_THREADS" }, "new_env_keys": [ - "CM_DATASET_*" + "CM_DATASET_*", + "CM_CRITEO_PREPROCESSED_PATH" ], "tags": [ "get", @@ -118,6 +169,37 @@ "CM_CRITEO_FAKE": "yes" } }, + "mlc": { + "group": "src", + "env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": "yes", + "CM_DATASET_PREPROCESSED_PATH": "on" + } + }, + "multihot,mlc": { + "env": { + "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com", + "CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed", + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH", + "CM_EXTRACT_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH", + "CM_EXTRACT_TO_FOLDER": "criteo-preprocessed" + }, + "deps": [ + { + "tags": "download-and-extract,_rclone", + "update_tags_from_env_with_prefix": { + "_url.": [ + "CM_RCLONE_URL" + ] + }, + "force_cache": true, + "names": [ + "dae" + ], + "extra_cache_tags": "criteo,preprocessed,dataset" + } + ] + }, "multihot": { "group": "type", "default": true, @@ -130,22 +212,52 @@ "names": [ "mlperf-training", "training-src" - ] + ], + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } }, { - "tags": "get,generic-python-lib,_package.typing_inspect" + "tags": "get,generic-python-lib,_package.typing_inspect", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } }, { - "tags": "get,generic-python-lib,_package.iopath" + "tags": "get,generic-python-lib,_package.iopath", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } }, { - "tags": 
"get,generic-python-lib,_package.fbgemm_gpu" + "tags": "get,generic-python-lib,_package.fbgemm_gpu", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } }, { - "tags": "get,generic-python-lib,_package.torchrec" + "tags": "get,generic-python-lib,_package.torchrec", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } }, { - "tags": "get,generic-python-lib,_package.pyre_extensions" + "tags": "get,generic-python-lib,_package.pyre_extensions", + "skip_if_env": { + "CM_DATASET_PREPROCESSED_CRITEO_FROM_MLC": [ + "yes" + ] + } } ] } diff --git a/script/get-preprocessed-dataset-criteo/customize.py b/script/get-preprocessed-dataset-criteo/customize.py index d6826e38cb..b1f2d03bc9 100644 --- a/script/get-preprocessed-dataset-criteo/customize.py +++ b/script/get-preprocessed-dataset-criteo/customize.py @@ -29,3 +29,12 @@ def preprocess(i): print("Using MLCommons Training source from '" + env['CM_MLPERF_TRAINING_SOURCE'] +"'") return {'return': 0} + +def postprocess(i): + + env = i['env'] + env['CM_CRITEO_PREPROCESSED_PATH'] = env['CM_DATASET_PREPROCESSED_PATH'] + + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_CRITEO_PREPROCESSED_PATH'] + + return {'return': 0} From 2ea8d867246a457a590f4e83858e7d8e10d3d9c4 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 18 Jul 2024 11:19:20 +0100 Subject: [PATCH 22/29] Added --max_test_duration for test mode --- script/run-mlperf-inference-app/_cm.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 706fd1dd2d..a66a7a620a 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -101,6 +101,7 @@ input_mapping: tp_size: CM_NVIDIA_TP_SIZE vllm_model_name: CM_VLLM_SERVER_MODEL_NAME num_workers: CM_MLPERF_INFERENCE_NUM_WORKERS + max_test_duration: CM_MLPERF_MAX_DURATION_TEST new_state_keys: - app_mlperf_inference_* From eecc2fd63bb37cffdff4541a20e59141d8521bea Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 18 Jul 2024 11:27:59 +0100 Subject: [PATCH 23/29] Takes in all mlperf inference durations as seconds --- .../generate-mlperf-inference-user-conf/customize.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/script/generate-mlperf-inference-user-conf/customize.py b/script/generate-mlperf-inference-user-conf/customize.py index 6f9af42fd0..e64b036025 100644 --- a/script/generate-mlperf-inference-user-conf/customize.py +++ b/script/generate-mlperf-inference-user-conf/customize.py @@ -256,7 +256,8 @@ def preprocess(i): ranging_user_conf += ml_model_name + "." + scenario + ".min_duration = 300000" + "\n" if env['CM_MLPERF_RUN_STYLE'] == "test": - max_duration_test = env.get('CM_MLPERF_MAX_DURATION_TEST', 30000) + max_duration_test_s = env.get('CM_MLPERF_MAX_DURATION_TEST', 30) + max_duration_test = max_duration_test_s * 1000 # in milliseconds query_count = env.get('CM_TEST_QUERY_COUNT', "5") user_conf += ml_model_name + "." + scenario + ".max_query_count = " + query_count + "\n" user_conf += ml_model_name + "." + scenario + ".min_query_count = " + query_count + "\n" @@ -271,7 +272,8 @@ def preprocess(i): elif env['CM_MLPERF_RUN_STYLE'] == "fast": user_conf += ml_model_name + "." 
+ scenario + ".sample_concatenate_permutation = 0" + "\n" - max_duration_fast = env.get('CM_MLPERF_MAX_DURATION_FAST', 120000) + max_duration_fast_s = env.get('CM_MLPERF_MAX_DURATION_FAST', 120) + max_duration_fast = max_duration_fast_s * 1000 # in milliseconds if scenario == "Server": user_conf += ml_model_name + "." + scenario + f".max_duration = {max_duration_fast}" + "\n" target_qps = conf['target_qps'] @@ -280,8 +282,10 @@ def preprocess(i): env['CM_MLPERF_MAX_QUERY_COUNT'] = query_count else: - max_duration_valid = env.get('CM_MLPERF_MAX_DURATION_VALID', 660000) - max_duration_ranging = env.get('CM_MLPERF_MAX_DURATION_RANGING', 300000) + max_duration_valid_s = env.get('CM_MLPERF_MAX_DURATION_VALID', 660) + max_duration_valid = max_duration_valid_s * 1000 # in milliseconds + max_duration_ranging_s = env.get('CM_MLPERF_MAX_DURATION_RANGING', 300) + max_duration_ranging = max_duration_ranging_s * 1000 # in milliseconds if scenario == "MultiStream" or scenario == "SingleStream": if env.get('CM_MLPERF_USE_MAX_DURATION', 'yes').lower() not in [ "no", "false", "0" ] and env.get('CM_MLPERF_MODEL_EQUAL_ISSUE_MODE', 'no').lower() not in [ "yes", "1", "true" ]: user_conf += ml_model_name + "." + scenario + f".max_duration = {max_duration_valid}" + "\n" From 6863cbcdc4835d45c6e1fbc587bfa43c400d7371 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 18 Jul 2024 11:38:40 +0100 Subject: [PATCH 24/29] fixed dtype for durations --- .../customize.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/script/generate-mlperf-inference-user-conf/customize.py b/script/generate-mlperf-inference-user-conf/customize.py index e64b036025..e5a2ca133b 100644 --- a/script/generate-mlperf-inference-user-conf/customize.py +++ b/script/generate-mlperf-inference-user-conf/customize.py @@ -256,8 +256,8 @@ def preprocess(i): ranging_user_conf += ml_model_name + "." + scenario + ".min_duration = 300000" + "\n" if env['CM_MLPERF_RUN_STYLE'] == "test": - max_duration_test_s = env.get('CM_MLPERF_MAX_DURATION_TEST', 30) - max_duration_test = max_duration_test_s * 1000 # in milliseconds + max_duration_test_s = int(env.get('CM_MLPERF_MAX_DURATION_TEST', 30)) + max_duration_test = str(max_duration_test_s * 1000) # in milliseconds query_count = env.get('CM_TEST_QUERY_COUNT', "5") user_conf += ml_model_name + "." + scenario + ".max_query_count = " + query_count + "\n" user_conf += ml_model_name + "." + scenario + ".min_query_count = " + query_count + "\n" @@ -272,8 +272,8 @@ def preprocess(i): elif env['CM_MLPERF_RUN_STYLE'] == "fast": user_conf += ml_model_name + "." + scenario + ".sample_concatenate_permutation = 0" + "\n" - max_duration_fast_s = env.get('CM_MLPERF_MAX_DURATION_FAST', 120) - max_duration_fast = max_duration_fast_s * 1000 # in milliseconds + max_duration_fast_s = int(env.get('CM_MLPERF_MAX_DURATION_FAST', 120)) + max_duration_fast = str(max_duration_fast_s * 1000) # in milliseconds if scenario == "Server": user_conf += ml_model_name + "." 
+ scenario + f".max_duration = {max_duration_fast}" + "\n" target_qps = conf['target_qps'] @@ -282,10 +282,10 @@ def preprocess(i): env['CM_MLPERF_MAX_QUERY_COUNT'] = query_count else: - max_duration_valid_s = env.get('CM_MLPERF_MAX_DURATION_VALID', 660) - max_duration_valid = max_duration_valid_s * 1000 # in milliseconds - max_duration_ranging_s = env.get('CM_MLPERF_MAX_DURATION_RANGING', 300) - max_duration_ranging = max_duration_ranging_s * 1000 # in milliseconds + max_duration_valid_s = int(env.get('CM_MLPERF_MAX_DURATION_VALID', 660)) + max_duration_valid = str(max_duration_valid_s * 1000) # in milliseconds + max_duration_ranging_s = int(env.get('CM_MLPERF_MAX_DURATION_RANGING', 300)) + max_duration_ranging = str(max_duration_ranging_s * 1000) # in milliseconds if scenario == "MultiStream" or scenario == "SingleStream": if env.get('CM_MLPERF_USE_MAX_DURATION', 'yes').lower() not in [ "no", "false", "0" ] and env.get('CM_MLPERF_MODEL_EQUAL_ISSUE_MODE', 'no').lower() not in [ "yes", "1", "true" ]: user_conf += ml_model_name + "." + scenario + f".max_duration = {max_duration_valid}" + "\n" From b4b12eab357b07d789df76b050e4daf5ee0842b7 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 18 Jul 2024 17:23:02 +0100 Subject: [PATCH 25/29] Docs update --- automation/script/module_misc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 33079214b9..2c72e059fb 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1077,6 +1077,10 @@ def doc(i): r = utils.save_txt(output_file, s) if r['return']>0: return r + out_docs_file = os.path.join("..", "docs", "scripts", category, alias, "index.md") + r = utils.save_txt(out_docs_file, s) + if r['return']>0: return r + return {'return':0} From 353e2219a2b00cabca7a6ec45c3464e81819995b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 18 Jul 2024 19:20:58 +0100 Subject: [PATCH 26/29] QPS -> Throughput in results table --- script/get-mlperf-inference-utils/mlperf_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/script/get-mlperf-inference-utils/mlperf_utils.py b/script/get-mlperf-inference-utils/mlperf_utils.py index 4a61d1c4af..8682c10660 100644 --- a/script/get-mlperf-inference-utils/mlperf_utils.py +++ b/script/get-mlperf-inference-utils/mlperf_utils.py @@ -220,8 +220,9 @@ def get_result_string(version, model, scenario, result_path, has_power, sub_res, return result_string, result def get_result_table(results): + - headers = ["Model", "Scenario", "Accuracy", "QPS", "Latency (in ms)", "Power Efficiency (in samples/J)", "TEST01", "TEST05", "TEST04"] + headers = ["Model", "Scenario", "Accuracy", "Throughput", "Latency (in ms)", "Power Efficiency (in samples/J)", "TEST01", "TEST05", "TEST04"] table = [] for model in results: for scenario in results[model]: From 98dfb2e9347cea1314033e02631193248da9ce7b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 19 Jul 2024 21:57:36 +0100 Subject: [PATCH 27/29] Removed pydantic version fix for bert --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 82804d846b..a29320f9c9 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -851,7 +851,6 @@ variations: CM_MLPERF_MODEL_SKIP_BATCHING: true deps: - tags: 
get,generic-python-lib,_package.pydantic - version_max: "1.10.9" - tags: get,generic-python-lib,_tokenization - tags: get,generic-python-lib,_six - tags: get,generic-python-lib,_package.absl-py From 20d73cf37df14387ed23e2742fd7361106a15830 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 22 Jul 2024 21:40:29 +0200 Subject: [PATCH 28/29] remove tmp files --- cm-repro/cm-run-script-input.json | 41 ------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 cm-repro/cm-run-script-input.json diff --git a/cm-repro/cm-run-script-input.json b/cm-repro/cm-run-script-input.json deleted file mode 100644 index 9fe20fd5e8..0000000000 --- a/cm-repro/cm-run-script-input.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "action": "run", - "automation": "script", - "tags": "run-mlperf,inference,_find-performance,_full", - "model": "mixtral-8x7b-99", - "implementation": "reference", - "framework": "pytorch", - "category": "edge", - "scenario": "Offline", - "execution_mode": "test", - "device": "cuda", - "test_query_count": "100", - "adr": { - "cuda": { - "version": "12.4.1" - } - }, - "quiet": true, - "repro": true, - "cmd": [ - "--tags=run-mlperf,inference,_find-performance,_full", - "--model=mixtral-8x7b-99", - "--implementation=reference", - "--framework=pytorch", - "--category=edge", - "--scenario=Offline", - "--execution_mode=test", - "--device=cuda", - "--test_query_count=100", - "--adr.cuda.version=12.4.1", - "--quiet", - "--repro" - ], - "out": "con", - "parsed_automation": [ - [ - "script", - "5b4e0237da074764" - ] - ] -} \ No newline at end of file From fb71007baece233785289f5e38839b198112dbee Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Mon, 22 Jul 2024 22:29:35 +0200 Subject: [PATCH 29/29] update automation, reproducibility and optimization challenges --- README.md | 92 ++++- .../README.md | 32 ++ .../_cm.json | 22 ++ .../README.md | 4 + .../_cm.yaml | 21 ++ .../README.md | 10 + .../_cm.yaml | 25 ++ .../README.md | 30 ++ .../_cm.json | 22 ++ .../connect-mlperf-with-medperf/README.md | 23 ++ .../connect-mlperf-with-medperf/_cm.json | 26 ++ .../README.md | 16 + .../_cm.json | 20 ++ .../README.md | 7 + .../_cm.json | 19 ++ .../README.md | 18 + .../_cm.json | 27 ++ .../README.md | 74 +++++ .../_cm.json | 27 ++ ...wd-benchmark-mlperf-bert-inference-cuda.md | 281 ++++++++++++++++ .../docs/generate-3d-unet-submission.md | 59 ++++ .../docs/generate-bert-submission.md | 80 +++++ .../docs/generate-resnet50-submission.md | 82 +++++ .../docs/generate-retinanet-submission.md | 67 ++++ .../docs/generate-rnnt-submission.md | 53 +++ .../docs/setup-aws-instance.md | 48 +++ .../docs/setup-gcp-instance.md | 35 ++ .../docs/setup-nvidia-jetson-orin.md | 53 +++ .../README.md | 83 +++++ .../_cm.json | 26 ++ .../docs/generate-3d-unet-submission.md | 67 ++++ .../docs/generate-bert-submission.md | 113 +++++++ .../docs/generate-resnet50-submission.md | 90 +++++ .../docs/generate-retinanet-submission.md | 75 +++++ .../docs/generate-rnnt-submission.md | 61 ++++ .../docs/setup-aws-instance.md | 50 +++ .../docs/setup-gcp-instance.md | 37 +++ .../docs/setup-nvidia-jetson-orin.md | 54 +++ .../README.md | 31 ++ .../_cm.json | 27 ++ .../README.md | 20 ++ .../_cm.json | 26 ++ .../README.md | 31 ++ .../_cm.json | 28 ++ .../README.md | 32 ++ .../_cm.json | 27 ++ .../README.md | 52 +++ .../_cm.json | 27 ++ .../README.md | 31 ++ .../_cm.json | 26 ++ .../README.md | 34 ++ .../_cm.json | 28 ++ .../README.md | 33 ++ .../_cm.json | 26 ++ .../README.md | 41 +++ .../_cm.json | 28 ++ .../README.md | 31 ++ 
.../_cm.json | 27 ++ .../README.md | 36 ++ .../_cm.json | 28 ++ .../repro-mlperf-inf-v3.0-orin/README.md | 16 + challenge/repro-mlperf-inf-v3.0-orin/_cm.json | 23 ++ .../README.md | 39 +++ .../_cm.json | 20 ++ .../README.md | 3 + .../repro-mlperf-inference-v4.0-2024/_cm.yaml | 25 ++ .../README.md | 4 + .../repro-mlperf-inference-v4.1-2024/_cm.yaml | 22 ++ .../README.md | 36 ++ .../_cm.json | 23 ++ .../README.md | 17 + .../_cm.json | 23 ++ challenge/run-mlperf@home-v3.1-cpu/README.md | 67 ++++ challenge/run-mlperf@home-v3.1-cpu/_cm.json | 21 ++ .../run-cpu-bert-99-deepsparse.md | 100 ++++++ ...cpu-dse-mobilenets-efficientnets-tflite.md | 77 +++++ challenge/run-mlperf@home-v3.1-gpu/README.md | 65 ++++ challenge/run-mlperf@home-v3.1-gpu/_cm.json | 20 ++ ...idia-gpu-bert-99-nvidia-docker-tensorrt.md | 193 +++++++++++ .../run-nvidia-gpu-gpt-j-6b-ref-pytorch.md | 314 ++++++++++++++++++ .../train-llm-for-cm-mlperf-2023/README.md | 20 ++ .../train-llm-for-cm-mlperf-2023/_cm.json | 21 ++ .../README.md | 10 + .../_cm.json | 7 + ...wd-benchmark-mlperf-bert-inference-cuda.md | 285 ++++++++++++++++ .../docs/generate-bert-submission.md | 87 +++++ .../docs/generate-resnet50-submission.md | 74 +++++ .../docs/run-nvidia-implementation.md | 47 +++ .../docs/setup-aws-graviton.md | 25 ++ .../get-mlperf-inference-repos.cmd | 3 + .../README.md | 93 ++++++ .../_cm.json | 16 + .../_cm.json | 17 + .../_cm.json | 17 + .../_cm.json | 17 + .../README.md | 42 +++ .../_cm.yaml | 45 +++ .../customize.py | 0 .../install_deps.sh | 15 + .../install_deps_cuda.sh | 15 + .../install_deps_driver.sh | 15 + .../install_deps_pytorch.sh | 15 + .../install_deps_transformers.sh | 17 + .../run.sh | 21 ++ .../run_figure11.sh | 20 ++ .../run_figure12.sh | 18 + .../run_figure13.sh | 19 ++ .../README.md | 61 ++++ .../_cm.yaml | 40 +++ .../customize.py | 22 ++ .../install_deps.sh | 28 ++ .../install_spec_deps.sh | 37 +++ .../plot.sh | 21 ++ .../run.sh | 23 ++ .../run_spec.sh | 23 ++ .../README.md | 74 +++++ .../_cm.yaml | 48 +++ .../customize.py | 22 ++ .../install_deps.sh | 15 + .../install_deps_gem5.sh | 12 + .../install_deps_kernel.sh | 12 + .../plot.sh | 28 ++ .../run.sh | 16 + .../README.md | 50 +++ .../_cm.yaml | 36 ++ .../install_deps.bat | 18 + .../install_deps.sh | 12 + .../plot.bat | 12 + .../plot.sh | 83 +++++ .../run.bat | 12 + .../run.sh | 49 +++ .../README.md | 30 ++ .../_cm.yaml | 20 ++ .../customize.py | 22 ++ .../install_deps.bat | 4 + .../install_deps.sh | 24 ++ .../main.py | 0 .../run.bat | 4 + .../run.sh | 41 +++ .../.gitignore | 1 + .../README.md | 74 +++++ .../_cm.yaml | 40 +++ .../check.sh | 15 + .../customize.py | 22 ++ .../install_deps.bat | 18 + .../install_deps.sh | 30 ++ .../main.py | 10 + .../plot.bat | 12 + .../plot.sh | 15 + .../plot_pregenerated.sh | 15 + .../run.bat | 12 + .../run.sh | 14 + .../Dockerfile | 28 ++ .../README.md | 40 +++ .../_cm.yaml | 30 ++ .../customize.py | 22 ++ .../install_deps.sh | 49 +++ .../plot.sh | 60 ++++ .../run.sh | 54 +++ .../Preliminary_build_onikiri.sh | 15 + .../Preliminary_create_binary.sh | 19 ++ .../Preliminary_experiment.sh | 30 ++ .../Preliminary_experiment_setup.sh | 13 + .../Preliminary_plot.sh | 15 + .../README.md | 49 +++ .../_cm.yaml | 55 +++ .../build_compiler.sh | 32 ++ .../build_onikiri.sh | 14 + .../create_binary.sh | 24 ++ .../experiment.sh | 14 + .../experiment_setup.sh | 16 + .../install_deps.sh | 4 + .../plot.sh | 34 ++ .../README.md} | 19 +- .../_cm.yaml | 9 +- .../customize.py | 22 ++ .../install_deps.sh | 0 .../main.py | 10 + .../plot.sh | 0 .../run.sh | 
0 .../README.md | 1 - .../reproduce-micro-paper-2023-xyz/README.md | 178 ---------- 182 files changed, 6518 insertions(+), 203 deletions(-) create mode 100644 challenge/add-derived-metrics-to-mlperf-inference/README.md create mode 100644 challenge/add-derived-metrics-to-mlperf-inference/_cm.json create mode 100644 challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md create mode 100644 challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml create mode 100644 challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md create mode 100644 challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml create mode 100644 challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md create mode 100644 challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json create mode 100644 challenge/connect-mlperf-with-medperf/README.md create mode 100644 challenge/connect-mlperf-with-medperf/_cm.json create mode 100644 challenge/optimize-mlperf-inference-scc2023/README.md create mode 100644 challenge/optimize-mlperf-inference-scc2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-scc2024/README.md create mode 100644 challenge/optimize-mlperf-inference-scc2024/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v2.1-2022/README.md create mode 100644 challenge/optimize-mlperf-inference-v2.1-2022/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md create mode 100644 
challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json create mode 100644 challenge/repro-mlperf-inf-v3.0-orin/README.md create mode 100644 challenge/repro-mlperf-inf-v3.0-orin/_cm.json create mode 100644 challenge/repro-mlperf-inference-retinanet-scc2022/README.md create mode 100644 challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json create mode 100644 challenge/repro-mlperf-inference-v4.0-2024/README.md create mode 100644 challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml create mode 100644 challenge/repro-mlperf-inference-v4.1-2024/README.md create mode 100644 challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml create mode 100644 challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md create mode 100644 challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json create mode 100644 challenge/reproduce-mlperf-training-v3.0-2023/README.md create mode 100644 challenge/reproduce-mlperf-training-v3.0-2023/_cm.json create mode 100644 challenge/run-mlperf@home-v3.1-cpu/README.md create mode 100644 challenge/run-mlperf@home-v3.1-cpu/_cm.json create mode 100644 challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md create mode 100644 challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md create mode 100644 challenge/run-mlperf@home-v3.1-gpu/README.md create mode 100644 challenge/run-mlperf@home-v3.1-gpu/_cm.json create mode 100644 challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md create mode 100644 challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md create mode 100644 challenge/train-llm-for-cm-mlperf-2023/README.md create mode 100644 challenge/train-llm-for-cm-mlperf-2023/_cm.json create mode 100644 project/mlperf-inference-v3.0-submissions/README.md 
create mode 100644 project/mlperf-inference-v3.0-submissions/_cm.json create mode 100644 project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md create mode 100644 project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md create mode 100644 project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md create mode 100644 project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md create mode 100644 project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md create mode 100644 project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd create mode 100644 report/mlperf-inference-v3.1-analysis-ctuning/README.md create mode 100644 report/mlperf-inference-v3.1-analysis-ctuning/_cm.json create mode 100644 report/mlperf-inference-v3.1-press-release-ctuning/_cm.json create mode 100644 report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json create mode 100644 report/mlperf-inference-v4.0-press-release-ctuning/_cm.json create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/_cm.yaml rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-22}/customize.py (100%) create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/install_deps_cuda.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/install_deps_driver.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/install_deps_pytorch.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/install_deps_transformers.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/run_figure11.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/run_figure12.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-22/run_figure13.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/customize.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/install_spec_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-28/run_spec.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/customize.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/install_deps_gem5.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/install_deps_kernel.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-33/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/install_deps.bat create mode 100644 
script/reproduce-ieee-acm-micro2023-paper-38/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/plot.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/run.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-38/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/customize.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/install_deps.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/install_deps.sh rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-5}/main.py (100%) create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/run.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-5/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/.gitignore create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/check.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/customize.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/install_deps.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/main.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/plot.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/plot_pregenerated.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/run.bat create mode 100644 script/reproduce-ieee-acm-micro2023-paper-8/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/Dockerfile create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/customize.py create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/install_deps.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-85/run.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_build_onikiri.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_create_binary.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment_setup.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_plot.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/README.md create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/_cm.yaml create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/build_compiler.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/build_onikiri.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/create_binary.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/experiment.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/experiment_setup.sh create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/install_deps.sh 
create mode 100644 script/reproduce-ieee-acm-micro2023-paper-87/plot.sh rename script/{reproduce-micro-paper-2023-victima/README-extra.md => reproduce-ieee-acm-micro2023-paper-96/README.md} (64%) rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-96}/_cm.yaml (84%) create mode 100644 script/reproduce-ieee-acm-micro2023-paper-96/customize.py rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-96}/install_deps.sh (100%) create mode 100644 script/reproduce-ieee-acm-micro2023-paper-96/main.py rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-96}/plot.sh (100%) rename script/{reproduce-micro-paper-2023-victima => reproduce-ieee-acm-micro2023-paper-96}/run.sh (100%) delete mode 100644 script/reproduce-micro-paper-2023-victima/README.md delete mode 100644 script/reproduce-micro-paper-2023-xyz/README.md diff --git a/README.md b/README.md index 02656175e9..d3dc40d94e 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![CM script automation features test](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml) [![MLPerf inference MLCommons C++ ResNet50](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml) -This repository contains reusable and cross-platform automation recipes to run DevOps, MLOps, AIOps and MLPerf +This repository contains reusable and cross-platform automation recipes to run DevOps, MLOps, and MLPerf via a simple and human-readable [Collective Mind interface (CM)](https://github.com/mlcommons/ck) while adapting to different operating systems, software and hardware. @@ -19,31 +19,66 @@ and unified input/output to make them reusable in different projects either indi or by chaining them together into portable automation workflows, applications and web services adaptable to continuously changing models, data sets, software and hardware. -### Citing this project +We develop and test [CM scripts](script) as a community effort to support the following projects: +* [CM for MLPerf](https://docs.mlcommons.org/inference): modularize and automate MLPerf benchmarks + (maintained by [MLCommons](https://mlcommons.org) and originally developed by [cKnowledge.org](https://cKnowledge.org), [OctoML](https://octoml.ai) and [cTuning.org](https://cTuning.org)) +* [CM for research and education](https://cTuning.org/ae): provide a common interface to automate and reproduce results from research papers + and MLPerf benchmarks (maintained by [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org)) +* [CM for ABTF](https://github.com/mlcommons/cm4abtf): provide a unified CM interface to run automotive benchmarks + (maintained by [MLCommons](https://mlcommons.org) and originally developed by [cKnowledge.org](https://cKnowledge.org)) +* [CM for optimization](https://access.cknowledge.org/playground/?action=challenges): co-design efficient and cost-effective + software and hardware for AI, ML and other emerging workloads via open challenges + (maintained by [cKnowledge.org](https://cKnowledge.org)) -Please use this [BibTeX file](https://github.com/mlcommons/ck/blob/master/citation.bib). 
+You can read this [ArXiv paper](https://arxiv.org/abs/2406.16791) to learn more about the CM motivation and long-term vision. + +Please provide your feedback or submit your issues [here](https://github.com/mlcommons/cm4mlops/issues). ## Catalog Online catalog: [cKnowledge](https://access.cknowledge.org/playground/?action=scripts), [MLCommons](https://docs.mlcommons.org/cm4mlops/scripts). -## Examples +## Citation -### Run image classificaiton via CM +Please use this [BibTeX file](https://github.com/mlcommons/ck/blob/master/citation.bib) to cite this project. -```bash -pip install cmind -U +## A few demos -cm pull repo mlcommons@cm4mlops --branch=dev +### Install CM and virtual env + +Install the [MLCommons CM automation language](https://access.cknowledge.org/playground/?action=install). -cmr "python app image-classification onnx" --quiet +### Pull this repository + +```bash +cm pull repo mlcommons@cm4mlops --branch=dev ``` -### Run MLPerf inference benchmark via CM +### Run image classification using CM ```bash -pip install cm4mlperf -U +cm run script "python app image-classification onnx _cpu" --help + +cm run script "download file _wget" --url=https://cKnowledge.org/ai/data/computer_mouse.jpg --verify=no --env.CM_DOWNLOAD_CHECKSUM=45ae5c940233892c2f860efdf0b66e7e +cm run script "python app image-classification onnx _cpu" --input=computer_mouse.jpg + +cmr "python app image-classification onnx _cpu" --input=computer_mouse.jpg +cmr --tags=python,app,image-classification,onnx,_cpu --input=computer_mouse.jpg +cmr 3d5e908e472b417e --input=computer_mouse.jpg + +cm docker script "python app image-classification onnx _cpu" --input=computer_mouse.jpg + +cm gui script "python app image-classification onnx _cpu" +``` + +### Re-run experiments from the ACM/IEEE MICRO'23 paper + +Check this [script/reproduce-ieee-acm-micro2023-paper-96](README.md). + +### Run MLPerf ResNet CPU inference benchmark via CM + +```bash cm run script --tags=run-mlperf,inference,_performance-only,_short \ --division=open \ --category=edge \ @@ -62,6 +97,38 @@ cm run script --tags=run-mlperf,inference,_performance-only,_short \ --time ``` +### Run MLPerf BERT CUDA inference benchmark v4.1 via CM + +```bash +cmr "run-mlperf inference _find-performance _full _r4.1" \ + --model=bert-99 \ + --implementation=nvidia \ + --framework=tensorrt \ + --category=datacenter \ + --scenario=Offline \ + --execution_mode=test \ + --device=cuda \ + --docker \ + --docker_cm_repo=mlcommons@cm4mlops \ + --docker_cm_repo_flags="--branch=mlperf-inference" \ + --test_query_count=100 \ + --quiet +``` + +### Run MLPerf SDXL reference inference benchmark v4.1 via CM + +```bash +cm run script \ + --tags=run-mlperf,inference,_r4.1 \ + --model=sdxl \ + --implementation=reference \ + --framework=pytorch \ + --category=datacenter \ + --scenario=Offline \ + --execution_mode=valid \ + --device=cuda \ + --quiet +``` ## License @@ -72,6 +139,5 @@ cm run script --tags=run-mlperf,inference,_performance-only,_short \ We thank [cKnowledge.org](https://cKnowledge.org), [cTuning foundation](https://cTuning.org) and [MLCommons](https://mlcommons.org) for sponsoring this project! - -We also thank all [volunteers, collaborators and contributors](CONTRIBUTING.md) +We also thank all [volunteers, collaborators and contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) for their support, fruitful discussions, and useful feedback! 
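The README demos above drive CM through its CLI; the same scripts can also be invoked from Python via the `cmind` package. Below is a minimal sketch, assuming `pip install cmind` and a pulled `mlcommons@cm4mlops` repository; the dictionary keys mirror the CLI flags from the image-classification demo and are otherwise illustrative.

```python
# Minimal sketch: run the image-classification demo through the CM Python API
# instead of the CLI. Assumes `pip install cmind` and that the
# mlcommons@cm4mlops repository has already been pulled with `cm pull repo`.
import cmind

r = cmind.access({
    'action': 'run',
    'automation': 'script',
    'tags': 'python,app,image-classification,onnx,_cpu',
    'input': 'computer_mouse.jpg',   # same as --input= in the CLI demo
    'quiet': True
})
if r['return'] > 0:
    # CM reports failures via the returned dict rather than raising exceptions
    print(r.get('error', 'CM script failed'))
```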
diff --git a/challenge/add-derived-metrics-to-mlperf-inference/README.md b/challenge/add-derived-metrics-to-mlperf-inference/README.md new file mode 100644 index 0000000000..8302f63d68 --- /dev/null +++ b/challenge/add-derived-metrics-to-mlperf-inference/README.md @@ -0,0 +1,32 @@ +### Challenge + +Check past MLPerf inference results in [this MLCommons repository](https://github.com/mlcommons/cm4mlperf-results) +and add derived metrics such as result/No of cores, power efficiency, device cost, operational costs, etc. + +Add clock speed as a third dimension to graphs and improve Bar graph visualization. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
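As a rough illustration of the derived metrics requested by this challenge, the sketch below extends one result record with power efficiency and a per-core result. The `Result` and `Result_Power` fields and the `1000 / Result_Power` formula follow the derived-metrics example linked from the v3.0 challenge README in this patch; the `num_cores` argument and the helper name are hypothetical.

```python
# Hypothetical helper for the derived-metrics challenge: extend one result
# record with extra metrics. 'Result' and 'Result_Power' mirror the fields
# used in the CM playground derived-metrics example; 'num_cores' is assumed.
def add_derived_metrics(result: dict, num_cores: int = 0) -> dict:
    derived = dict(result)
    if result.get('Result_Power'):
        # same formula as the playground example: 1000 / Result_Power
        derived['Power_Efficiency'] = 1000 / result['Result_Power']
    if num_cores and result.get('Result'):
        derived['Result_per_core'] = result['Result'] / num_cores
    return derived

print(add_derived_metrics({'Result': 1234.5, 'Result_Power': 350.0}, num_cores=64))
```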
diff --git a/challenge/add-derived-metrics-to-mlperf-inference/_cm.json b/challenge/add-derived-metrics-to-mlperf-inference/_cm.json new file mode 100644 index 0000000000..cbdc212467 --- /dev/null +++ b/challenge/add-derived-metrics-to-mlperf-inference/_cm.json @@ -0,0 +1,22 @@ +{ + "alias": "add-derived-metrics-to-mlperf-inference", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close_extension": true, + "date_open": "20240204", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "derived-metrics", + "mlperf-inference", + "mlperf-inference-derived-metrics" + ], + "title": "Add derived metrics to MLPerf inference benchmarks (power efficiency, results / No of cores, costs, etc)", + "trophies": true, + "uid": "c65b56d7770946ee" +} diff --git a/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md new file mode 100644 index 0000000000..a2059c0fe8 --- /dev/null +++ b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md @@ -0,0 +1,4 @@ +20240220: +* A prototype of a GUI to generate CM commands to run MLPerf inference benchmarks is ready: [link](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725) +* A prototype of the infrastructure to reproduce MLPerf inference benchmark results is ready: [link](https://access.cknowledge.org/playground/?action=reproduce) +* On-going efforts: https://github.com/mlcommons/ck/issues/1052 diff --git a/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml new file mode 100644 index 0000000000..b8b519d27f --- /dev/null +++ b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml @@ -0,0 +1,21 @@ +alias: automate-mlperf-inference-v3.1-and-v4.0-2024 +uid: f89f152fc2614240 + +automation_alias: challenge +automation_uid: 3d84abd768f34e08 + +title: Add MLCommons CM workflows and unifed interface to automate MLPerf inference v3.1 and v4.0 benchmarks (Intel, Nvidia, Qualcomm, Arm64, TPU ...) + +date_open: '20231215' +date_close: '20240315' + +hot: true + +tags: +- automate +- mlperf-inference-v3.1-and-v4.0 +- 2024 + +experiments: +- tags: mlperf-inference,v3.1 +- tags: mlperf-inference,v4.0 diff --git a/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md new file mode 100644 index 0000000000..adfbea7263 --- /dev/null +++ b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md @@ -0,0 +1,10 @@ +This challenge is under preparation. You can read about the motivation behind this challenge in our [invited talk at MLPerf-Bench @ HPCA'24](https://doi.org/10.5281/zenodo.10786893). + +We plan to extend [MLCommons CM framework](https://github.com/mlcommons/ck) +to automatically compose high-performance and cost-efficient AI systems +based on MLPerf inference v4.0 results and [CM automation recipes](https://access.cknowledge.org/playground/?action=scripts). 
+ +* A prototype of a GUI to generate CM commands to run MLPerf inference benchmarks is ready: [link](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725) +* A prototype of the infrastructure to reproduce MLPerf inference benchmark results is ready: [link](https://access.cknowledge.org/playground/?action=reproduce) + +Contact the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) for more details. diff --git a/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml new file mode 100644 index 0000000000..b1d4fe9f18 --- /dev/null +++ b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml @@ -0,0 +1,25 @@ +alias: compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024 +uid: 7c983102d89e4869 + +automation_alias: challenge +automation_uid: 3d84abd768f34e08 + +title: "Compose high-performance and cost-efficint AI systems using MLCommons' Collective Mind and MLPerf inference" + +date_open: '20240101' + +tags: +- compose +- ai +- systems +- mlperf-inference-v4.0 +- cm +- mlcommons-cm +- mlperf +- v4.0 +- performance +- energy +- cost + +experiments: +- tags: mlperf-inference,v4.0 diff --git a/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md new file mode 100644 index 0000000000..306341271c --- /dev/null +++ b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md @@ -0,0 +1,30 @@ +### Challenge + +Connect CM workflows to run MLPerf inference benchmarks with [OpenBenchmarking.org](https://openbenchmarking.org). + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* + + + +### Organizers + +* Michael Larabel +* Grigori Fursin +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +Results will be available at [OpenBenchmark.org](https://openbenchmarking.org) +and [MLCommons CK playgronud](https://access.cknowledge.org/playground/?action=experiments). 
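One possible starting point for this challenge is a thin wrapper that an external harness such as an OpenBenchmarking.org test profile could call to launch a CM-automated MLPerf run. The sketch below reuses the `cm run script` flags from the ResNet50 CPU demo in the updated README; the wrapper itself is an assumption, not an existing integration.

```python
# Hypothetical wrapper an external harness could invoke to run a
# CM-automated MLPerf inference benchmark. The flags are taken from the
# ResNet50 CPU demo in the updated README; the exit code is propagated
# so the harness can detect failures.
import subprocess
import sys

cmd = [
    "cm", "run", "script",
    "--tags=run-mlperf,inference,_performance-only,_short",
    "--division=open",
    "--category=edge",
    "--device=cpu",
    "--model=resnet50",
    "--implementation=reference",
    "--backend=onnxruntime",
    "--scenario=Offline",
    "--execution_mode=test",
    "--quiet",
]
sys.exit(subprocess.run(cmd).returncode)
```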
diff --git a/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json new file mode 100644 index 0000000000..c1e65aadbd --- /dev/null +++ b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json @@ -0,0 +1,22 @@ +{ + "alias": "connect-mlperf-inference-v3.1-with-openbenchmarking", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_open": "20240101", + "date_close_extension": true, + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "openbenchmarking", + "mlperf-inference", + "mlperf-inference-openbenchmarking" + ], + "title": "Run MLPerf inference benchmarks using CM via OpenBenchmarking.org", + "trophies": true, + "uid": "534592626eb44efe" +} diff --git a/challenge/connect-mlperf-with-medperf/README.md b/challenge/connect-mlperf-with-medperf/README.md new file mode 100644 index 0000000000..f2f572bd48 --- /dev/null +++ b/challenge/connect-mlperf-with-medperf/README.md @@ -0,0 +1,23 @@ +### Challenge + +Evaluate models from [MLCommons MedPerf platform](https://www.medperf.org) in terms of latency, throughput, power consumption and other metrics +using MLPerf loadgen and MLCommons CM automation language. + +See the [Nature 2023 article about MedPerf](https://www.nature.com/articles/s42256-023-00652-2) +and [ACM REP'23 keynote about CM](https://doi.org/10.5281/zenodo.8105339) to learn more about these projects. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* + + +### Organizers + +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) diff --git a/challenge/connect-mlperf-with-medperf/_cm.json b/challenge/connect-mlperf-with-medperf/_cm.json new file mode 100644 index 0000000000..d48d0a9fea --- /dev/null +++ b/challenge/connect-mlperf-with-medperf/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "connect-mlperf-with-medperf", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close_extension": true, + "date_open": "20240105", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "medperf", + "mlperf-inference", + "mlperf-inference-medperf", + "mlperf-inference-medperf", + "mlperf-inference-medperf-v3.1", + "mlperf-inference-medperf-v3.1-2023", + "v3.1" + ], + "title": "Connect MedPerf with MLPerf and CM", + "trophies": true, + "uid": "c26d1fbf89164728" +} diff --git a/challenge/optimize-mlperf-inference-scc2023/README.md b/challenge/optimize-mlperf-inference-scc2023/README.md new file mode 100644 index 0000000000..62a4826ad2 --- /dev/null +++ b/challenge/optimize-mlperf-inference-scc2023/README.md @@ -0,0 +1,16 @@ +### CM tutorial + +https://github.com/mlcommons/ck/blob/master/docs/tutorials/scc23-mlperf-inference-bert.md + +### Challenge + +Reproduce and optimize MLPerf inference benchmarks during Student Cluster Competition at SuperComputing'23. 
+ +See our [related challange from 2022]()https://access.cknowledge.org/playground/?action=challenges&name=repro-mlperf-inference-retinanet-scc2022). + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge.org](https://cKnowledge.org) + diff --git a/challenge/optimize-mlperf-inference-scc2023/_cm.json b/challenge/optimize-mlperf-inference-scc2023/_cm.json new file mode 100644 index 0000000000..021872b15a --- /dev/null +++ b/challenge/optimize-mlperf-inference-scc2023/_cm.json @@ -0,0 +1,20 @@ +{ + "alias": "optimize-mlperf-inference-scc2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20231115", + "date_open": "20230915", + "tags": [ + "automate", + "modularize", + "reproduce", + "replicate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-scc", + "mlperf-inference-scc-2023" + ], + "title": "Reproduce and optimize MLPerf inference v3.1 benchmarks at the Student Cluster Competition'23 at SuperComputing'23 using CM", + "uid": "ddaf594f84b14bc2" +} diff --git a/challenge/optimize-mlperf-inference-scc2024/README.md b/challenge/optimize-mlperf-inference-scc2024/README.md new file mode 100644 index 0000000000..1f9be23af2 --- /dev/null +++ b/challenge/optimize-mlperf-inference-scc2024/README.md @@ -0,0 +1,7 @@ +The [MLCommons](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) +are preparing a unified interface to run MLPerf inference benchmark at the Student Cluster Competition'24. + +See [the CM-MLPerf tutorial for SCC'23](https://github.com/mlcommons/ck/blob/master/docs/tutorials/scc23-mlperf-inference-bert.md). +Note that the MLPerf model will change in SCC'24 - please stay tuned for more details! + +See https://sc24.supercomputing.org/students/student-cluster-competition for more details about SCC. diff --git a/challenge/optimize-mlperf-inference-scc2024/_cm.json b/challenge/optimize-mlperf-inference-scc2024/_cm.json new file mode 100644 index 0000000000..ab75aa27a6 --- /dev/null +++ b/challenge/optimize-mlperf-inference-scc2024/_cm.json @@ -0,0 +1,19 @@ +{ + "alias": "optimize-mlperf-inference-scc2024", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_open": "20241001", + "tags": [ + "automate", + "modularize", + "reproduce", + "replicate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-scc", + "mlperf-inference-scc-2024" + ], + "title": "Run and optimize the MLPerf inference benchmark using CM at the Student Cluster Competition'24 at SuperComputing'24", + "uid": "f7fcba4c43ab4412" +} diff --git a/challenge/optimize-mlperf-inference-v2.1-2022/README.md b/challenge/optimize-mlperf-inference-v2.1-2022/README.md new file mode 100644 index 0000000000..d0ac7cf15b --- /dev/null +++ b/challenge/optimize-mlperf-inference-v2.1-2022/README.md @@ -0,0 +1,18 @@ +### Challenge + +Prepare, optimize and reproduce MLPerf inference v2.1 benchmarks across diverse implementations, software and hardware +using the [MLCommons CK framework](https://github.com/mlcommons/ck). + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [OctoML](https://octoml.ai) + +### Status + +This challenge has been successfully completed. 
+ +### Results + +Results are available [here](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,v2.1). diff --git a/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json b/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json new file mode 100644 index 0000000000..31cb5dffd2 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v2.1-2022", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20220901", + "date_open": "20220701", + "experiments": [ + { + "tags": "mlperf-inference,v2.1" + } + ], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-v2.1", + "mlperf-inference-v2.1-2022", + "v2.1" + ], + "title": "Run and optimize MLPerf inference v2.1 benchmarks", + "uid": "2e13154b7fbb412d" +} diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/README.md b/challenge/optimize-mlperf-inference-v3.0-2023/README.md new file mode 100644 index 0000000000..da6decc8c7 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/README.md @@ -0,0 +1,74 @@ +### Challenge + +Run MLPerf inference v3.0 benchmarks out-of-the-box across diverse implementations, software and hardware +using the [MLCommons CM automation language](https://github.com/mlcommons/ck) +and submit public results to the MLPerf inference v3.0 via [cTuning foundation](https://cTuning.org). + +* [GUI to run MLPerf inference benchmarks](https://cknowledge.org/mlperf-inference-gui) +* [GUI to prepare MLPerf inference submissions](https://cknowledge.org/mlperf-inference-submission-gui) + +### Organizers + +* [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge](https://cKnowledge.org) + +### Status + +This challenge has been successfully completed. + +### Results + +Official results: +* https://github.com/mlcommons/inference_results_v3.0/tree/main/closed/cTuning +* https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning + +Results in the MLCommons CK/CM format: +* https://github.com/mlcommons/cm4mlperf-results + +Visualization and comparison with derived metrics: +* [MLCommons Collective Knowledge Playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,v3.0). + +### The outcome + +We are very pleased to announce the successful outcome of the 1st +community challenge to run, reproduce and optimize MLPerf inference v3.0 +benchmarks: our MLCommons CK/CM workflow automation framework has helped +to prepare more than 80% of all submission results including 98% of power +results with very diverse technology and benchmark implementations from +Neural Magic, Qualcomm, cKnowledge Ltd, KRAI, cTuning foundation, Dell +Technologies, Hewlett Packard Enterprise, Lenovo, Hugging Face, NVIDIA, +Intel Corporation, AMD and Apple across diverse CPUs, GPUs and DSPs with +PyTorch, ONNX, QAIC, TF/TFLite, TVM and TensorRT using popular cloud +providers (GCP, AWS, Azure) and individual servers and edge devices +provided by our [volunteers](https://access.cknowledge.org/playground/?action=contributors). 
+ +You can now see and compare all MLPerf inference results v3.0, v2.1 and +v2.0 online together with reproducibility reports including the +[MLPerf BERT model](https://huggingface.co/ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1) +from the [Hugging Face Zoo](https://www.linkedin.com/company/huggingface/?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D) +on [Nvidia Jetson Orin platform](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md#reproducing-the-nvidia-jetson-agx-orin-submission). +You can even create your own derived metrics (such as performance per Watt), +provide your own constraints using this [MLCommons repository](https://github.com/mlcommons/cm_inference_results) and visualize +them as shown in [this example](https://access.cknowledge.org/playground/?action=experiments&name=e472410ee67c41f9&x=Result&y=Power_Efficiency&filter=result[%27Result_Power%27]%3C35&derived_metrics=result%5B%27Power_Efficiency%27%5D%3D1000%2Fresult%5B%27Result_Power%27%5D&c=accelerator_model_name&axis_key_s=version). + +Additional thanks to [Michael Goin](https://www.linkedin.com/in/michael-goin) +from [Neural Magic](https://www.linkedin.com/company/neural-magic/?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), our international +students including [Himanshu Dutta](https://www.linkedin.com/in/ACoAACpPCiMB7zUNStsqBmaOCtd100a7wXBGu_M?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), +[Aditya Kumar Shaw](https://www.linkedin.com/in/ACoAACJ3ikUBjuHqi35ibm8CG6IEYv-v_VsobIs?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), +Sachin Mudaliyar, [Thomas Zhu](https://www.linkedin.com/in/hanwen-zhu-483614189), +and all [CK/CM users and contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) for helping us to +validate, use and improve this open-source technology to automate +benchmarking and optimization of AI/ML systems in terms of performance, +accuracy, power and costs! We are also grateful to [HiPEAC](https://www.linkedin.com/company/hipeac) +and [OctoML](https://www.linkedin.com/company/octoml) for +sponsoring initial development and Peter Mattson, David Kanter, Vijay +Janapa Reddi and Alexandros Karargyris for fruitful discussions. 
+ + +### Dissemination + +* [Forbes article](https://www.forbes.com/sites/karlfreund/2023/04/05/nvidia-performance-trounces-all-competitors-who-have-the-guts-to-submit-to-mlperf-inference-30/?sh=3c38d2866676) +* [ZDNet article](https://www.zdnet.com/article/nvidia-dell-qualcomm-speed-up-ai-results-in-latest-benchmark-tests) +* [LinkedIn article from Grigori Fursin (MLCommons Task Force co-chair)]( https://www.linkedin.com/pulse/announcing-my-new-project-reproducible-optimization-co-design-fursin ) +* [Linkedin article from Arjun Suresh (MLCommons Task Force co-chair)](https://www.linkedin.com/posts/arjunsuresh_nvidia-performance-trounces-all-competitors-activity-7049500972275929088-nnnx?utm_source=share&utm_medium=member_desktop) diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json new file mode 100644 index 0000000000..0baf3cfeea --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.0-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230301", + "date_open": "20230201", + "experiments": [ + { + "tags": "mlperf-inference,v3.0" + } + ], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-v3.0", + "mlperf-inference-v3.0-2023", + "v3.0" + ], + "title": "Run and optimize MLPerf inference v3.0 benchmarks", + "uid": "57cbc3384d7640f9" +} diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md new file mode 100644 index 0000000000..f6a17979ca --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md @@ -0,0 +1,281 @@ +# Crowd-benchmarking MLPerf BERT inference + +
+Click here to see the table of contents. + +* [Crowd-benchmarking MLPerf BERT inference](#crowd-benchmarking-mlperf-bert-inference) +* [System preparation](#system-preparation) + * [Minimal system requirements](#minimal-system-requirements) + * [Install CM (CK2) automation meta-framework](#install-cm-ck2-automation-meta-framework) + * [Pull CM repository with portable automation recipes](#pull-cm-repository-with-portable-automation-recipes) + * [Detect or install CUDA](#detect-or-install-cuda) + * [Test CUDA installation](#test-cuda-installation) + * [Install Python virtual environment](#install-python-virtual-environment) + * [Detect or install cuDNN](#detect-or-install-cudnn) + * [Detect or install TensorRT](#detect-or-install-tensorrt) + * [Run MLPerf inference benchmark with BERT](#run-mlperf-inference-benchmark-with-bert) + * [Try ONNX runtime backend](#try-onnx-runtime-backend) + * [Do a test run to detect and record the system performance](#do-a-test-run-to-detect-and-record-the-system-performance) + * [Do a full accuracy run for all the scenarios](#do-a-full-accuracy-run-for-all-the-scenarios) + * [Do a full performance run for all the scenarios](#do-a-full-performance-run-for-all-the-scenarios) + * [Populate the README files](#populate-the-readme-files) + * [Generate MLPerf submission tree](#generate-mlperf-submission-tree) + * [Push the results to GitHub repo](#push-the-results-to-github-repo) + * [Try PyTorch backend](#try-pytorch-backend) + * [Test composable ML benchmark with other models, data sets, frameworks and platforms](#test-composable-ml-benchmark-with-other-models-data-sets-frameworks-and-platforms) +* [The next steps](#the-next-steps) + +
+ + +This is a pilot community project to collaboratively run MLPerf BERT inference benchmark +across diverse platforms provided by volunteers similar to [SETI@home](https://setiathome.berkeley.edu/). +However, instead of searching for extraterrestrial intelligence, we are +searching for optimal software/hardware combination to run various AI and ML workloads +in terms of performance, accuracy, power and costs ... + +This benchmark is composed from [portable and reusable automation recipes](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +developed by [MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +to modularize complex AI and ML Systems and automate their benchmarking, design space exploration, optimization and deployment +across continuously evolving software, hardware, models and data. + +*If you submit your results before 1pm PST on Friday 3rd, 2023, + they will be accepted for the official MLPerf inference v3.0 submission round + and your name acknowledged in the notes!* + + +# System preparation + +## Minimal system requirements + +* CPU: any x86-64 or Arm64 based machine +* GPU: any relatively modern Nvidia GPU with 8GB+ memory and CUDA 11.4+ +* OS: we have tested this automation on Ubuntu 20.04, Ubuntu 22.04 and Debian 10 +* Disk space: ~10GB +* Python: 3.8+ +* All other dependencies (artifacts and tools) will be installed by the CM meta-framework aka (CK2) + +## Install CM (CK2) automation meta-framework + +Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install the [MLCommons CM framework](https://github.com/mlcommons/ck) +(the 2nd generation on the Collective Mind framework) on your system. + +## Pull CM repository with portable automation recipes + +Pull MLCommons CM repository with [cross-platform CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +supporting portable MLOps and DevOps: + +```bash +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +CM pulls all such repositories into the `$HOME/CM` directory to search for portable CM automation recipes and artifacts. + +We use the unified CM CLI & Python API of [portable and reusable CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +to compose portable automation pipelines (also implemented as CM scripts) that can automatically detect or install all necessary artifacts (tools, models, datasets, libraries, etc) +required to run a given software project such as the MLPerf inference benchmark. + +These CM scripts simply wrap existing native scripts and tools as simple micro-services +with a human-readable CLI and simple Python API to be able to easily connect them together +and run on any platform in a unified way. + +## Detect or install CUDA + +Run the following CM script: +```bash +cm run script "get cuda" --out=json +``` + +If CUDA is automatically detected, it will be registered in the CM cache: +```bash +cm show cache --tags=get,cuda +``` + +Otherwise, this script will attempt to download and install the latest CUDA +from Nvidia website. + +Please report any issue with CM scripts [here](https://github.com/mlcommons/ck/issues). + +### Test CUDA installation + +You can test if CUDA toolkit and driver was detected or installed successfully using the following command: +```bash +cm run script "get cuda-devices" +``` + +You should see similar output: +```txt +Checking compiler version ... 
+ +nvcc: NVIDIA (R) Cuda compiler driver +Copyright (c) 2005-2022 NVIDIA Corporation +Built on Wed_Sep_21_10:33:58_PDT_2022 +Cuda compilation tools, release 11.8, V11.8.89 +Build cuda_11.8.r11.8/compiler.31833905_0 + +Compiling program ... + +Running program ... + + - Running postprocess ... +GPU Device ID: 0 +GPU Name: Tesla K80 +GPU compute capability: 3.7 +CUDA driver version: 11.4 +CUDA runtime version: 11.8 +Global memory: 11997020160 +Max clock rate: 823.500000 MHz +Total amount of shared memory per block: 49152 +Total number of registers available per block: 65536 +Warp size: 32 +Maximum number of threads per multiprocessor: 2048 +Maximum number of threads per block: 1024 +Max dimension size of a thread block X: 1024 +Max dimension size of a thread block Y: 1024 +Max dimension size of a thread block Z: 64 +Max dimension size of a grid size X: 2147483647 +Max dimension size of a grid size Y: 65535 +Max dimension size of a grid size Z: 65535 + + - running time of script "get,cuda-devices": 4.16 sec. + +``` + +## Install Python virtual environment + +```bash +cm run script "get sys-utils-cm" --quiet + +cm run script "install python-venv" --name=mlperf-cuda +``` + +If you want to install specific version of Python use the following command: +```bash +cm run script "install python-venv" --version=3.10.8 --name=mlperf-cuda +``` + +## Detect or install cuDNN + +```bash +cm run script "get cudnn" +``` + +If cuDNN is not detected on your system, you can download a TAR file +from [Nvidia website](https://developer.nvidia.com/cudnn) and then use the same CM script +to install it as follows: +```bash +cm run script "get cudnn" --tar_file= +``` + +We have tested this project with the following tar file `cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz`. + +## Detect or install TensorRT + +```bash +cm run script "get tensorrt" +``` +If TensorRT is not detected on your system, you can download a TAR file +from [Nvidia website](https://developer.nvidia.com/tensorrt) and then use the same CM script +to install it as follows: +```bash +cm run script "get tensorrt" --tar_file= +``` + +We have tested this project with the following tar file `TensorRT-8.5.1.7.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz`. 
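+
+For convenience, the detection/installation steps above can also be chained in a single shell script. The sketch below simply replays the same CM commands shown in this section; the two `--tar_file` paths are hypothetical placeholders for wherever you downloaded the cuDNN and TensorRT archives, and those two lines can be dropped if CM already detects the libraries on your system:
+
+```bash
+#!/bin/bash
+# Sketch: prepare the host for the MLPerf BERT runs using the CM commands from this section.
+set -e
+
+# Basic system utilities and a dedicated Python virtual environment
+cm run script "get sys-utils-cm" --quiet
+cm run script "install python-venv" --name=mlperf-cuda
+
+# Detect (or install) CUDA and verify that the GPU is visible
+cm run script "get cuda" --out=json
+cm run script "get cuda-devices"
+
+# Detect or install cuDNN and TensorRT from locally downloaded archives
+# (replace the placeholder paths with your actual files)
+cm run script "get cudnn" --tar_file=$HOME/downloads/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
+cm run script "get tensorrt" --tar_file=$HOME/downloads/TensorRT-8.5.1.7.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz
+
+# Inspect what was registered in the CM cache
+cm show cache --tags=get,cuda
+```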
+ + +## Run MLPerf inference benchmark with BERT + +### Try ONNX runtime backend + +#### Do a test run to detect and record the system performance + +```bash +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --implementation=reference \ + --device=cuda --backend=onnxruntime --quiet +``` + +#### Do a full accuracy run for all the scenarios + +```bash +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Do a full performance run for all the scenarios + +```bash +cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Populate the README files + +```bash +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Generate MLPerf submission tree + +We should use the master branch of MLCommons inference repo for the submission checker. +You can use `--hw_note_extra` option to add your name to the notes. + +```bash +cm run script --tags=generate,inference,submission \ + --results_dir=$HOME/inference_3.0_results/valid_results \ + --adr.python.name=mlperf-cuda \ + --device=cuda --submission_dir=$HOME/inference_submission_tree --clean \ + --run-checker --submitter=cTuning --adr.inference-src.version=master + --hw_notes_extra="Result taken by " --quiet +``` + +#### Push the results to GitHub repo + +First create a fork of [this GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0). +Then run the following command after replacing `--repo_url` with your fork URL. + +```bash +cm run script --tags=push,github,mlperf,inference,submission \ + --submission_dir=$HOME/inference_submission_tree \ + --adr.python.name=mlperf-cuda \ + --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0 \ + --commit_message="Bert crowd-results added" +``` + +Create a PR to the [GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0/) + + + +### Try PyTorch backend + +You can run the same commands with PyTorch by rerunning all above commands and replacing `--backend=onnxruntime` with `--backend=pytorch`. 
+
+For example,
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \
+ --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \
+ --implementation=reference --backend=pytorch --execution-mode=valid \
+ --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+
+## Test composable ML benchmark with other models, data sets, frameworks and platforms
+
+* [GUI to prepare CM command line and run benchmark](https://cknowledge.org/mlperf-inference-gui)
+* [GUI to compare performance, accuracy, power and costs of ML/SW/HW combinations](https://cKnowledge.org/cm-gui-graph)
+
+
+# The next steps
+
+Please follow the [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org)
+and [MLCommons](https://mlcommons.org).
+
diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md
new file mode 100644
index 0000000000..38f69a5d53
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md
@@ -0,0 +1,59 @@
+## Setup
+Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM.
+Download the ck repo to get the CM script for the MLPerf submission:
+
+```
+cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9
+```
+
+## Run Commands
+
+3d-unet has two variants - `3d-unet-99` and `3d-unet-99.9` - where `99` and `99.9` specify the required accuracy constraint with respect to the reference floating-point model. Both models can be submitted under the edge as well as the datacenter category.
+
+Since 3d-unet is one of the slowest-running models, we only run it using the Nvidia implementation, where the model is quantized and run on the TensorRT backend on an Nvidia GPU.
+
+For `3d-unet-99.9` runs, simply replace `3d-unet-99` with `3d-unet-99.9`.
+
+### TensorRT backend
+
+#### Do a test run to detect and record the system performance
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=3d-unet-99 --implementation=nvidia-original --device=cuda --backend=tensorrt \
+--category=edge --division=open --quiet
+```
+* Use `--category=datacenter` to run datacenter scenarios
+* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode)
+
+#### Do full accuracy and performance runs for all the scenarios
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.0_results \
+--category=edge --division=open --quiet
+```
+
+* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.0_results \
+--category=edge --division=open --quiet
+```
+
+#### Generate actual submission tree
+
+Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, the [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
+
+We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+```
+cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \
+--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \
+--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet
+```
diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md
new file mode 100644
index 0000000000..8aebb068f0
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md
@@ -0,0 +1,80 @@
+## Setup
+Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM.
+Download the ck repo to get the CM script for the MLPerf submission:
+
+```
+cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9
+```
+
+## Run Commands
+
+Bert has two variants - `bert-99` and `bert-99.9` - where `99` and `99.9` specify the required accuracy constraint with respect to the reference floating-point model. The `bert-99.9` model is applicable only to a datacenter system.
+
+In the edge category, `bert-99` has the Offline and SingleStream scenarios, and in the datacenter category both `bert-99` and `bert-99.9` have the Offline and Server scenarios. The below commands assume an edge-category system.
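+
+If you plan to run the same flow on a datacenter system or for the closed division, one convenient pattern is to keep the category, division and device in shell variables and reuse them across all the commands below. This is only a sketch of that idea, using the exact flags documented in this file:
+
+```bash
+#!/bin/bash
+# Sketch: parameterize the BERT commands below for different categories/divisions.
+MODEL=bert-99            # use bert-99.9 for the datacenter-only higher-accuracy target
+CATEGORY=edge            # or: datacenter
+DIVISION=open            # or: closed (enables the compliance tests)
+DEVICE=cpu               # or: cuda for an Nvidia GPU
+
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+  --model=$MODEL --implementation=reference --device=$DEVICE --backend=onnxruntime \
+  --category=$CATEGORY --division=$DIVISION --quiet
+```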
+ +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=bert-99 --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \ +--results_dir=$HOME/inference_3.0_results --quiet +``` + +## Pytorch backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. 
For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=pytorch \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md new file mode 100644 index 0000000000..6d6ba275fd --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md @@ -0,0 +1,82 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. +Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + +On edge category ResNet50 has Offline, SingleStream and MultiStream scenarios and in datacenter category it has Offline and Server scenarios. The below commands are assuming an edge category system. + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=resnet50 --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios --model=resnet50 \ +--device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). 
After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--submission_dir=$HOME/inference_submission_tree --clean \ +--run-checker --submitter=cTuning --adr.inference-src.version=master \ +--hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=tf \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +## TVM backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tvm-onnx`. (Only `--device=cpu` is currently supported for TVM) For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=tvm-onnx \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md new file mode 100644 index 0000000000..4eedba9f31 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md @@ -0,0 +1,67 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. +Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=retinanet --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Pytorch backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=pytorch \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md new file mode 100644 index 0000000000..d7191c808d --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md @@ -0,0 +1,53 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. 
+Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + +### TensorRT backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=rnnt --implementation=nvidia-original --device=cuda --backend=tensorrt \ +--category=edge --division=open --quiet +``` +* Use `--category=datacenter` to run datacenter scenarios +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md new file mode 100644 index 0000000000..e1691c21ac --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md @@ -0,0 +1,48 @@ +The below instructions are for creating an AWS instance from the CLI. You can also create an instance via web and setup CM on it. + +## Prerequisites + +1. AWS Key, secret and token +2. 
`*.pem` SSH key file to be used to create the instance (its public key will be copied to the `$HOME/.ssh/authorized_keys` file in the created instance)
+
+## Run Commands
+
+We need to get the full imagenet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+### Update Access Details
+
+```
+cd $HOME/CM/repos/mlcommons@ck/cm-mlops/script/run-terraform/aws/
+cp credentials.example credentials.sh
+```
+Update `credentials.sh` with your AWS Key, Secret and Token.
+
+### Create an AWS Instance
+
+
+```
+cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \
+--cminit --key_file=$HOME/cmuser.pem
+```
+
+The above command will output the IP of the created instance, which will already have CM set up.
+
+The `_m7g.xlarge,_storage_size.500,_ubuntu.2204` variations can be changed to launch a different instance. Below are the variation combinations we used for MLPerf inference 3.0 submissions.
+
+* `_g4dn.xlarge`
+* `_a1.2xlarge,_storage_size.130,_ubuntu.2204`
+* `_c5.4xlarge,_storage_size.130,_ubuntu.2204`
+* `_m7g.2xlarge,_storage_size.500,_ubuntu.2204`
+* `_inf1.2xlarge,_storage_size.500,_amazon-linux-2-kernel.510`
+* `_t2.medium,_storage_size.200,_rhel.9`
+
+### Copy the needed files from the local machine
+
+Copy the imagenet dataset to the created instance. For example,
+
+```
+rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134:
+```
+To use the [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia), the cuDNN and TensorRT tar files also need to be downloaded locally from the Nvidia website and copied to the AWS instance in the same way.
+
+Once all the required files are copied over, log in to the instance and follow the individual benchmark instructions from the README files given [here](./).
diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md
new file mode 100644
index 0000000000..6bd16556a3
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md
@@ -0,0 +1,35 @@
+The below instructions are for creating a Google Cloud instance from the CLI. You can also create an instance via the web console and set up CM on it.
+
+## Prerequisites
+
+Please follow the authentication instructions given [here](https://github.com/ctuning/mlcommons-ck/blob/master/cm-mlops/script/run-terraform/README-about.md).
+
+
+## Run Commands
+
+We need to get the full imagenet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+
+### Create a GCP Instance
+
+
+```
+cm run script --tags=run,terraform,_gcp,_n1-highmem.4,_gcp_project.mlperf-inference-tests --cminit
+```
+
+The above command will output the IP of the created instance, which will already have CM set up.
+
+The `_n1-highmem.4` variation can be changed to launch a different instance. Below are the variation combinations we used for MLPerf inference 3.0 submissions.
+
+* `_n1-standard.4`
+
+### Copy the needed files
+
+Copy the imagenet dataset to the created instance. For example,
+
+```
+rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134:
+```
+To use the [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia), the cuDNN and TensorRT tar files also need to be downloaded locally from the Nvidia website and copied to the GCP instance in the same way.
+
+Once all the required files are copied over, log in to the instance and follow the individual benchmark instructions from the README files given [here](./).
diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md
new file mode 100644
index 0000000000..68db00ea0e
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md
@@ -0,0 +1,53 @@
+## Setup
+We used an Nvidia Jetson AGX Orin developer kit with 32GB RAM and 64GB eMMC. We also connected a 500GB SSD disk via USB and used a Wifi connection for internet connectivity.
+
+We used the out-of-the-box developer kit image, which was running Ubuntu 20.04 and JetPack 5.0.1 Developer Preview (L4T 34.1.1) with CUDA 11.4. We also used the default 4k page size (Nvidia recommends 64k for MLPerf inference).
+
+[cuDNN 8.6.0](https://developer.nvidia.com/compute/cudnn/secure/8.6.0/local_installers/11.8/cudnn-local-repo-ubuntu2004-8.6.0.163_1.0-1_arm64.deb) and [TensorRT 8.5.2.2](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.5.3/local_repos/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_arm64.deb) were downloaded as Debian packages on a host machine, copied over to the Nvidia Jetson Orin and installed.
+
+
+We need to get the full imagenet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset.
+
+### Copy the needed files from a host machine
+
+Copy the imagenet dataset to the device. For example,
+
+```
+rsync -avz $HOME/imagenet-2012-val/ user@192.168.0.27:
+```
+
+Log in to the Orin and register the imagenet dataset as follows:
+```
+cm run script --tags=get,imagenet,dataset,_2012,_full --input=$HOME/imagenet-2012-val
+```
+
+Once all the required files are copied over, follow the individual benchmark instructions from the README files given [here](./). All the required dependencies should be resolved by CM.
+
+### Power Measurement Setup
+
+We measured power in the peak performance mode (MaxN), except for one SUT where the energy-efficiency mode was changed to Max15. Our aim was to showcase the out-of-the-box performance of the Nvidia Jetson AGX Orin, including the power usage.
+
+## Reproducing the Nvidia Jetson AGX Orin Submission
+
+After our submission we followed Nvidia's instructions in the inference v3.0 repository and tried to reproduce Nvidia's numbers. In MaxN mode we were able to match Nvidia's numbers using the same versions of CUDA, cuDNN and TensorRT, but outside of Docker. In MaxQ mode, we could get the same performance as Nvidia, but our power usage was about 5W higher.
+ +### Performance results MaxN + +The below table shows the performance comparison of our results under different settings and the Nvidia submission for MLPerf inference 3.0. We'll be updating our instructions for easier reproducibility of these numbers including CM scripts for flashing the L4T image and rebuilding the kernel for 64k pagesize. + + +| Workload | Results | L4T | PAGESIZE | Power Mode | FAN Dynamic Speed control | Offline Accuracy | Offline Performance | SingleStream Accuracy | SingleStream Performance | MultiStream Accuracy | MultiStream Performance | +| --------- | --------------------------------- | ----- | -------- | ---------- | ------------------------- | ---------------- | ------------------- | --------------------- | ------------------------ | -------------------- | ----------------------- | +| ResNet50 | Nvidia Submitted (docker) | r35.3 | 64k | MaxN | active | 75.934 | 6438.1 | 76.032 | 0.633479 | 76.032 | 2.187731 | +| ResNet50 | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 75.934 | 4697 | 76.032 | 0.72 | 76.032 | 2.57 | +| ResNet50 | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 75.85 | 6172 | 76.056 | 0.644 | 76.056 | 2.074 | +| ResNet50 | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 75.85 | 6430 | 76.056 | 0.659 | 76.056 | 2.20 | +| RetinaNet | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 37.372 | 92.4048 | 37.403 | 13.924457 | 37.519 | 104.680313 | +| RetinaNet | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 37.346 | 80.0854 (no DLA) | 37.350 | 14,19 | 37.409 | 105.344828 | +| RetinaNet | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 37.345 | 94.6886 | 37.340 | 14.073 | 37.488 | 103.8 | +| BERT | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 90.552 | 544.243 | 90.344 | 5.635431 | NA | NA | +| BERT | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 90.552 | 449.96 | 90.344 | 7.8 | NA | NA | +| BERT | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 90.562 | 527 (128 batchsize) | 90.311 | 6.636 | NA | NA | +| BERT | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 90.552 | 539 | 90.344 | 6.31 | NA | NA | + + diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-2023/README.md new file mode 100644 index 0000000000..6362f3eb66 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/README.md @@ -0,0 +1,83 @@ +### Introduction + +Our goal is to help the community benchmark and optimize various AI/ML applications +across diverse software and hardware provided by volunteers similar to SETI@home! + +Open-source [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) +were developed by a [consortium of 50+ companies and universities (MLCommons)](https://mlcommons.org) +to enable trustable and reproducible comparison of AI/ML systems +in terms of latency, throughput, power consumption, accuracy and other metrics +across diverse software/hardware stacks from different vendors. + +However, running MLPerf inference benchmarks and submitting results [turned out to be a challenge](https://doi.org/10.5281/zenodo.8144274) +even for experts and could easily take many weeks to prepare. 
That's why [MLCommons](https://mlcommons.org),
+[cTuning.org](https://www.linkedin.com/company/ctuning-foundation)
+and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)
+decided to develop an open-source, technology-agnostic
+and non-intrusive [Collective Mind automation language (CM)](https://github.com/mlcommons/ck)
+and [Collective Knowledge Playground (CK)](https://access.cknowledge.org/playground/?action=experiments)
+to help anyone run, reproduce, optimize and compare MLPerf inference benchmarks out-of-the-box
+across diverse software, hardware, models and data sets.
+
+You can read more about our vision, open-source technology and future plans
+in this [presentation](https://doi.org/10.5281/zenodo.8105339).
+
+
+
+### Advanced challenge
+
+We would like to ask volunteers to run various MLPerf inference benchmarks
+on diverse CPUs (Intel, AMD, Arm) and Nvidia GPUs similar to SETI@home
+across different frameworks (ONNX, PyTorch, TF, TFLite)
+either natively or in a cloud (AWS, Azure, GCP, Alibaba, Oracle, OVHcloud, ...)
+and submit results to MLPerf inference v3.1.
+
+However, since some benchmarks may take 1-2 days to run, we suggest starting in the following order (these links describe the CM commands to run the benchmarks and submit results):
+* [CPU: Reference implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_reference.md)
+* [CPU: TFLite C++ implementation of Image classification with variations of MobileNets and EfficientNets (open division)](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/run-mlperf-inference-mobilenet-models/README-about.md)
+* [Nvidia GPU: Nvidia optimized implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_nvidia.md)
+* [Nvidia GPU: Nvidia optimized implementation of Language processing with BERT large (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/bert/README_nvidia.md)
+* [Nvidia GPU: Reference implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/bert/README_nvidia.md)
+* [Nvidia GPU: Reference implementation of Language processing with BERT large (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_reference.md)
+* [Nvidia GPU (24GB of memory min): Reference implementation of Language processing with GPT-J 6B (open)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/gpt-j/README_reference.md)
+* [Nvidia GPU: Nvidia optimized implementation of all other models (open and closed division)](https://github.com/ctuning/mlcommons-ck/blob/master/docs/mlperf/inference/README.md#run-benchmarks-and-submit-results)
+
+Please read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md)
+to set up and run the above benchmarks using CM.
+
+You can register your participation for the [Collective Knowledge leaderboard]( https://access.cKnowledge.org/playground/?action=contributors )
+using this [guide](https://github.com/mlcommons/ck/blob/master/platform/register.md).
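+
+As a concrete starting point, the first item in the list above corresponds to a command of roughly the following shape (it mirrors the ResNet50 commands documented elsewhere in this repository; treat the linked README as the authoritative reference):
+
+```bash
+# Sketch: a first open-division ResNet50 run on CPU with the reference implementation
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+  --model=resnet50 --implementation=reference --device=cpu --backend=onnxruntime \
+  --category=edge --division=open --quiet
+```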
+ +Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck/issues) +to help the community +improve the portability of the CM automation for MLPerf and other benchmarks and projects. + +Looking forward to your submissions and happy hacking! + + + +### Prizes + +* *All submitters will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All submitters will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Status + +You can see shared results in [this repostiory](https://github.com/ctuning/mlperf_inference_submissions_v3.1) +with PRs from participants [here](https://github.com/ctuning/mlperf_inference_submissions_v3.1/pulls). + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json new file mode 100644 index 0000000000..a30c26c928 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_open": "20230704", + "experiments": [], + "points": 1, + "sort": -10, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "mlperf", + "mlperf-inference", + "mlperf-inference-v3.1", + "mlperf-inference-v3.1-2023", + "v3.1" + ], + "title": "Crowd-benchmark all MLPerf inference benchmarks similar to SETI@home (latency, throughput, power consumption, accuracy, costs)", + "trophies": true, + "uid": "3e971d8089014d1f" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md new file mode 100644 index 0000000000..9806c22647 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md @@ -0,0 +1,67 @@ +## Setup + +Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) +to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. + +Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box +across different software, hardware, models and data sets: + + +``` +cm pull repo mlcommons@ck +``` + +Note that you can install Python virtual environment via CM to avoid contaminating +your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). 
+
+## Run Commands
+
+3d-unet has two variants - `3d-unet-99` and `3d-unet-99.9` - where `99` and `99.9` specify the required accuracy constraint with respect to the reference floating-point model. Both models can be submitted under the edge as well as the datacenter category.
+
+Since 3d-unet is one of the slowest-running models, we only run it using the Nvidia implementation, where the model is quantized and run on the TensorRT backend on an Nvidia GPU.
+
+For `3d-unet-99.9` runs, simply replace `3d-unet-99` with `3d-unet-99.9`.
+
+### TensorRT backend
+
+#### Do a test run to detect and record the system performance
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=3d-unet-99 --implementation=nvidia-original --device=cuda --backend=tensorrt \
+--category=edge --division=open --quiet
+```
+* Use `--category=datacenter` to run datacenter scenarios
+* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode)
+
+#### Do full accuracy and performance runs for all the scenarios
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+#### Generate actual submission tree
+
+Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, the [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
+
+We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md new file mode 100644 index 0000000000..c43363c1e9 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md @@ -0,0 +1,113 @@ +## Setup + +Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) +to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. + +Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box +across different software, hardware, models and data sets: + + +``` +cm pull repo mlcommons@ck +``` + +Note that you can install Python virtual environment via CM to avoid contaminating +your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). + +## Run Commands + +Bert has two variants - `bert-99` and `bert-99.9` where the `99` and `99.9` specifies the required accuracy constraint with respect to the reference floating point model. `bert-99.9` model is applicable only on a datacenter system. + +On edge category `bert-99` has Offline and SingleStream scenarios and in datacenter category both `bert-99` and `bert-99.9` have Offline and Server scenarios. The below commands are assuming an edge category system. + +### Onnxruntime backend (Reference implementation) + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=bert-99 --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs. 
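+
+A minimal sketch of this setup, pulling the repository and then creating a dedicated virtual environment via the `install python-venv` script referenced in the note above (the environment name `mlperf` and the Python version are arbitrary choices):
+
+```bash
+# Sketch: one-time CM setup before generating BERT submissions
+cm pull repo mlcommons@ck
+
+# Optional: keep MLPerf dependencies out of the system Python installation
+cm run script "install python-venv" --version=3.10.8 --name=mlperf
+```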
This requires a power analyzer as described [here](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md) +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend (Reference implementation) + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \ +--results_dir=$HOME/inference_3.1_results --quiet +``` + +## Pytorch backend (Reference implementation) + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. 
+
+```
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \
+--model=bert-99 --device=cpu --implementation=reference --backend=pytorch \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results --quiet
+```
+
+## TensorRT backend (Nvidia implementation)
+
+For the TensorRT backend we are using the [Nvidia implementation](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/reproduce-mlperf-inference-nvidia) and not the [MLPerf inference reference implementation](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-inference-reference) for the following reasons:
+* The TensorRT backend is not supported by default in the reference implementation
+* The reference implementation is mostly for fp32 models and quantization is not supported by default
+* Nvidia has done some fantastic work in optimizing performance for the TensorRT backend
+
+To get set up, please follow the instructions [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/reproduce-mlperf-inference-nvidia/README-about.md) to download and install TensorRT and cuDNN unless you already have them installed. This readme also details how to handle the configuration files which are automatically generated by the Nvidia implementation scripts. Once this is done, the following command will run all the modes and scenarios.
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=bert-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results --quiet
+```
+
+* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs. This requires a power analyzer as described [here](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md)
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the default performance numbers
+* Use `--category=datacenter` to run datacenter scenarios
+
+
+The TensorRT backend has an engine generation stage which can be time consuming. For repeated runs, the `--adr.nvidia-harness.make_cmd=run_harness` option skips this engine regeneration and reuses the previously generated engines.
+
+
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md
new file mode 100644
index 0000000000..470930e373
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md
@@ -0,0 +1,90 @@
+## Setup
+
+Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md)
+to install the MLCommons CM reproducibility and automation language in your native environment or Docker container.
+
+Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box
+across different software, hardware, models and data sets:
+
+
+```
+cm pull repo mlcommons@ck
+```
+
+Note that you can install a Python virtual environment via CM to avoid contaminating
+your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments).
+
+## Run Commands
+
+We need the full imagenet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+In the edge category, ResNet50 has the Offline, SingleStream and MultiStream scenarios; in the datacenter category, it has the Offline and Server scenarios. The commands below assume an edge category system.
+
+### Onnxruntime backend
+
+#### Do a test run to detect and record the system performance
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=resnet50 --implementation=reference --device=cpu --backend=onnxruntime \
+--category=edge --division=open --quiet
+```
+* Use `--device=cuda` to run the inference on Nvidia GPU
+* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode)
+* Use `--category=datacenter` to run datacenter scenarios
+
+#### Do full accuracy and performance runs for all the scenarios
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios --model=resnet50 \
+--device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `--multistream_target_latency` can be used to override the determined performance numbers
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=resnet50 --device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+#### Generate actual submission tree
+
+Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, the [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
+
+We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+```
+cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \
+--submission_dir=$HOME/inference_submission_tree --clean \
+--run-checker --submitter=cTuning --adr.inference-src.version=master \
+--hw_notes_extra="Result taken by NAME" --quiet
+```
+
+
+## Tensorflow backend
+
+The same commands as for `onnxruntime` should work after replacing `--backend=onnxruntime` with `--backend=tf`. For example,
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=resnet50 --device=cpu --implementation=reference --backend=tf \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+## TVM backend
+
+The same commands as for `onnxruntime` should work after replacing `--backend=onnxruntime` with `--backend=tvm-onnx` (only `--device=cpu` is currently supported for TVM). For example,
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=resnet50 --device=cpu --implementation=reference --backend=tvm-onnx \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md
new file mode 100644
index 0000000000..4420462cde
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md
@@ -0,0 +1,75 @@
+## Setup
+
+Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md)
+to install the MLCommons CM reproducibility and automation language in your native environment or Docker container.
+
+Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box
+across different software, hardware, models and data sets:
+
+
+```
+cm pull repo mlcommons@ck
+```
+
+Note that you can install a Python virtual environment via CM to avoid contaminating
+your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments).
+
+## Run Commands
+
+
+### Onnxruntime backend
+
+#### Do a test run to detect and record the system performance
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=retinanet --implementation=reference --device=cpu --backend=onnxruntime \
+--category=edge --division=open --quiet
+```
+* Use `--device=cuda` to run the inference on Nvidia GPU
+* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode)
+* Use `--category=datacenter` to run datacenter scenarios
+
+#### Do full accuracy and performance runs for all the scenarios
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `--multistream_target_latency` can be used to override the determined performance numbers
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+#### Generate actual submission tree
+
+Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, the [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
+
+We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+```
+cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \
+--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \
+--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet
+```
+
+
+## Pytorch backend
+
+The same commands as for `onnxruntime` should work after replacing `--backend=onnxruntime` with `--backend=pytorch`. For example,
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=retinanet --device=cpu --implementation=reference --backend=pytorch \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md
new file mode 100644
index 0000000000..a6ca069215
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md
@@ -0,0 +1,61 @@
+## Setup
+
+Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md)
+to install the MLCommons CM reproducibility and automation language in your native environment or Docker container.
+
+Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box
+across different software, hardware, models and data sets:
+
+
+```
+cm pull repo mlcommons@ck
+```
+
+Note that you can install a Python virtual environment via CM to avoid contaminating
+your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments).
+
+## Run Commands
+
+### TensorRT backend
+
+#### Do a test run to detect and record the system performance
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=rnnt --implementation=nvidia-original --device=cuda --backend=tensorrt \
+--category=edge --division=open --quiet
+```
+* Use `--category=datacenter` to run datacenter scenarios
+* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode)
+
+#### Do full accuracy and performance runs for all the scenarios
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+#### Generate actual submission tree
+
+Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, the [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
+
+We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+```
+cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \
+--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \
+--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet
+```
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md
new file mode 100644
index 0000000000..152c612aad
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md
@@ -0,0 +1,50 @@
+## Setup AWS instance for MLPerf
+
+The instructions below are for creating an AWS instance from the CLI. You can also create an instance via the web console and set up CM on it.
+
+## Prerequisites
+
+1. AWS Key, secret and token
+2. A `*.pem` ssh key file to be used to create the instance (its public key will be copied to the `$HOME/.ssh/authorized_keys` file in the created instance)
+
+## Run Commands
+
+We need the full imagenet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+### Update Access Details
+
+```
+cd $HOME/CM/repos/mlcommons@ck/cm-mlops/script/run-terraform/aws/
+cp credentials.example credentials.sh
+```
+Update `credentials.sh` with your AWS Key, Secret and Token
+
+### Create an AWS Instance
+
+
+```
+cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \
+--cminit --key_file=$HOME/cmuser.pem
+```
+
+The above command will output the IP of the created instance, which will already have CM set up.
+
+The `_m7g.xlarge,_storage_size.500,_ubuntu.2204` variations can be changed to launch a different instance. Below are the variation combinations we used for MLPerf inference 3.0 submissions.
+
+* `_g4dn.xlarge`
+* `_a1.2xlarge,_storage_size.130,_ubuntu.2204`
+* `_c5.4xlarge,_storage_size.130,_ubuntu.2204`
+* `_m7g.2xlarge,_storage_size.500,_ubuntu.2204`
+* `_inf1.2xlarge,_storage_size.500,_amazon-linux-2-kernel.510`
+* `_t2.medium,_storage_size.200,_rhel.9`
+
+### Copy the needed files from the local machine
+
+Copy the imagenet dataset to the created instance. For example,
+
+```
+rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134:
+```
+To use the [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia), the cuDNN and TensorRT tar files need to be downloaded locally from the Nvidia website and copied to the AWS instance in a similar way.
+
+Once all the required files are copied over, log in to the instance and follow the individual benchmark instructions from the README files given [here](./).
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md
new file mode 100644
index 0000000000..a3a0e457a1
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md
@@ -0,0 +1,37 @@
+## Setup GCP instance for MLPerf
+
+The instructions below are for creating a Google Cloud instance from the CLI. You can also create an instance via the web console and set up CM on it.
+
+## Prerequisites
+
+Please follow the authentication instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/run-terraform/README-about.md).
+
+
+## Run Commands
+
+We need the full imagenet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+
+### Create a GCP Instance
+
+
+```
+cm run script --tags=run,terraform,_gcp,_n1-highmem.4,_gcp_project.mlperf-inference-tests --cminit
+```
+
+The above command will output the IP of the created instance, which will already have CM set up.
+
+The `_n1-highmem.4` variation can be changed to launch a different instance. Below are the variation combinations we used for MLPerf inference 3.0 submissions.
+
+* `_n1-standard.4`
+
+### Copy the needed files
+
+Copy the imagenet dataset to the created instance. For example,
+
+```
+rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134:
+```
+To use the [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia), the cuDNN and TensorRT tar files need to be downloaded locally from the Nvidia website and copied to the GCP instance in a similar way.
+
+Once all the required files are copied over, log in to the instance and follow the individual benchmark instructions from the README files given [here](./).
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md
new file mode 100644
index 0000000000..08c0a8eeb0
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md
@@ -0,0 +1,54 @@
+## Setup
+
+We used an Nvidia Jetson AGX Orin developer kit with 32GB RAM and 64GB eMMC. We also connected a 500GB SSD disk via USB and used a Wi-Fi connection for internet connectivity.
+
+We used the out-of-the-box developer kit image which was running Ubuntu 20.04 and JetPack 5.0.1 Developer Preview (L4T 34.1.1) with CUDA 11.4. We were also using the default 4k page size (Nvidia recommends 64k for MLPerf inference).
+
+[cuDNN 8.6.0](https://developer.nvidia.com/compute/cudnn/secure/8.6.0/local_installers/11.8/cudnn-local-repo-ubuntu2004-8.6.0.163_1.0-1_arm64.deb) and [TensorRT 8.5.2.2](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.5.3/local_repos/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_arm64.deb) were downloaded as Debian packages on a host machine, copied over to the Nvidia Jetson Orin and installed.
+
+
+We need the full imagenet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset.
+
+### Copy the needed files from a host machine
+
+Copy the imagenet dataset to the Orin device. For example,
+
+```
+rsync -avz $HOME/imagenet-2012-val/ user@192.168.0.27:
+```
+
+Log in to Orin and register the imagenet dataset as
+```
+cm run script --tags=get,imagenet,dataset,_2012,_full --input=$HOME/imagenet-2012-val
+```
+
+Once all the required files are copied over, follow the individual benchmark instructions from the README files given [here](./). All the required dependencies should be resolved by CM.
+
+### Power Measurement Setup
+
+We were measuring power in the peak performance mode (MaxN) except for one SUT where the energy efficiency mode was changed to Max15. Our aim was to showcase the out-of-the-box performance of the Nvidia Jetson AGX Orin, including the power usage.
+
+## Reproducing the Nvidia Jetson AGX Orin Submission
+
+After our submission we followed the instructions from Nvidia in the inference v3.0 repository and tried to reproduce their numbers. For MaxN mode we were able to match Nvidia's numbers using the same versions of CUDA, cuDNN and TensorRT, but outside of Docker. For MaxQ mode, we could get the same performance as Nvidia but our power usage was about 5W higher.
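+
+For reference, a typical way to inspect and switch the power mode on a Jetson device before a run is sketched below (the numeric mode IDs differ between Jetson models and L4T releases, so the `0` used for MaxN here is an assumption to be verified against `/etc/nvpmodel.conf`):
+
+```
+# show the currently active power mode
+sudo nvpmodel -q
+# switch to the MaxN mode (mode ID 0 is assumed; check /etc/nvpmodel.conf)
+sudo nvpmodel -m 0
+# optionally pin clocks to the maximum for the selected mode
+sudo jetson_clocks
+```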
+ +### Performance results MaxN + +The below table shows the performance comparison of our results under different settings and the Nvidia submission for MLPerf inference 3.0. We'll be updating our instructions for easier reproducibility of these numbers including CM scripts for flashing the L4T image and rebuilding the kernel for 64k pagesize. + + +| Workload | Results | L4T | PAGESIZE | Power Mode | FAN Dynamic Speed control | Offline Accuracy | Offline Performance | SingleStream Accuracy | SingleStream Performance | MultiStream Accuracy | MultiStream Performance | +| --------- | --------------------------------- | ----- | -------- | ---------- | ------------------------- | ---------------- | ------------------- | --------------------- | ------------------------ | -------------------- | ----------------------- | +| ResNet50 | Nvidia Submitted (docker) | r35.3 | 64k | MaxN | active | 75.934 | 6438.1 | 76.032 | 0.633479 | 76.032 | 2.187731 | +| ResNet50 | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 75.934 | 4697 | 76.032 | 0.72 | 76.032 | 2.57 | +| ResNet50 | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 75.85 | 6172 | 76.056 | 0.644 | 76.056 | 2.074 | +| ResNet50 | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 75.85 | 6430 | 76.056 | 0.659 | 76.056 | 2.20 | +| RetinaNet | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 37.372 | 92.4048 | 37.403 | 13.924457 | 37.519 | 104.680313 | +| RetinaNet | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 37.346 | 80.0854 (no DLA) | 37.350 | 14,19 | 37.409 | 105.344828 | +| RetinaNet | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 37.345 | 94.6886 | 37.340 | 14.073 | 37.488 | 103.8 | +| BERT | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 90.552 | 544.243 | 90.344 | 5.635431 | NA | NA | +| BERT | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 90.552 | 449.96 | 90.344 | 7.8 | NA | NA | +| BERT | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 90.562 | 527 (128 batchsize) | 90.311 | 6.636 | NA | NA | +| BERT | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 90.552 | 539 | 90.344 | 6.31 | NA | NA | + + diff --git a/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md new file mode 100644 index 0000000000..b72349ad59 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md @@ -0,0 +1,31 @@ +### Challenge + +Develop a reference implementation of any MLPerf inference benchmark to run on Amazon Inferentia. +Submit preliminary (unoptimized) benchmarking results to MLPerf inference v3.1 and beyond. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. 
+ + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). diff --git a/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json new file mode 100644 index 0000000000..66431963a5 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-amazon-inferentia-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "inferentia", + "mlperf-inference", + "mlperf-inference-inferentia", + "mlperf-inference-inferentia", + "mlperf-inference-inferentia-v3.1", + "mlperf-inference-inferentia-v3.1-2023", + "v3.1" + ], + "title": "Develop a reference implementation of any MLPerf inference benchmark to run on Amazon Inferentia and submit to MLPerf inference v3.1+", + "uid": "c8f2573320424e2a" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md new file mode 100644 index 0000000000..c08847da6a --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md @@ -0,0 +1,20 @@ +### Challenge + +Create any end-to-end AI application with web cam, speech recognition, chat bot, LLM +that uses any MLPerf model and CM automation. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +Looking forward to your submissions and happy hacking! 
+ +### Prizes + +* *All submitters will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All submitters will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* + + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) + diff --git a/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json new file mode 100644 index 0000000000..23fb64d835 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-create-end-to-end-app", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_open": "20230704", + "date_close_extension": true, + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "end-to-end-app", + "mlperf-inference", + "mlperf-inference-end-to-end-app", + "mlperf-inference-end-to-end-app", + "mlperf-inference-end-to-end-app-v3.1", + "mlperf-inference-end-to-end-app-v3.1-2023", + "v3.1" + ], + "title": "Generate end-to-end optimized AI apps (LLM, speech, etc) based on MLPerf inference results (with and without container)", + "uid": "96ca61a5aa914063" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md b/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md new file mode 100644 index 0000000000..f0f8908d29 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md @@ -0,0 +1,31 @@ +### Challenge + +Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 using +CM automation language with the DeepSparse library, any model and any platform. + +Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge Ltd](https://cKnowledge.org) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
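+
+### Example CM command
+
+As a starting point, the reference BERT commands from the documentation linked above can be reused with the DeepSparse backend; a minimal sketch (the `deepsparse` backend value is an assumption to be verified against the CM BERT reference implementation, while the remaining flags follow the submission documentation):
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=bert-99 --implementation=reference --device=cpu --backend=deepsparse \
+--category=edge --division=open --quiet
+```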
diff --git a/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json b/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json new file mode 100644 index 0000000000..e1cc4f8880 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-deepsparse", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "experiments": [], + "points": 1, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "deepsparse", + "mlperf-inference", + "mlperf-inference-deepsparse", + "mlperf-inference-deepsparse", + "mlperf-inference-deepsparse-v3.1", + "mlperf-inference-deepsparse-v3.1-2023", + "v3.1" + ], + "title": "Run and optimize MLPerf inference v3.1 benchmarks with Neural Magic's DeepSparse library", + "trophies": true, + "uid": "c495863b08e74abc" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md new file mode 100644 index 0000000000..94fad05b51 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md @@ -0,0 +1,32 @@ +### Challenge + +Develop a reference implementation of any MLPerf inference benchmark to run on the latest publicly available Google TPU. +Submit preliminary (unoptimized) benchmarking results to MLPerf inference v3.1 and beyond. + +Note that you can use either GCP TPU or Coral TPU USB-Accelerator CPU card. +In the latter case, you can reuse and extend our CM-MLPerf script for MobileNets! + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json
new file mode 100644
index 0000000000..3d5aecc950
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json
@@ -0,0 +1,27 @@
+{
+  "alias": "optimize-mlperf-inference-v3.1-google-tpu-2023",
+  "automation_alias": "challenge",
+  "automation_uid": "3d84abd768f34e08",
+  "date_close": "20230817",
+  "date_open": "20230704",
+  "points":3,
+  "trophies":true,
+  "date_close_extension": true,
+  "tags": [
+    "modularize",
+    "optimize",
+    "reproduce",
+    "replicate",
+    "automate",
+    "benchmark",
+    "tpu",
+    "mlperf-inference",
+    "mlperf-inference-tpu",
+    "mlperf-inference-tpu",
+    "mlperf-inference-tpu-v3.1",
+    "mlperf-inference-tpu-v3.1-2023",
+    "v3.1"
+  ],
+  "title": "Develop a reference implementation of any MLPerf inference benchmark to run on the latest publicly available Google TPU (GCP or Coral USB accelerator) and submit to MLPerf inference v3.1+",
+  "uid": "5975fd0e18cd4073"
+}
diff --git a/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md
new file mode 100644
index 0000000000..014f83f7d9
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md
@@ -0,0 +1,52 @@
+### Introduction
+
+Open-source [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549)
+were developed by a [consortium of 50+ companies and universities (MLCommons)](https://mlcommons.org)
+to enable trustworthy and reproducible comparison of AI/ML systems
+in terms of latency, throughput, power consumption, accuracy and other metrics
+across diverse software/hardware stacks from different vendors.
+
+However, it is difficult to customize and run MLPerf benchmarks with non-reference models.
+
+That's why the MLCommons Task Force on automation and reproducibility has developed
+a [Collective Mind automation language](https://doi.org/10.5281/zenodo.8144274)
+to modularize these benchmarks and make them easier to run with different models and data sets.
+
+
+### Challenge
+
+Implement a CM workflow to connect any Hugging Face model
+to MLPerf loadgen and run it with random inputs to obtain a preliminary latency and throughput
+without accuracy.
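+
+One possible starting point is the generic Python loadgen script referenced in the resources below, which can run an ONNX model exported from Hugging Face with random inputs; a hypothetical invocation is sketched here (the script tags and the `--modelpath` flag are assumptions to be verified against that script's README):
+
+```
+cm run script "python app loadgen-generic _onnxruntime" \
+--modelpath=$HOME/model.onnx
+```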
+
+Resources:
+* [CM script to get ML model from Hugging Face zoo](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-huggingface-zoo)
+* [CM script to convert Hugging Face model to ONNX](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/convert-ml-model-huggingface-to-onnx)
+* [CM script to build MLPerf loadgen](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-loadgen)
+* [CM script to run Python Loadgen with any ONNX model](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-loadgen-generic-python/README-extra.md)
+* [MLPerf BERT FP32 model available at Hugging Face](https://huggingface.co/ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1)
+
+Some results showcasing the CK workflow to benchmark Hugging Face models with MLPerf inference v3.0 (BERT):
+* https://access.cknowledge.org/playground/?action=experiments&name=2f1f70d8b2594149
+* https://access.cknowledge.org/playground/?action=experiments&name=mlperf-inference--v3.0--edge--open-power--language-processing--offline&result_uid=9d2594448bbb4b45
+
+Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md)
+to run reference implementations of MLPerf inference benchmarks
+using the CM automation language and use them as a base for your developments.
+
+Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision.
+
+### Prizes
+
+* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.*
+* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).*
+* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*.
+ + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + diff --git a/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json new file mode 100644 index 0000000000..146505b55a --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-hugging-face-models-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "huggingface", + "mlperf-inference", + "mlperf-inference-huggingface", + "mlperf-inference-huggingface", + "mlperf-inference-huggingface-v3.1", + "mlperf-inference-huggingface-v3.1-2023", + "v3.1" + ], + "title": "Implement CM automation to run benchmark Hugging Face models using MLPerf loadgen", + "uid": "72b95d08a9e04698" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md new file mode 100644 index 0000000000..aec0514730 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md @@ -0,0 +1,31 @@ +### Challenge + +Add CM interface to run MLPerf inference benchmarks on Intel-based platforms. + +You can start from reproducing any past MLPerf inference submission from Intel and their partners +and then adding CM automation. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json new file mode 100644 index 0000000000..c3d9adbe4c --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-intel-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20240104", + "date_open": "20230704", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "intel", + "mlperf-inference", + "mlperf-inference-intel", + "mlperf-inference-intel", + "mlperf-inference-intel-v3.1", + "mlperf-inference-intel-v3.1-2023", + "v3.1" + ], + "title": "Add the CM interface to run MLPerf inference benchmarks on Intel-based platforms", + "trophies": true, + "uid": "1c1d5da6766f4afb" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md new file mode 100644 index 0000000000..6aaf4e3947 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md @@ -0,0 +1,34 @@ +### Challenge + +Add support to run a reference implementation of any MLPerf inference benchmark using +[Mojo language]( https://github.com/modularml/mojo ) +from [Modular.ai](https://modular.ai). + +Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 with Mojo. + +Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *The first implementation will receive a cache prize from organizers.* +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge Ltd](https://cKnowledge.org) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json new file mode 100644 index 0000000000..e805879dee --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-modular-mojo-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "experiments": [], + "points": 1, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mojo", + "mlperf-inference", + "mlperf-inference-mojo", + "mlperf-inference-mojo", + "mlperf-inference-mojo-v3.1", + "mlperf-inference-mojo-v3.1-2023", + "v3.1" + ], + "title": "Run reference implementations of MLperf inference benchmarks using Mojo language from Modular.ai", + "trophies": true, + "uid": "0a8a7bb5572447db" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md new file mode 100644 index 0000000000..c16a9335a6 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md @@ -0,0 +1,33 @@ +### Challenge + +Add CM interface to run MLPerf inference benchmarks on Qualcomm AI100-based platforms. + +You can start from reproducing any past submission from Dell, Lenovo or HPE +and then adding CM automation. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json new file mode 100644 index 0000000000..07c626e259 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-qualcomm-ai100-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20240104", + "date_open": "20230704", + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "ai100", + "mlperf-inference", + "mlperf-inference-ai100", + "mlperf-inference-ai100", + "mlperf-inference-ai100-v3.1", + "mlperf-inference-ai100-v3.1-2023", + "v3.1" + ], + "title": "Add the CM interface to run MLPerf inference benchmarks on Qualcomm AI100-based platforms", + "uid": "09bd5f9e05ff46b1" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md new file mode 100644 index 0000000000..f8d9fbd71b --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md @@ -0,0 +1,41 @@ +### Challenge + +Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 using +CM automation language with Apache TVM, any model and any platform. + +Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [Deelvin](https://deelvin.com) +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge Ltd](https://cKnowledge.org) + +### Status + +This challenge is under preparation. + +* https://github.com/mlcommons/ck/pull/693 +* https://github.com/mlcommons/ck/pull/700 +* https://github.com/mlcommons/ck/pull/701 + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
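+
+### Example CM command
+
+The TVM backend can already be exercised through the reference ResNet50 workflow; a minimal sketch reusing the `tvm-onnx` backend value from the submission docs in this repository (note that only `--device=cpu` is expected to work with TVM):
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=resnet50 --implementation=reference --device=cpu --backend=tvm-onnx \
+--category=edge --division=open --quiet
+```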
diff --git a/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json new file mode 100644 index 0000000000..839fb6b86e --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-tvm-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":1, + "trophies":true, + "experiments": [], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "tvm", + "mlperf-inference", + "mlperf-inference-tvm", + "mlperf-inference-tvm", + "mlperf-inference-tvm-v3.1", + "mlperf-inference-tvm-v3.1-2023", + "v3.1" + ], + "title": "Run and optimize MLPerf inference v3.1 benchmarks with Apache TVM", + "uid": "29c416e245884746" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md new file mode 100644 index 0000000000..0a5fe9aa2c --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md @@ -0,0 +1,31 @@ +### Challenge + +Add more models and hardware backends to the [universal C++ implementation of MLPerf inference benchmarks)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/app-mlperf-inference-cpp) +being developed by the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md). + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
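+
+### Example CM command
+
+As a reference point, the run commands used elsewhere in this repository can select the existing C++ implementation by switching the implementation flag; a minimal sketch (the `cpp` implementation value is an assumption to be verified against the CM documentation):
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=resnet50 --implementation=cpp --device=cpu --backend=onnxruntime \
+--category=edge --division=open --quiet
+```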
diff --git a/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json new file mode 100644 index 0000000000..e4e5cae105 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "cpp", + "mlperf-inference", + "mlperf-inference-cpp", + "mlperf-inference-cpp", + "mlperf-inference-cpp-v3.1", + "mlperf-inference-cpp-v3.1-2023", + "v3.1" + ], + "title": "Add more models and hardware backends to the universal C++ implementation of MLPerf inference benchmarks from MLCommons", + "trophies": true, + "uid": "518420b0e6dd4fed" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md new file mode 100644 index 0000000000..d587f62f89 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md @@ -0,0 +1,36 @@ +### Challenge + +Prepare, optimize and submit any benchmarking results to MLPerf inference v3.1 using +CM automation language on Windows. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Status + +Open ticket: [GitHub](https://github.com/mlcommons/ck/issues/696) + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json new file mode 100644 index 0000000000..1a55dcbe0f --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-windows-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":2, + "trophies":true, + "experiments": [], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "windows", + "mlperf-inference", + "mlperf-inference-windows", + "mlperf-inference-windows", + "mlperf-inference-windows-v3.1", + "mlperf-inference-windows-v3.1-2023", + "v3.1" + ], + "title": "Run and optimize MLPerf inference v3.1 benchmarks on Windows", + "uid": "53e56d714c7649c7" +} diff --git a/challenge/repro-mlperf-inf-v3.0-orin/README.md b/challenge/repro-mlperf-inf-v3.0-orin/README.md new file mode 100644 index 0000000000..54dd4feeb0 --- /dev/null +++ b/challenge/repro-mlperf-inf-v3.0-orin/README.md @@ -0,0 +1,16 @@ +### Challenge + +Reproduce MLPerf inference v3.0 benchmark results for Nvidia Jetson Orin +(performance, accuracy,power) and automate it using the +[MLCommons CK framework](https://github.com/mlcommons/ck). + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge](https://cKnowledge.org) + +### Status + +Finished. Preliminary results are available [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md). + diff --git a/challenge/repro-mlperf-inf-v3.0-orin/_cm.json b/challenge/repro-mlperf-inf-v3.0-orin/_cm.json new file mode 100644 index 0000000000..aff0fdba0f --- /dev/null +++ b/challenge/repro-mlperf-inf-v3.0-orin/_cm.json @@ -0,0 +1,23 @@ +{ + "alias": "repro-mlperf-inf-v3.0-orin", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230406", + "date_open": "20230301", + "experiments": [ + { + "tags": "mlperf-inference,v3.0" + } + ], + "_password_hash": "$2b$12$ionIRWe5Ft7jkn4y/7C6/eYoo6uBBMkGy/9SxwtKhaDRqZ1w2s3dO", + "tags": [ + "reproduce", + "replicate", + "automate", + "orin", + "nvidia", + "mlperf-inference-v3.0-orin" + ], + "title": "Reproduce MLPerf inference v3.0 results for Nvidia Jetson Orin", + "uid": "6d377c1a1b224636" +} diff --git a/challenge/repro-mlperf-inference-retinanet-scc2022/README.md b/challenge/repro-mlperf-inference-retinanet-scc2022/README.md new file mode 100644 index 0000000000..9917547c15 --- /dev/null +++ b/challenge/repro-mlperf-inference-retinanet-scc2022/README.md @@ -0,0 +1,39 @@ +### Challenge + +Reproduce the MLPerf inference RetinaNet benchmark during Student Cluster Competition at SuperComputing'22 +using the following [CM tutorial](https://github.com/mlcommons/ck/blob/master/docs/tutorials/sc22-scc-mlperf.md). + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [OctoML](https://octoml.ai) + +### Status + +This challenge has been successfully completed. 
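+
+### Example command
+
+The linked tutorial contains the exact steps that were used at SCC'22; the command below is only a sketch of a RetinaNet test run through the same unified CM interface and assumes `--model=retinanet` is available in your CM setup.
+
+```bash
+# Hedged sketch of a RetinaNet test run via CM; see the tutorial above for
+# the authoritative step-by-step commands.
+cm run script --tags=generate-run-cmds,inference,_find-performance \
+     --model=retinanet --implementation=reference --device=cpu --backend=onnxruntime \
+     --category=edge --division=open --quiet --scenario=Offline
+```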
+ +### Results + +Results from 10 international student teams are available at: +* [W&B dashboard 1 (during SCC'22)](https://wandb.ai/cmind/cm-mlperf-sc22-scc-retinanet-offline/table?workspace=user-gfursin) +* [W&B dashboard 2 (after SCC'22)](https://wandb.ai/cmind/cm-mlperf-dse-testing/table?workspace=user-gfursin) + + +### Acknowledgments + +We thank +[Hai Ah Nam](https://www.nersc.gov/about/nersc-staff/advanced-technologies-group/hai-ah-nam), +[Steve Leak](https://www.linkedin.com/in/steve-leak), +[Vijay Janappa Reddi](https://scholar.harvard.edu/vijay-janapa-reddi/home), +[Tom Jablin](https://scholar.google.com/citations?user=L_1FmIMAAAAJ&hl=en), +[Ramesh N Chukka](https://www.linkedin.com/in/ramesh-chukka-74b5b21), +[Peter Mattson](https://www.linkedin.com/in/peter-mattson-33b8863/), +[David Kanter](https://www.linkedin.com/in/kanterd), +[Pablo Gonzalez Mesa](https://www.linkedin.com/in/pablo-gonzalez-mesa-952ab2207), +[Thomas Zhu](https://www.linkedin.com/in/hanwen-zhu-483614189), +[Thomas Schmid](https://www.linkedin.com/in/tschmid) +and [Gaurav Verma](https://www.linkedin.com/in/grverma) +for their suggestions and contributions. + + diff --git a/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json b/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json new file mode 100644 index 0000000000..68352f9c3b --- /dev/null +++ b/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json @@ -0,0 +1,20 @@ +{ + "alias": "repro-mlperf-inference-retinanet-scc2022", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20221201", + "date_open": "20221101", + "tags": [ + "modularize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-scc", + "mlperf-inference-scc-2022" + ], + "title": "Automate MLPerf RetinaNet benchmark at the Student Cluster Competition at SuperComputing'22 using CM", + "uid": "e71fa8b396874e68" +} diff --git a/challenge/repro-mlperf-inference-v4.0-2024/README.md b/challenge/repro-mlperf-inference-v4.0-2024/README.md new file mode 100644 index 0000000000..af23eb1205 --- /dev/null +++ b/challenge/repro-mlperf-inference-v4.0-2024/README.md @@ -0,0 +1,3 @@ +The [MLCommons](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) +are preparing a unified interface to reproduce results from the MLPerf inference benchmark submission v4.0. +Please feel free to join the testing phase using [GitHub issues](https://github.com/mlcommons/ck/issues)! diff --git a/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml b/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml new file mode 100644 index 0000000000..01bcfd52a7 --- /dev/null +++ b/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml @@ -0,0 +1,25 @@ +alias: repro-mlperf-inference-v4.0-2024 +uid: e6b8738383eb46d0 + +automation_alias: challenge +automation_uid: 3d84abd768f34e08 + +title: Reproduce and automate MLPerf inference benchmark results v4.0 from different vendors (Intel, Nvidia, Qualcomm, Google, NeuralMagic, ...) 
using CM
+
+date_open: '20240201'
+
+tags:
+- modularize
+- optimize
+- reproduce
+- replicate
+- automate
+- benchmark
+- mlperf
+- mlperf-inference
+- mlperf-inference-v4.0
+- mlperf-inference-v4.0-2024
+- v4.0
+
+experiments:
+- tags: mlperf-inference,v4.0
diff --git a/challenge/repro-mlperf-inference-v4.1-2024/README.md b/challenge/repro-mlperf-inference-v4.1-2024/README.md
new file mode 100644
index 0000000000..1aacc2d59d
--- /dev/null
+++ b/challenge/repro-mlperf-inference-v4.1-2024/README.md
@@ -0,0 +1,4 @@
+The [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) and [MLCommons](https://mlcommons.org)
+are preparing an open reproducibility challenge to reproduce various results from the MLPerf inference benchmark v4.1
+using the MLCommons CM automation framework. Please stay tuned for more details!
+
diff --git a/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml b/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml
new file mode 100644
index 0000000000..840d58318d
--- /dev/null
+++ b/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml
@@ -0,0 +1,22 @@
+alias: repro-mlperf-inference-v4.1-2024
+uid: 2093f4d750144df4
+
+automation_alias: challenge
+automation_uid: 3d84abd768f34e08
+
+title: 'Reproduce the upcoming MLPerf inference benchmark v4.1 results'
+
+date_open: '20240901'
+
+tags:
+- modularize
+- optimize
+- reproduce
+- replicate
+- automate
+- benchmark
+- mlperf
+- mlperf-inference
+- mlperf-inference-v4.1
+- mlperf-inference-v4.1-2024
+- v4.1
diff --git a/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md
new file mode 100644
index 0000000000..0f59f59f0e
--- /dev/null
+++ b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md
@@ -0,0 +1,36 @@
+### Challenge
+
+Reproduce and automate [TinyMLPerf benchmarks](https://github.com/mlcommons/tiny).
+
+### Organizers
+
+* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce)
+* [cTuning foundation](https://cTuning.org)
+* [cKnowledge Ltd](https://cKnowledge.org)
+
+### Status
+
+We have successfully reproduced the [TinyMLPerf v1.0 submission with microTVM on the STMicroelectronics NUCLEO-L4R5ZI board](https://github.com/mlcommons/tiny_results_v1.0/tree/main/closed/OctoML),
+automated it with the latest version of the [MLCommons CM automation language](https://github.com/mlcommons/ck/blob/master/docs/README.md),
+submitted the reproduced results to the TinyMLPerf v1.1 round,
+and added all past TinyMLPerf results to the [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny)
+for further collaborative analysis and improvement.
+
+Please check our tutorial and reproducibility report:
+* [Automate TinyMLPerf benchmark](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/automate-mlperf-tiny.md) - useful for all SW/HW stacks and submission rounds.
+* [Reproduce TinyMLPerf v1.0 submission](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/reproduce-mlperf-tiny.md).
+
+TinyMLPerf v1.1 results will be published at the [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny)
+in mid-June 2023.
+
+### Related discussions for the future
+
+* https://github.com/mlcommons/ck/pull/693
+* https://github.com/mlcommons/ck/pull/700
+* https://github.com/mlcommons/ck/pull/701
+* https://github.com/mlcommons/ck/issues/606
+
+### Results
+
+All results will be available in [this GitHub repo](https://github.com/ctuning/cm4mlperf-results)
+and can be visualized and compared using the [MLCommons Collective Knowledge Playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny).
diff --git a/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json
new file mode 100644
index 0000000000..4e9e248505
--- /dev/null
+++ b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json
@@ -0,0 +1,23 @@
+{
+  "alias": "reproduce-and-automate-tinymlperf-v1.1-2023",
+  "automation_alias": "challenge",
+  "automation_uid": "3d84abd768f34e08",
+  "date_close": "20230519",
+  "date_open": "20230501",
+  "experiments": [],
+  "tags": [
+    "modularize",
+    "automate",
+    "reproduce",
+    "replicate",
+    "optimize",
+    "benchmark",
+    "tinymlperf",
+    "tinymlperf-inference",
+    "tinymlperf-inference-v3.0",
+    "tinymlperf-inference-v3.0-2023",
+    "v1.0"
+  ],
+  "title": "Reproduce and optimize TinyMLPerf inference v1.1 benchmarks",
+  "uid": "d98cd66e0e5641f7"
+}
diff --git a/challenge/reproduce-mlperf-training-v3.0-2023/README.md b/challenge/reproduce-mlperf-training-v3.0-2023/README.md
new file mode 100644
index 0000000000..a1f1ea22ac
--- /dev/null
+++ b/challenge/reproduce-mlperf-training-v3.0-2023/README.md
@@ -0,0 +1,17 @@
+### Challenge
+
+Prepare, optimize and reproduce MLPerf training v3.0 benchmarks
+using the [MLCommons CM (CK2) automation framework](https://github.com/mlcommons/ck).
+
+### Status
+
+We could not make a successful submission, mainly because the training scripts were not converging on a single GPU. We tried ResNet and BERT training. The CM scripts below were added to run MLPerf BERT training with the reference and NVIDIA implementations:
+
+1. [BERT Training using Nvidia code](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-training-nvidia)
+2. 
[BERT Training using MLPerf Reference code](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-training-reference) + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge](https://cKnowledge.org) diff --git a/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json b/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json new file mode 100644 index 0000000000..d1e5eddea8 --- /dev/null +++ b/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json @@ -0,0 +1,23 @@ +{ + "alias": "reproduce-mlperf-training-v3.0-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230519", + "date_open": "20230501", + "experiments": [], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-training", + "mlperf-training-v3.0", + "mlperf-training-v3.0-2023", + "v3.0" + ], + "title": "Reproduce MLPerf training v3.0 benchmarks", + "uid": "1d26149c1cce4da3" +} diff --git a/challenge/run-mlperf@home-v3.1-cpu/README.md b/challenge/run-mlperf@home-v3.1-cpu/README.md new file mode 100644 index 0000000000..bd734f7896 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/README.md @@ -0,0 +1,67 @@ +### Introduction + +The goal of this MLPerf@home challenge is to help the community find +the most efficient CPU (Intel/AMD/Arm) for BERT-99 model with DeepSparse engine +and different variations of MobileNets/EfficientNets with TFLite +in terms of latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics. + +We would like to ask you to run a few [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) +with BERT and MobileNets/EfficientNets on one or more systems with different CPUs +that you have an access to: laptops, servers, cloud instances... + +You will be able to run benchmarks, collect all metrics and submit results in an automated way +in a native environment or Docker container using the portable and technology-agnostic +[MLCommons Collective Mind automation language (CM)](https://doi.org/10.5281/zenodo.8105339). + +Your name and benchmark submissions will be published in the official MLCommons inference v3.1 results +on September 1, 2023 (submission deadline: August 4, 2023), +will be published in the [official leaderboard](https://access.cknowledge.org/playground/?action=contributors), +will be included to the prize draw, and will be presented in our upcoming ACM/HiPEAC events. + +Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck) +to help the community improve CM automation workflows to run MLPerf benchmarks on any system with any software/hardware stack. + +Thank you in advance for helping the community find Pareto-efficient AI/ML Systems! 
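+
+If you would like a quick manual look at the CPU you are about to benchmark (core count, frequency, available memory), the standard Linux commands below are enough; CM also records detailed platform information automatically during the benchmark runs, so this is only for your own notes.
+
+```bash
+# Quick manual snapshot of the CPU and memory you are about to benchmark (Linux).
+lscpu | grep -E 'Model name|Socket|Core|Thread|MHz'
+free -h
+```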
+ +### Minimal requirements + +* CPU: Any x86-64 or Arm64 +* OS: + * native: any Linux (tested on Ubuntu 22.04) + * Docker: any OS +* Disk space: + * BERT-99: ~ 20GB + * Different variations of MobileNets/EfficientNets: ~ 140GB +* Time to run: + * BERT-99: ~ 2 hours + * Different variations of MobileNets/EfficientNets: ~ 2 days + +### Instructions to run benchmarks and submit results + +You can run any of these benchmarks or all depending on available time: + +* [Automated Design Space Exploration of MobileNets/EfficientNets; TFLite MLPerf implementation; native environment or Docker](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md) +* [BERT-99 model; DeepSparse MLPerf implementation; native environment](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-cpu/run-cpu-bert-99-deepsparse.md) + +### Results + +All accepted results with submitter names will be publicly available +at the official [MLCommons website](https://mlcommons.org) +and in the [Collective Knowledge explorer (MLCommons CK)](https://access.cknowledge.org/playground/?action=experiments) +along with the reproducibility and automation report to help the community +build efficient AI/ML systems. + + +### Organizers + +* [MLCommons Task Force on Automation and Reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Advanced challenges + +If you feel that running these benchmarks was relatively easy, +please try [more advanced challenges](https://access.cknowledge.org/playground/?action=challenges), +read about our [plans and long-term vision](https://doi.org/10.5281/zenodo.8105339), +check [CM documentation](https://github.com/mlcommons/ck/blob/master/docs/README.md) +and run other [MLPerf benchmarks](https://github.com/mlcommons/ck/tree/master/docs/mlperf). diff --git a/challenge/run-mlperf@home-v3.1-cpu/_cm.json b/challenge/run-mlperf@home-v3.1-cpu/_cm.json new file mode 100644 index 0000000000..88f4716cda --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/_cm.json @@ -0,0 +1,21 @@ +{ + "alias": "run-mlperf@home-v3.1-cpu", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_open": "20230725", + "experiments": [], + "points": 2, + "sort": -20, + "tags": [ + "run", + "mlperf", + "inference", + "v3.1", + "mlperf-inference-v3.1-simple-cpu" + ], + "title": "Work with the community to find the most efficient CPUs (Intel/AMD/Arm) for BERT and MobileNets/EfficientNets (latency, throughput, accuracy, number of cores, frequency, memory size, cost and other metrics)", + "skip": true, + "trophies": true, + "uid": "498f33f3dac647c1" +} diff --git a/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md new file mode 100644 index 0000000000..b4266ffa97 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md @@ -0,0 +1,100 @@ +# Introduction + +This guide will help you automatically run the MLPerf inference benchmark v3.1 with BERT-99 model and DeepSparse engine +on any Linux-based system with Intel, AMD or Arm CPU. + +This benchmark is automated by the MLCommons CM language and you should be able to submit official MLPerf v3.1 inference results +for offline scenario in open division and edge category. 
+ +It will require ~20GB of disk space and can take ~2 hours to run on 1 system. + + + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. + +Note that CM will store all such repositories and downloaded/installed data sets, models and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup virtual environment + +We suggest you to setup a Python virtual environment via CM to avoid contaminating your existing Python installation: + +```bash +cm run script "install python-venv" --name=mlperf --version_min=3.8 +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" +``` + +CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: +```bash +cm show cache --tags=python-venv +``` + +Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. + +You can clean it at any time and start from scratch using the following command: +```bash +cm rm cache -f +``` + +Alternatively, you can remove specific entries using tags: +```bash +cm show cache +cm rm cache --tags=tag1,tag2,... +``` + + + + +### Do a test run to detect and record the system performance + +```bash +cm run script --tags=generate-run-cmds,inference,_find-performance \ +--model=bert-99 --implementation=reference --device=cpu --backend=deepsparse \ +--category=edge --division=open --quiet --scenario=Offline +``` + +### Do full accuracy and performance run + +``` +cm run script --tags=generate-run-cmds,inference,_submission --model=bert-99 \ +--device=cpu --implementation=reference --backend=deepsparse \ +--execution-mode=valid --results_dir=$HOME/results_dir \ +--category=edge --division=open --quiet --scenario=Offline +``` +### Generate and upload MLPerf submission + +Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/Submission.md) to generate the submission tree and upload your results. 
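+
+If you prefer to generate and check the submission tree locally before uploading, the command typically looks like the sketch below; the linked guide is authoritative, and the directories are placeholders for your own paths.
+
+```bash
+# Hedged sketch of generating and checking the submission tree locally;
+# follow the linked guide for the authoritative flags.
+cmr "generate inference submission" \
+   --clean \
+   --submitter=cTuning \
+   --results_dir=$HOME/results_dir/valid_results \
+   --submission_dir=$HOME/inference_submission_tree \
+   --preprocess_submission=yes \
+   --adr.compiler.tags=gcc \
+   --adr.inference-src.version=master \
+   --run-checker
+```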
+ + diff --git a/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md new file mode 100644 index 0000000000..f41b1b463b --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md @@ -0,0 +1,77 @@ +# Introduction + +This guide will help you automatically run the MLPerf inference benchmark v3.1 with multiple variations of MobileNets and EfficientNets +and TFLite on any Linux-based system with Intel, AMD or Arm CPU. + +This benchmark is automated by the MLCommons CM language and you should be able to submit official MLPerf v3.1 inference results +for singlestream scenario in open division and edge category. + +It will require ~140GB of disk space and can take ~2 days to run on 1 system producing 243 MLPerf results +during automatic design space exploration to trade off accuracy vs performance. + + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. + +Note that CM will store all such repositories and downloaded/installed data sets, models and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup virtual environment + +We suggest you to setup a Python virtual environment via CM to avoid contaminating your existing Python installation: + +```bash +cm run script "install python-venv" --name=mlperf --version_min=3.8 +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" +``` + +CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: +```bash +cm show cache --tags=python-venv +``` + +Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. + +You can clean it at any time and start from scratch using the following command: +```bash +cm rm cache -f +``` + +Alternatively, you can remove specific entries using tags: +```bash +cm show cache +cm rm cache --tags=tag1,tag2,... 
+``` + + diff --git a/challenge/run-mlperf@home-v3.1-gpu/README.md b/challenge/run-mlperf@home-v3.1-gpu/README.md new file mode 100644 index 0000000000..b6482d3835 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/README.md @@ -0,0 +1,65 @@ +### Introduction + +The goal of this MLPerf@home challenge is to help the community find +the most efficient Nvidia GPUs for GPT-J 6B model and BERT-99 in terms of +latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics. + +We would like to ask you to run a few [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) +with GPT-J and BERT-99 models on one or more systems with different Nvidia GPUs +that you have an access to: laptops, servers, cloud instances... + +You will be able to run benchmarks, collect all metrics and submit results in an automated way +in a native environment or Docker container using the portable and technology-agnostic +[MLCommons Collective Mind automation language (CM)](https://doi.org/10.5281/zenodo.8105339). + +Your name and benchmark submissions will be published in the official MLCommons inference v3.1 results +on September 1, 2023 (**submission deadline: August 17, 2023**), +will be published in the [official leaderboard](https://access.cknowledge.org/playground/?action=contributors), +will be included to the prize draw, and will be presented in our upcoming ACM/HiPEAC events. + +Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck) +to help the community improve CM automation workflows to run MLPerf benchmarks on any system with any software/hardware stack. + +Thank you in advance for helping the community find Pareto-efficient AI/ML Systems! + +### Minimal requirements + +* GPU: Nvidia +* GPU memory: + * GPT-J 6B: min 24GB + * BERT-99: min 8..16GB +* OS: + * native: any Linux (tested on Ubuntu 22.04) + * Docker: any OS + any Linux (tested on Ubuntu 22.04) +* Disk space: ~30GB per model/data set +* Time to run: + * GPT-J 6B: ~ 1 day + * BERT-99: ~ 2 hours + +### Instructions to run benchmarks and submit results + +* [GPT-J 6B model (24GB min GPU memory); PyTorch+CUDA; native environment](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md) +* [BERT-99 model (8GB min GPU memory); TensorRT; Docker](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md) + +### Results + +All accepted results with submitter names will be publicly available +at the official [MLCommons website](https://mlcommons.org) +and in the [Collective Knowledge explorer (MLCommons CK)](https://access.cknowledge.org/playground/?action=experiments) +along with the reproducibility and automation report to help the community +build efficient AI/ML systems. 
+ +### Organizers + +* [MLCommons Task Force on Automation and Reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Advanced challenges + +If you feel that running these benchmarks was relatively easy, +please try [more advanced challenges](https://access.cknowledge.org/playground/?action=challenges), +read about our [plans and long-term vision](https://doi.org/10.5281/zenodo.8105339), +check [CM documentation](https://github.com/mlcommons/ck/blob/master/docs/README.md) +and run other [MLPerf benchmarks](https://github.com/mlcommons/ck/tree/master/docs/mlperf). diff --git a/challenge/run-mlperf@home-v3.1-gpu/_cm.json b/challenge/run-mlperf@home-v3.1-gpu/_cm.json new file mode 100644 index 0000000000..af7deeadae --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/_cm.json @@ -0,0 +1,20 @@ +{ + "alias": "run-mlperf@home-v3.1-gpu", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_open": "20230725", + "experiments": [], + "points": 2, + "sort": -30, + "tags": [ + "run", + "mlperf", + "inference", + "v3.1", + "mlperf-inference-v3.1-simple-cpu" + ], + "title": "Work with the community to find the most efficient Nvidia GPUs for GPT-J 6B model and BERT (latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics)", + "trophies": true, + "uid": "54230c3b66564cef" +} diff --git a/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md new file mode 100644 index 0000000000..f543c23621 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md @@ -0,0 +1,193 @@ +# Introduction + +This guide will help you run the Nvidia implementation of the MLPerf inference benchmark v3.1 +with BERT-99 model and TensorRT on any Linux-based system with Nvidia GPU (8..16GB min memory required) +and Docker. + +This benchmark is semi-automated by the [MLCommons CM language](https://doi.org/10.5281/zenodo.8105339) +and you should be able to submit official MLPerf v3.1 inference results +for all scenarios in closed division and edge category +(**deadline to send us results for v3.1 submission: August 3, 2023**). + + +It will require ~30GB of disk space and can take ~2 hours to run on 1 system. + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. 
+ +Note that CM will store all such repositories and downloaded/installed data sets, models and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup CUDA and Docker container + +### Download CUDA 11.8 + +Nvidia recommends the following version of CUDA to be used with their MLPerf inference implementation: + +``` +wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run +``` + +However, you are very welcome to try another version! + +### Download cuDNN, TensorRT + +For x86 machines, please download the following TAR files: +1. [cuDNN](https://developer.nvidia.com/cudnn) - note that Nvidia recommends `cudnn-linux-x86_64-8.9.2.26_cuda11-archive.tar.xz` + but you are welcome to try another version +2. [TensorRT](https://developer.nvidia.com/tensorrt) - note that Nvidia recommends `TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz` + but you can try another version + + +### Set up Nvidia Docker container with MLPerf benchmarks + +1. [Install Docker](https://docs.docker.com/engine/install/) and [Nvidia container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) + +2. Give Docker permission to the current user + ``` + sudo usermod -aG docker $USER + ``` + Logout and login + Restart docker if required and confirm that Nvidia container toolkit is working by + ``` + nvidia-ctk --version + ``` + +3. Check if Nvidia driver is working properly on the host. + ``` + nvidia-smi + ``` + If the above command produces any error you'll need to install Nvidia drivers on the host. You can do this via CM if you have sudo access + ``` + cmr "install cuda prebuilt _driver" --version=11.8.0 + ``` + + +4. Build the docker container and mount the paths from the host machine. + + *You may need to change --cuda_run_file_path, --tensorrt_tar_file_path and --cudnn_tar_file_path if you downloaded other versions than recommended by Nvidia.* + + *You may want to change the `scratch_path` location as it can take 100s of GBs.* + + ```bash + cm docker script --tags=build,nvidia,inference,server \ + --cuda_run_file_path=$HOME/cuda_11.8.0_520.61.05_linux.run \ + --tensorrt_tar_file_path=$HOME/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + --cudnn_tar_file_path=$HOME/cudnn-linux-x86_64-8.9.2.26_cuda11-archive.tar.xz \ + --scratch_path=$HOME/mlperf_scratch \ + --docker_cm_repo=mlcommons@ck \ + --results_dir=$HOME/results_dir \ + --submission_dir=$HOME/submission_dir \ + --adr.compiler.tags=gcc + ``` + +5. At the end of the build you'll get a prompt - please enter your system name such as "aws_nvidia_t4" + (note that space, `-` and other special characters are not allowed), + and say `yes` to generating the configuration files. + + ``` + ============================================ + => A system ID is a string containing only letters, numbers, and underscores + => that is used as the human-readable name of the system. It is also used as + => the system name when creating the measurements/ and results/ entries. + => This string should also start with a letter to be a valid Python enum member name. 
+ => Specify the system ID to use for the current system: phoenix + => Reloaded system list. MATCHED_SYSTEM: KnownSystem.phoenix + => This script will generate Benchmark Configuration stubs for the detected system. + Continue? [y/n]: y + ``` + Now you'll be inside the CM Nvidia docker container and can access Nvidia implementations of MLPerf inference benchmarks. + +6. Once the build is complete, you can run Nvidia implementations of MLPerf inference benchmarks + using the unified CM interface. + + You can also save the container at this stage using [Docker commit](https://docs.docker.com/engine/reference/commandline/commit/) + so that it can be launched later without having to go through the previous steps. + + +### Do a test run to detect and record the system performance + +``` +cmr "generate-run-cmds inference _find-performance _all-scenarios" \ + --model=bert-99 \ + --implementation=nvidia-original \ + --device=cuda \ + --backend=tensorrt \ + --category=edge \ + --division=closed \ + --test_query_count=1000 \ + --quiet +``` + +### Do full accuracy and performance runs + +``` +cmr "generate-run-cmds inference _submission _allscenarios" \ + --model=bert-99 \ + --device=cuda \ + --implementation=nvidia-original \ + --backend=tensorrt \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --category=edge \ + --division=closed \ + --quiet +``` + +* `--offline_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +### Populate the README files describing your submission + +``` +cmr "generate-run-cmds inference _populate-readme _all-scenarios" \ + --model=bert-99 \ + --device=cuda \ + --implementation=nvidia-original \ + --backend=tensorrt \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --category=edge \ + --division=closed \ + --quiet +``` + +### Generate and upload MLPerf submission + +Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/Submission.md) to generate the submission tree and upload your results. + + +## Questions? Suggestions? + +Please follow the [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) +and [MLCommons](https://mlcommons.org). diff --git a/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md new file mode 100644 index 0000000000..39b1cc0de2 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md @@ -0,0 +1,314 @@ +# Introduction + +This guide will help you run the reference implementation of the MLPerf inference benchmark v3.1 +with GPT-J 6B model and PyTorch on any Linux-based system with Nvidia GPU (24GB min memory required) +using the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339). + +CM will help you to obtain performance and accuracy numbers for GPT-J 6B model on your system +for the SingleStream scenario and submit them to the official MLPerf v3.1 inference benchmarking round +in open division and edge category +(**deadline to send us results for v3.1 submission: August 3, 2023**). + +You can read more about scenarios, divisions and categories of MLPerf inference benchmarks +in this [MLPerf inference benchmark paper](https://arxiv.org/abs/1911.02549) - +our goal is to help the community compare performance, accuracy and other metrics of popular models across diverse systems +in an automated, unified and reproducible way! 
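+
+Before starting, it is worth confirming that the GPU you plan to use actually exposes the 24GB of memory assumed by this guide; a standard `nvidia-smi` query is enough:
+
+```bash
+# Check the GPU name and total memory (this guide assumes a 24GB+ Nvidia GPU).
+nvidia-smi --query-gpu=name,memory.total --format=csv
+```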
+ +This benchmark will require ~30GB of disk space and can take ~1 day to run on one system +to have a valid MLPerf result. + + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://github.com/mlcommons/ck) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. + +Note that CM will store all such repositories and downloaded/installed data sets, models, and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup virtual environment + +We suggest you to setup a Python virtual environment via CM to avoid contaminating your existing Python installation: + +```bash +cm run script "install python-venv" --name=mlperf --version_min=3.8 +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" +``` + +CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: +```bash +cm show cache --tags=python-venv +``` + +Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. + +You can clean it at any time and start from scratch using the following command: +```bash +cm rm cache -f +``` + +Alternatively, you can remove specific entries using tags: +```bash +cm show cache +cm rm cache --tags=tag1,tag2,... +``` + + +## Do the performance run + +Now you can run MLPerf inference benchmark to measure performance of GPT-J using CM command as follows +(note that `cmr` is equivalent to `cm run script`): + +```bash +cm run script --tags=generate-run-cmds,inference,_performance-only \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + +Note that this command will need to automatically download the model (24GB) +and [CNN Daily Mail dataset (relatively small)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-dataset-cnndm)! + +The benchmark run is expected to finish within 10-100 minutes depending on the performance of your GPU. 
+ +In the end of the valid run, you should see [output](https://github.com/ctuning/mlperf_inference_submissions_v3.1/blob/main/open/cTuning/results/amd_zen4_workstation-reference-gpu-pytorch-v2.0.1-default_config/gptj-99/singlestream/performance/run_1/mlperf_log_summary.txt) similar to + +```txt +================================================ +MLPerf Results Summary +================================================ +SUT name : PySUT +Scenario : SingleStream +Mode : PerformanceOnly +90th percentile latency (ns) : 4751920830 +Result is : VALID + Min duration satisfied : Yes + Min queries satisfied : Yes + Early stopping satisfied: Yes +Early Stopping Result: + * Processed at least 64 queries (201). + * Would discard 9 highest latency queries. + * Early stopping 90th percentile estimate: 5387449249 + * Not enough queries processed for 99th percentile + early stopping estimate (would need to process at + least 662 total queries). + +================================================ +Additional Stats +================================================ +QPS w/ loadgen overhead : 0.33 +QPS w/o loadgen overhead : 0.33 + +Min latency (ns) : 881803157 +Max latency (ns) : 5939081711 +Mean latency (ns) : 3008773902 +50.00 percentile latency (ns) : 2788885477 +90.00 percentile latency (ns) : 4751920830 +95.00 percentile latency (ns) : 5307244203 +97.00 percentile latency (ns) : 5677375096 +99.00 percentile latency (ns) : 5927209480 +99.90 percentile latency (ns) : 5939081711 + +================================================ +Test Parameters Used +================================================ +samples_per_query : 1 +target_qps : 2000 +target_latency (ns): 0 +max_async_queries : 1 +min_duration (ms): 600000 +max_duration (ms): 620000 +min_query_count : 100 +max_query_count : 0 +qsl_rng_seed : 148687905518835231 +sample_index_rng_seed : 520418551913322573 +schedule_rng_seed : 811580660758947900 +accuracy_log_rng_seed : 0 +accuracy_log_probability : 0 +accuracy_log_sampling_target : 0 +print_timestamps : 0 +performance_issue_unique : 0 +performance_issue_same : 0 +performance_issue_same_index : 0 +performance_sample_count : 13368 + +No warnings encountered during test. + +No errors encountered during test. +``` + + +## Do the accuracy run + +```bash +cm run script --tags=generate-run-cmds,inference,_accuracy-only \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + +This accuracy run can take many hours (typically 12..46 hours). You can estimate it using the QPS (queries per second) +from the previous performance run as follows: + +accuracy time = data set / QPS = 13368 / QPS . + +For example, if your reported QPS is 0.1 (equivalent to 10000 ms latency), it will take 13368/0.1 ~ 37 hours. 
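+
+The sketch below simply evaluates the same formula for your own measured QPS (13368 is the number of samples in the data set used by this benchmark; 0.1 is just the example value from the text above):
+
+```bash
+# Estimate the accuracy-run time from the measured QPS.
+QPS=0.1
+awk -v qps="$QPS" 'BEGIN { printf "Estimated accuracy run: %.1f hours\n", 13368/qps/3600 }'
+# -> Estimated accuracy run: 37.1 hours
+```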
+ + + +## Populate the MLPerf README files describing your submission + +Now you can use CM to automatically populate README files mandated by MLPerf to describe your submission +(we also show you a simpler syntax of `cmr` instead of `cm run script --tags=`): + +```bash +cmr "generate-run-cmds inference _populate-readme" \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + + +## Generate MLPerf submission + +Unless your organization is an official member of MLCommons, you will be able to participate in the official MLPerf inference community submission +via the cTuning foundation (founding member of MLCommons). + +You should update the following flags in the below CM command: +* Use `--hw_notes_extra` option to add your name to the submission such as `--hw_notes_extra="Result taken by NAME" `. +* Use `--hw_name="My system name"` to give a meaningful system name describing your GPU. + Examples can be seen [here](https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning/systems). +* Use `--submitter=` if your organization is an official MLCommons member and you would like to submit under your organization. + +You should use the master branch of MLCommons inference repo for the submission checker: + +```bash +cmr "generate inference submission" \ + --clean \ + --submitter=cTuning \ + --results_dir=$HOME/results_dir/valid_results \ + --submission_dir=$HOME/inference_submission_tree \ + --preprocess_submission=yes \ + --adr.compiler.tags=gcc \ + --adr.inference-src.version=master \ + --run-checker +``` + +## Push the results to GitHub repo + +1. Create a fork of [this cTuning repo with the community results](https://github.com/ctuning/mlperf_inference_submissions_v3.1). + +2. Run the following command after replacing `--repo_url` with your fork URL. + + ``` + cmr "push github mlperf inference submission" \ + --submission_dir=$HOME/inference_submission_tree \ + --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.1/ \ + --commit_message="GPTJ results on added by " + ``` + +3. Create a PR to the [cTuning repo with the community results](https://github.com/ctuning/mlperf_inference_submissions_v3.1) + + + + + + + + + +## Additional performance optimization challenge for interested enthusiasts + +The MLPerf GPT-J inference benchmark is implemented in this [backend.py](https://github.com/mlcommons/inference/blob/master/language/gpt-j/backend.py). + +It is automatically installed and cached by CM. You can find it on your system using this command: +```bash +cd `cm find cache --tags=inference,src,_branch.master`/language/gpt-j +ls backend.py +``` + +The original model is available at the [Hugging Face Zoo](https://huggingface.co/EleutherAI/gpt-j-6b). It was fine-tuned by Intel for this benchmark +and is available at the MLCommons cloud. It is automatically downloaded by CM using [this script](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-ml-model-gptj/_cm.json). + +You can try to improve the performance (QPS) on this code or fine-tune model and substitute the default one +in [this line](https://github.com/mlcommons/inference/blob/master/language/gpt-j/backend.py#L27). 
+ +Some examples of fine-tuning can be seen [here](https://betterprogramming.pub/fine-tuning-gpt-j-6b-on-google-colab-or-equivalent-desktop-or-server-gpu-b6dc849cb205). + +Any better performance or accuracy result will be very valuable to the community. + +After any modification, you can redo a quick performance run to see the performance difference. +``` +cm run script --tags=generate-run-cmds,inference,_performance-only \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + + + diff --git a/challenge/train-llm-for-cm-mlperf-2023/README.md b/challenge/train-llm-for-cm-mlperf-2023/README.md new file mode 100644 index 0000000000..4e9f6cf178 --- /dev/null +++ b/challenge/train-llm-for-cm-mlperf-2023/README.md @@ -0,0 +1,20 @@ +### Challenge + +Improve the prototype of our LLM-based assistant to suggest users how to run MLPerf inference benchmarks +using the MLCommons CM automation language: https://access.cknowledge.org/assistant . + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *Get in touch with organizers for more info!* + + +### Organizers + +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) diff --git a/challenge/train-llm-for-cm-mlperf-2023/_cm.json b/challenge/train-llm-for-cm-mlperf-2023/_cm.json new file mode 100644 index 0000000000..ce6009db37 --- /dev/null +++ b/challenge/train-llm-for-cm-mlperf-2023/_cm.json @@ -0,0 +1,21 @@ +{ + "alias": "train-llm-for-cm-mlperf-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close_extension": true, + "date_open": "20230704", + "experiments": [], + "points": 3, + "tags": [ + "train", + "improve", + "llm", + "assistant", + "mlperf-llm", + "mlperf-llm-assistant", + "mlperf-assistant" + ], + "title": "Train and improve LLM to suggest users how to run MLPerf inference benchmarks using CM automation language", + "trophies": true, + "uid": "d37bf37a24c44ec3" +} diff --git a/project/mlperf-inference-v3.0-submissions/README.md b/project/mlperf-inference-v3.0-submissions/README.md new file mode 100644 index 0000000000..7ad8080b0c --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/README.md @@ -0,0 +1,10 @@ +Graphs: + https://cknowledge.org/cm-gui-graph/?tags=mlperf-inference,all,open,edge,image-classification,singlestream + https://cknowledge.org/cm-gui-graph/?tags=mlperf-inference,v3.0,open,edge,image-classification,singlestream&x=Result&y=Accuracy + + http://localhost:8501/?tags=mlperf-inference,v3.0,open,edge,image-classification,singlestream&x=Result&y=Accuracy + http://localhost:8501/?tags=mlperf-inference,all,open,edge,image-classification,singlestream&x=Result&y=Accuracy + +Local: + cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.1" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.1 + cm run script "gui _graph" diff --git a/project/mlperf-inference-v3.0-submissions/_cm.json 
b/project/mlperf-inference-v3.0-submissions/_cm.json new file mode 100644 index 0000000000..2cc81aa8b0 --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/_cm.json @@ -0,0 +1,7 @@ +{ + "alias": "mlperf-inference-v3.0-submissions", + "automation_alias": "project", + "automation_uid": "6882553224164c56", + "tags": [], + "uid": "f571becbcbd44a7d" +} diff --git a/project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md b/project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md new file mode 100644 index 0000000000..9aae9bbe55 --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/docs/crowd-benchmark-mlperf-bert-inference-cuda.md @@ -0,0 +1,285 @@ +# Crowd-benchmarking MLPerf BERT inference + +
+Click here to see the table of contents. + +* [Crowd-benchmarking MLPerf BERT inference](#crowd-benchmarking-mlperf-bert-inference) +* [System preparation](#system-preparation) + * [Minimal system requirements](#minimal-system-requirements) + * [Install CM (CK2) automation meta-framework](#install-cm-ck2-automation-meta-framework) + * [Pull CM repository with portable automation recipes](#pull-cm-repository-with-portable-automation-recipes) + * [Detect or install CUDA](#detect-or-install-cuda) + * [Test CUDA installation](#test-cuda-installation) + * [Install Python virtual environment](#install-python-virtual-environment) + * [Detect or install cuDNN](#detect-or-install-cudnn) + * [Detect or install TensorRT](#detect-or-install-tensorrt) + * [Run MLPerf inference benchmark with BERT](#run-mlperf-inference-benchmark-with-bert) + * [Try ONNX runtime backend](#try-onnx-runtime-backend) + * [Do a test run to detect and record the system performance](#do-a-test-run-to-detect-and-record-the-system-performance) + * [Do a full accuracy run for all the scenarios](#do-a-full-accuracy-run-for-all-the-scenarios) + * [Do a full performance run for all the scenarios](#do-a-full-performance-run-for-all-the-scenarios) + * [Populate the README files](#populate-the-readme-files) + * [Generate MLPerf submission tree](#generate-mlperf-submission-tree) + * [Push the results to GitHub repo](#push-the-results-to-github-repo) + * [Try PyTorch backend](#try-pytorch-backend) + * [Test composable ML benchmark with other models, data sets, frameworks and platforms](#test-composable-ml-benchmark-with-other-models-data-sets-frameworks-and-platforms) +* [The next steps](#the-next-steps) + +
+ + +This is a pilot community project to collaboratively run MLPerf BERT inference benchmark +across diverse platforms provided by volunteers similar to [SETI@home](https://setiathome.berkeley.edu/). +However, instead of searching for extraterrestrial intelligence, we are +searching for optimal software/hardware combination to run various AI and ML workloads +in terms of performance, accuracy, power and costs ... + +This benchmark is composed from [portable and reusable automation recipes](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +developed by [MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +to modularize complex AI and ML Systems and automate their benchmarking, design space exploration, optimization and deployment +across continuously evolving software, hardware, models and data. + +*If you submit your results before 1pm PST on Friday 3rd, 2023, + they will be accepted for the official MLPerf inference v3.0 submission round + and your name acknowledged in the notes!* + + +# System preparation + +## Minimal system requirements + +* CPU: any x86-64 or Arm64 based machine +* GPU: any relatively modern Nvidia GPU with 8GB+ memory and CUDA 11.4+ +* OS: we have tested this automation on Ubuntu 20.04, Ubuntu 22.04 and Debian 10 +* Disk space: ~10GB +* Python: 3.8+ +* All other dependencies (artifacts and tools) will be installed by the CM meta-framework aka (CK2) + +## Install CM (CK2) automation meta-framework + +Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install the [MLCommons CM framework](https://github.com/mlcommons/ck) +(the 2nd generation on the Collective Mind framework) on your system. + +## Pull CM repository with portable automation recipes + +Pull MLCommons CM repository with [cross-platform CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +supporting portable MLOps and DevOps: + +```bash +cm pull repo mlcommons@ck +``` + +CM pulls all such repositories into the `$HOME/CM` directory to search for portable CM automation recipes and artifacts. + +We use the unified CM CLI & Python API of [portable and reusable CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +to compose portable automation pipelines (also implemented as CM scripts) that can automatically detect or install all necessary artifacts (tools, models, datasets, libraries, etc) +required to run a given software project such as the MLPerf inference benchmark. + +These CM scripts simply wrap existing native scripts and tools as simple micro-services +with a human-readable CLI and simple Python API to be able to easily connect them together +and run on any platform in a unified way. + +## Detect or install CUDA + +Run the following CM script: +```bash +cm run script "get cuda" --out=json +``` + +If CUDA is automatically detected, it will be registered in the CM cache: +```bash +cm show cache --tags=get,cuda +``` + +Otherwise, this script will attempt to download and install the latest CUDA +from Nvidia website. + +Please report any issue with CM scripts [here](https://github.com/mlcommons/ck/issues). + +### Test CUDA installation + +You can test if CUDA toolkit and driver was detected or installed successfully using the following command: +```bash +cm run script "get cuda-devices" +``` + +You should see similar output: +```txt +Checking compiler version ... 
+ +nvcc: NVIDIA (R) Cuda compiler driver +Copyright (c) 2005-2022 NVIDIA Corporation +Built on Wed_Sep_21_10:33:58_PDT_2022 +Cuda compilation tools, release 11.8, V11.8.89 +Build cuda_11.8.r11.8/compiler.31833905_0 + +Compiling program ... + +Running program ... + + - Running postprocess ... +GPU Device ID: 0 +GPU Name: Tesla K80 +GPU compute capability: 3.7 +CUDA driver version: 11.4 +CUDA runtime version: 11.8 +Global memory: 11997020160 +Max clock rate: 823.500000 MHz +Total amount of shared memory per block: 49152 +Total number of registers available per block: 65536 +Warp size: 32 +Maximum number of threads per multiprocessor: 2048 +Maximum number of threads per block: 1024 +Max dimension size of a thread block X: 1024 +Max dimension size of a thread block Y: 1024 +Max dimension size of a thread block Z: 64 +Max dimension size of a grid size X: 2147483647 +Max dimension size of a grid size Y: 65535 +Max dimension size of a grid size Z: 65535 + + - running time of script "get,cuda-devices": 4.16 sec. + +``` + +## Install Python virtual environment + +```bash +cm run script "get sys-utils-cm" --quiet + +cm run script "install python-venv" --name=mlperf-cuda +``` + +If you want to install specific version of Python use the following command: +```bash +cm run script "install python-venv" --version=3.10.8 --name=mlperf-cuda +``` + +## Detect or install cuDNN + +```bash +cm run script "get cudnn" +``` + +If cuDNN is not detected on your system, you can download a TAR file +from [Nvidia website](https://developer.nvidia.com/cudnn) and then use the same CM script +to install it as follows: +```bash +cm run script "get cudnn" --tar_file= +``` + +We have tested this project with the following tar file `cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz`. + +## Detect or install TensorRT + +```bash +cm run script "get tensorrt" +``` +If TensorRT is not detected on your system, you can download a TAR file +from [Nvidia website](https://developer.nvidia.com/tensorrt) and then use the same CM script +to install it as follows: +```bash +cm run script "get tensorrt" --tar_file= +``` + +We have tested this project with the following tar file `TensorRT-8.5.1.7.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz`. 
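+
+The same detection steps can also be driven from the unified CM Python API mentioned earlier.
+The snippet below is a minimal illustrative sketch (not part of the official workflow) and only
+assumes that the `cmind` package from the installation guide above is available:
+
+```python
+# Sketch: detect (or install) CUDA, cuDNN and TensorRT via the CM Python API.
+import cmind
+
+for tags in ("get,cuda", "get,cudnn", "get,tensorrt"):
+    r = cmind.access({'action': 'run',
+                      'automation': 'script',
+                      'tags': tags,
+                      'out': 'con'})
+    if r['return'] > 0:
+        # CM returns a non-zero code and an 'error' string on failure
+        raise RuntimeError(r.get('error', f'CM script failed for tags: {tags}'))
+    print(f"{tags}: detected or installed")
+```
+
+This mirrors the `cm run script "get ..."` commands above and leaves the detected artifacts
+registered in the same CM cache, so the CLI and the Python API can be mixed freely.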
+
+
+## Run MLPerf inference benchmark with BERT
+
+### Try ONNX runtime backend
+
+#### Do a test run to detect and record the system performance
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+ --adr.python.name=mlperf-cuda --model=bert-99 --implementation=reference \
+ --device=cuda --backend=onnxruntime --quiet
+```
+
+#### Do a full accuracy run for all the scenarios
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \
+ --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \
+ --implementation=reference --backend=onnxruntime --quiet \
+ --execution-mode=valid --results_dir=$HOME/inference_3.0_results
+```
+
+#### Do a full performance run for all the scenarios
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios \
+ --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \
+ --implementation=reference --backend=onnxruntime --quiet \
+ --execution-mode=valid --results_dir=$HOME/inference_3.0_results
+```
+
+#### Populate the README files
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+ --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \
+ --implementation=reference --backend=onnxruntime --quiet \
+ --execution-mode=valid --results_dir=$HOME/inference_3.0_results
+```
+
+#### Generate MLPerf submission tree
+
+We should use the master branch of the MLCommons inference repo for the submission checker.
+You can use the `--hw_notes_extra` option to add your name to the notes.
+
+```bash
+cm run script --tags=generate,inference,submission \
+ --results_dir=$HOME/inference_3.0_results/valid_results \
+ --adr.python.name=mlperf-cuda \
+ --device=cuda --submission_dir=$HOME/inference_submission_tree --clean \
+ --run-checker --submitter=cTuning --adr.inference-src.version=master \
+ --hw_notes_extra="Result taken by " --quiet
+```
+
+#### Push the results to GitHub repo
+
+First create a fork of [this GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0).
+Then run the following command after replacing `--repo_url` with your fork URL.
+
+```bash
+cm run script --tags=push,github,mlperf,inference,submission \
+ --submission_dir=$HOME/inference_submission_tree \
+ --adr.python.name=mlperf-cuda \
+ --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0 \
+ --commit_message="Bert crowd-results added"
+```
+
+Create a PR to the [GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0/).
+
+
+
+### Try PyTorch backend
+
+You can run the same commands with PyTorch by rerunning all the above commands and replacing `--backend=onnxruntime` with `--backend=pytorch`.
+
+For example,
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \
+ --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \
+ --implementation=reference --backend=pytorch --execution-mode=valid \
+ --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+
+## Test composable ML benchmark with other models, data sets, frameworks and platforms
+
+* [GUI to prepare CM command line and run benchmark](https://cknowledge.org/mlperf-inference-gui)
+* [GUI to compare performance, accuracy, power and costs of ML/SW/HW combinations](https://cKnowledge.org/cm-gui-graph)
+
+
+# The next steps
+
+Feel free to join our [open taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md)
+and the public [Discord server](https://discord.gg/JjWNWXKxwT) to learn about our roadmap and related community projects.
+
+Our ultimate goal is to help anyone automatically find or generate the optimal software/hardware stack from the cloud to the edge
+for their AI/ML tasks based on their requirements and constraints (accuracy, performance, power consumption, costs, etc.).
+
+*Prepared by [Arjun Suresh](https://www.linkedin.com/in/arjunsuresh) and [Grigori Fursin](https://cKnowledge.org/gfursin) (OctoML, MLCommons, cTuning foundation)*
diff --git a/project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md b/project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md
new file mode 100644
index 0000000000..824279732e
--- /dev/null
+++ b/project/mlperf-inference-v3.0-submissions/docs/generate-bert-submission.md
@@ -0,0 +1,87 @@
+## Setup
+Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM.
+Download the CK repository to get the CM scripts for the MLPerf submission:
+```
+cm pull repo mlcommons@ck
+```
+## Run Commands
+
+Bert has two variants - `bert-99` and `bert-99.9`, where the `99` and `99.9` specify the required accuracy constraint with respect to the reference floating-point model. The `bert-99.9` model is applicable only to a datacenter system.
+
+In the edge category, `bert-99` has the Offline and SingleStream scenarios, and in the datacenter category both `bert-99` and `bert-99.9` have the Offline and Server scenarios. The commands below assume an edge category system.
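+
+To make the accuracy constraint concrete, the short sketch below shows how the `99` and `99.9`
+suffixes translate into accuracy targets relative to the FP32 reference model. The reference F1
+value used here is only an illustrative assumption; the submission checker enforces the official
+numbers from the MLPerf inference rules:
+
+```python
+# Illustrative only: accuracy targets as a fraction of the FP32 reference accuracy.
+reference_f1 = 90.874          # assumed SQuAD v1.1 F1 of the FP32 reference BERT
+
+bert_99_target = 0.99 * reference_f1     # minimum F1 for bert-99
+bert_999_target = 0.999 * reference_f1   # minimum F1 for bert-99.9
+
+print(f"bert-99   needs F1 >= {bert_99_target:.3f}")
+print(f"bert-99.9 needs F1 >= {bert_999_target:.3f}")
+```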
+
+### Onnxruntime backend
+
+#### Do a test run to detect and record the system performance
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=bert-99 --implementation=reference --device=cpu --backend=onnxruntime --quiet
+```
+* Use `--device=cuda` to run the inference on an Nvidia GPU
+* Use `--division=closed` to run all scenarios for a closed division, including the compliance tests
+* Use `--category=datacenter` to run datacenter scenarios
+
+#### Do a full accuracy run for all the scenarios
+
+```
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \
+--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+#### Do a full performance run for all the scenarios
+```
+cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios \
+--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \
+--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+#### Generate actual submission tree
+
+We should use the master branch of the MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+```
+cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \
+--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \
+--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet
+```
+
+#### Push the results to GitHub repo
+
+First create a fork of [this repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/). Then run the following command after replacing `--repo_url` with your fork URL.
+```
+cm run script --tags=push,github,mlperf,inference,submission \
+--submission_dir=$HOME/inference_submission_tree \
+--repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0/ \
+--commit_message="Bert results added"
+```
+
+Create a PR to the [cTuning repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/).
+
+## Tensorflow backend
+
+Same commands as for `onnxruntime` should work by replacing `--backend=onnxruntime` with `--backend=tf`. For example,
+
+```
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \
+--model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \
+--results_dir=$HOME/inference_3.0_results --quiet
+```
+
+## Pytorch backend
+
+Same commands as for `onnxruntime` should work by replacing `--backend=onnxruntime` with `--backend=pytorch`.
For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=pytorch \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + diff --git a/project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md b/project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md new file mode 100644 index 0000000000..9129004321 --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/docs/generate-resnet50-submission.md @@ -0,0 +1,74 @@ +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + +On edge category ResNet50 has Offline, SingleStream and MultiStream scenarios and in datacenter category it has Offline and Server scenarios. The below commands are assuming an edge category system. + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_full,_all-scenarios --model=resnet50 \ +--device=cpu --backend=onnxruntime --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for a closed division including the compliance tests +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy run for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios --model=resnet50 --device=cpu \ +--implementation=reference --backend=onnxruntime --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + +#### Do a full performance run for all the scenarios +``` +cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios --model=resnet50 --device=cpu \ +--implementation=reference --backend=onnxruntime --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios --model=resnet50 --device=cpu \ +--implementation=reference --backend=onnxruntime --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + +#### Generate actual submission tree + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--submission_dir=$HOME/inference_submission_tree --clean \ +--run-checker --submitter=cTuning --adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + +#### Push the results to GitHub repo + +First create a fork of [this repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/). Then run the following command after replacing `--repo_url` with your fork URL. 
+```
+cm run script --tags=push,github,mlperf,inference,submission --submission_dir=$HOME/inference_submission_tree \
+--repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0/ \
+--commit_message="ResNet50 results added"
+```
+
+Create a PR to the [cTuning repo](https://github.com/ctuning/mlperf_inference_submissions_v3.0/).
+
+## Tensorflow backend
+
+Same commands as for `onnxruntime` should work by replacing `--backend=onnxruntime` with `--backend=tf`. For example,
+
+```
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios --model=resnet50 --device=cpu \
+--implementation=reference --backend=tf --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet
+```
+
+## TVM backend
+
+Same commands as for `onnxruntime` should work by replacing `--backend=onnxruntime` with `--backend=tvm-onnx` (only `--device=cpu` is currently supported for TVM). For example,
+
+```
+cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios --model=resnet50 --device=cpu \
+--implementation=reference --backend=tvm-onnx --execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet
+```
diff --git a/project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md b/project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md
new file mode 100644
index 0000000000..c35aada995
--- /dev/null
+++ b/project/mlperf-inference-v3.0-submissions/docs/run-nvidia-implementation.md
@@ -0,0 +1,47 @@
+## Run Commands
+
+We need the full ImageNet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+
+Requirements: You need to have CUDA, cuDNN and TensorRT installed on your system.
+
+If CUDA is not detected, CM should download and install it automatically when you run the workflow.
+
+For x86 machines, you can download the tar files for cuDNN and TensorRT and install them using the following commands:
+```bash
+cm run script --tags=get,cudnn --tar_file=
+```
+
+```bash
+cm run script --tags=get,tensorrt --tar_file=
+```
+
+On other systems you can do a package manager install, and CM should then pick up the installation automatically during the workflow run.
+
+Nvidia run configuration values for each model-scenario combination on known systems are stored in `__init__.py` files under the `configs` directory. For custom systems these are stored in `custom.py` files. When custom config files are generated, they override the default config values with empty ones (not desirable), so you'll probably need to open the custom config file and comment out those overrides. Typically `gpu_batch_size` and `offline_expected_qps` are enough for an offline scenario run on a typical single-GPU system.
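+
+For illustration, the sketch below shows the kind of fields you would typically keep in a cleaned-up
+`custom.py` for an Offline run on a single-GPU system. The class name and values are assumptions made
+for this example and are not copied from any real system config; in the real file these fields live
+inside the config classes generated by the Nvidia code:
+
+```python
+# Hypothetical excerpt of a cleaned-up custom.py (names and values are placeholders).
+class CustomSystem_Offline_Config:       # stands in for the auto-generated config class
+    gpu_batch_size = 64                  # tune so the batch fits in GPU memory
+    offline_expected_qps = 40000         # rough expected Offline throughput for LoadGen
+    # input_dtype = ''                   # leave the empty auto-generated overrides
+    # precision = ''                     # commented out so the defaults are used
+```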
+ + +## Build Nvidia Inference Server +``` +cm run script --tags=build,nvidia,inference,server +``` + +## Run ResNet50 + +### Find SUT performance + +``` +cm run script --tags=generate,run-cmds,inference,_find-performance --model=resnet50 --implementation=nvidia-original \ +--device=cuda --adr.nvidia-harness.gpu_batch_size=64 --results_dir=$HOME/nvidia_original_results +``` + +### Do a complete submission run + +``` +cm run script --tags=generate,run-cmds,inference,_submission,_full --execution_mode=valid --model=resnet50 \ +--implementation=nvidia-original --device=cuda --adr.nvidia-harness.gpu_batch_size=64 \ +--adr.nvidia-harness.skip_preprocess=yes --adr.nvidia-harness.make_cmd=run_harness \ +--results_dir=$HOME/nvidia_original_results --submission_dir=$HOME/nvidia_original_submissions \ +--division=open --submitter=cTuning --category=edge +``` + diff --git a/project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md b/project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md new file mode 100644 index 0000000000..cb74086b54 --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/docs/setup-aws-graviton.md @@ -0,0 +1,25 @@ +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + +## Create an AWS Graviton Instance + +``` +cd $HOME/CM/repos/mlcommon@ck/cm-mlops/script/run-terraform/aws/ +cp credentials.example credentials.sh +``` +Update `credentials.sh` with your AWS Key, Secret and Token + +``` +cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \ +--cminit --key_file=$HOME/cmuser.pem +``` + +The above command will output the IP of the created instance which will be having CM setup already done + +Copy the imagenet dataset to the created instance. For example, + +``` +rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134: +``` + diff --git a/project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd b/project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd new file mode 100644 index 0000000000..87fa4e9ba2 --- /dev/null +++ b/project/mlperf-inference-v3.0-submissions/get-mlperf-inference-repos.cmd @@ -0,0 +1,3 @@ +cm run script "get git repo _repo.https://github.com/ctuning/mlperf_inference_submissions_v3.0" --extra_cache_tags=mlperf-inference-results,version-3.0 +cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.1" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.1 +cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.0" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.0 diff --git a/report/mlperf-inference-v3.1-analysis-ctuning/README.md b/report/mlperf-inference-v3.1-analysis-ctuning/README.md new file mode 100644 index 0000000000..9d4b696949 --- /dev/null +++ b/report/mlperf-inference-v3.1-analysis-ctuning/README.md @@ -0,0 +1,93 @@ +On this page, we highlight some of the exciting submissions done by CTuning for the MLCommons Inference 3.1 round. 
+ +## Top Results in Edge Category + +In the edge category, Rigel Supercomputers from One Stop Systems achieved the peak offline performance for the four submitted benchmarks - Image classification (ResNet50), Object detection (RetinaNet), Language processing (Bert) and Speech Recognition (RNNT). The below graph compares the peak performance of bert-99 model among the top 10 performing systems. + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/9f8e3367-1ca4-4298-8545-285cdedfc991) + + +Nvidia RTX 4090 has the best performance for performance per accelerator, and this accelerator is assembled on a PC made by PCSPECIALIST UK. The below graph compares the performance per accelerator of bert-99 model among the top 10 performing systems. + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c02120cb-eda9-4eef-9e22-56fff4bf23a7) + + +Nvidia RTX 4090 wins the latency metric too for ResNet50, Bert and 3d-unet in the SingleStream scenario. +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/6d4b39a0-9f39-474a-ac16-5498e281ebad) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/8afb5609-581d-4ee8-be56-731af731f10f) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/5cb88f53-9255-4a0b-98df-a192ba87b125) + + +## Best energy efficient results in Edge category + +For the Speech Recognition model rnnt, CTuning submitted the best power-efficient result on Nvidia Jetson Orin AGX. + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d485aa50-a0d4-4a40-a805-cc2ddc3e0ca6) + + +For the Medical Imaging model 3d-unet where the samples per second is quite low, the best 4 energy efficient results are by CTuning. + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d15297fb-3eff-47c9-b188-68d438b7f248) + +For the Language Processing model bert-99, gloria highend system from Qualcomm tops the energy efficiency metric and CTuning's Nvidia Jetson Orin AGX is at second place. + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/22c85404-51f5-44b7-b128-8df4579c635c) + + + +## Benchmarking Rigel Supercomputer + +Rigel Edge Supercomputer from OneStopSytems wins the peak performance for all four submitted models and comfortably beats the second-place system. It also wins the best latency for ResNet50 MultiStream scenario. + + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/635f5f29-080f-4c7c-85a5-65fcf438f9e1) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c993c2f5-a8b7-4a11-b89f-35d96e357e42) + + + + + +## Benchmarking MLPerf Inference Reference Implementations + +We compared the performance of the reference implementation with that of the Nvidia optimized implementation by running both implementations on an Nvidia RTX 4090 GPU. Reference implementation uses fp32 models whereas Nvidia implementation uses quantized models. + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/b46bc509-f242-4bc6-a9e8-ec318d09616b) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/404b54d2-a04e-4e5e-861d-43c7d940faf8) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/f5a04e85-269f-485a-8839-348dddcd5eb7) + +## Showcasing Apple Metal Performance + +We benchmarked the performance of Apple metal using Tensorflow-metal. The below graphs show the performance benefit of running inference on Apple meta using tensorflow-metal versus onnxruntime running only on CPUs. 
+ +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/87385e24-b3b5-4694-8106-2c30eeb393de) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c9a38dc9-0986-461e-b81d-988297e1771e) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/4b8565b4-7a23-4f29-b450-6eaf00d10f63) + + + + + +## Design Space Exploration For NeuralMagic Deepsparse Library + +Using CM experiment automation we did a design space exploration to find the optimal batch size for the bert-99 compatible sparse models. + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/a18088f2-c864-4c16-b714-5b375cf5fc94) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/8bd95c5f-344f-4d9f-9f94-c3024efbce13) + + +## Comparing the performance of Modular MLPerf Inference C++ implementations + +Here we compare the performance of MIL Library used by CTuning and the KILT library used by KRAI both on CPUs and GPUs. This is not an apple-to-apple comparison as KILT used Nvidia Nvidia A1000 GPU and MIL was run on Nvidia RTX 4090 GPU. For CPUs, KILT was run on a [24-core Dell server](https://github.com/mlcommons/inference_results_v3.1/blob/main/closed/Krai/systems/7920t-kilt-onnxruntime_cpu.json) with peak frequency of 4000 MHz whereas MIL was run on a [16 core PCSPECIALIST custom workstation](https://github.com/mlcommons/inference_results_v3.1/blob/main/closed/CTuning/systems/amd_ryzen_workstation-cpp-cpu-onnxruntime-vdefault-default_config.json) with peak frequency of 5900 MHz. + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/6d73360a-27ab-4158-b4cc-a5724d6d4c73) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d6b5516b-4861-4355-badf-65decbf8d3b0) + diff --git a/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json b/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json new file mode 100644 index 0000000000..4860af17b2 --- /dev/null +++ b/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json @@ -0,0 +1,16 @@ +{ + "alias": "mlperf-inference-v3.1-analysis-ctuning", + "automation_alias": "report", + "automation_uid": "6462ecdba2054467", + "date":"20230917", + "title":"cTuning's analysis of MLPerf inference v3.1 community results", + "tags": [ + "mlperf", + "inference", + "mlperf-inference", + "v3.1", + "analysis", + "ctuning" + ], + "uid": "ebc483653dbc45b6" +} diff --git a/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json b/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json new file mode 100644 index 0000000000..99d0370a50 --- /dev/null +++ b/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json @@ -0,0 +1,17 @@ +{ + "alias": "mlperf-inference-v3.1-press-release-ctuning", + "automation_alias": "report", + "automation_uid": "6462ecdba2054467", + "date": "20230913", + "redirect": "https://www.linkedin.com/pulse/new-milestone-make-mlperf-benchmarks-accessible-everyone-fursin", + "tags": [ + "mlperf", + "inference", + "mlperf-inference", + "v3.1", + "analysis", + "ctuning" + ], + "title": "cTuning press-release about making MLPerf inference accessible to everyone", + "uid": "85ff4a6ac203411e" +} diff --git a/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json b/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json new file mode 100644 index 0000000000..159a986735 --- /dev/null +++ b/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json @@ -0,0 +1,17 @@ +{ + "alias": "mlperf-inference-v3.1-press-release-hpcwire", + "automation_alias": "report", + "automation_uid": "6462ecdba2054467", + "date": 
"20230913", + "tags": [ + "mlperf", + "inference", + "mlperf-inference", + "v3.1", + "analysis", + "ctuning" + ], + "redirect": "https://www.hpcwire.com/2023/09/13/mlperf-releases-latest-inference-results-and-new-storage-benchmark", + "title": "HPCWire about MLPerf inference v3.1 and storage results (with cTuning/cKnowledge coverage)", + "uid": "50960565640142d6" +} diff --git a/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json b/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json new file mode 100644 index 0000000000..15c3fa6c42 --- /dev/null +++ b/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json @@ -0,0 +1,17 @@ +{ + "alias": "mlperf-inference-v4.0-press-release-ctuning", + "automation_alias": "report", + "automation_uid": "6462ecdba2054467", + "date": "20230913", + "redirect": "https://www.linkedin.com/pulse/new-cm-mlperf-automation-helps-benchmark-commodity-hardware-fursin-61noe", + "tags": [ + "mlperf", + "inference", + "mlperf-inference", + "v4.0", + "analysis", + "ctuning" + ], + "title": "cTuning press-release about a new version of the CM workflow to automate MLPerf", + "uid": "acc35b8e9ed14c98" +} diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/README.md b/script/reproduce-ieee-acm-micro2023-paper-22/README.md new file mode 100644 index 0000000000..6b86e491da --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/README.md @@ -0,0 +1,42 @@ +# CM script to run and reproduce experiments + +Original repository: https://github.com/UofT-EcoSystem/Grape-MICRO56-Artifact/wiki#installation + +### Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install reusable MLCommons automations: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` + +### Install dependencies + +```bash +cmr "reproduce project micro-2023 22 _install_deps" +cmr "reproduce project micro-2023 22 _install_deps_driver" +cmr "reproduce project micro-2023 22 _install_deps_cuda" +cmr "reproduce project micro-2023 22 _install_deps_pytorch" +cmr "reproduce project micro-2023 22 _install_deps_transformers" +``` + +Please reboot the machine after the above installation steps for the GPU driver installation to take effect. This can be verified from the message `NVRM: loading customized kernel module from Grape` when running the command `sudo dmesg`. 
If the message does not appear, please repeat the command + +```bash +cmr "reproduce project micro-2023 22 _install_deps_driver" +``` + +### Run experiments + +```bash +cmr "reproduce project micro-2023 22 _run_figure13" +cmr "reproduce project micro-2023 22 _run_figure11" +cmr "reproduce project micro-2023 22 _run_figure12" +``` diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-22/_cm.yaml new file mode 100644 index 0000000000..8f309ca885 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/_cm.yaml @@ -0,0 +1,45 @@ +alias: reproduce-ieee-acm-micro2023-paper-22 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: get,git,repo,_repo.https://github.com/UofT-EcoSystem/Grape-MICRO56-Artifact + env: + CM_GIT_ENV_KEY: 'GRAPE_MICRO56' + extra_cache_tags: micro56,artifact,ae,grape +script_name: run +tags: +- reproduce +- project +- paper +- m +- micro +- micro-2023 +- '2023' +- '22' +uid: e26c9ce3e7b84526 +variations: + install_deps: + script_name: install_deps + install_deps_driver: + script_name: install_deps_driver + install_deps_cuda: + script_name: install_deps_cuda + install_deps_pytorch: + script_name: install_deps_pytorch + install_deps_transformers: + script_name: install_deps_transformers + run: + script_name: run + run_figure11: + script_name: run_figure11 + run_figure12: + script_name: run_figure12 + run_figure13: + script_name: run_figure13 diff --git a/script/reproduce-micro-paper-2023-victima/customize.py b/script/reproduce-ieee-acm-micro2023-paper-22/customize.py similarity index 100% rename from script/reproduce-micro-paper-2023-victima/customize.py rename to script/reproduce-ieee-acm-micro2023-paper-22/customize.py diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps.sh new file mode 100644 index 0000000000..c9d37d0ba6 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +. scripts/Installation/0-install_build_essentials.sh +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_cuda.sh b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_cuda.sh new file mode 100644 index 0000000000..f3a345ec90 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_cuda.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +. scripts/Installation/2-install_CUDA.sh +test $? 
-eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_driver.sh b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_driver.sh new file mode 100644 index 0000000000..3e6d33783c --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_driver.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +. scripts/Installation/1-install_NVIDIA_GPU_driver.sh +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_pytorch.sh b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_pytorch.sh new file mode 100644 index 0000000000..f961aaa009 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_pytorch.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +. scripts/Installation/3-build_PyTorch.sh +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_transformers.sh b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_transformers.sh new file mode 100644 index 0000000000..effe47e975 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/install_deps_transformers.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +echo "git submodule update --init submodules/transformers" +git submodule update --init submodules/transformers + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/run.sh b/script/reproduce-ieee-acm-micro2023-paper-22/run.sh new file mode 100644 index 0000000000..6b50d1b811 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/run.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +source scripts/Installation/activate + +. ${CM_TMP_CURRENT_SCRIPT_PATH}/run_figure13.sh +. ${CM_TMP_CURRENT_SCRIPT_PATH}/run_figure11.sh +. ${CM_TMP_CURRENT_SCRIPT_PATH}/run_figure12.sh + + +test $? 
-eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/run_figure11.sh b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure11.sh new file mode 100644 index 0000000000..bf2c7b0fcf --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure11.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +source scripts/Installation/activate + +./scripts/Experiment_Workflow/2-test_runtime_performance.sh --model=gpt2 +./scripts/Experiment_Workflow/2-test_runtime_performance.sh --model=gptj +./scripts/Experiment_Workflow/2-test_runtime_performance.sh --model=wav2vec2 + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/run_figure12.sh b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure12.sh new file mode 100644 index 0000000000..1d9ea80270 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure12.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +source scripts/Installation/activate + +./scripts/Experiment_Workflow/3-test_runtime_breakdown.sh + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-22/run_figure13.sh b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure13.sh new file mode 100644 index 0000000000..6d2f05bf3c --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-22/run_figure13.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to GRAPE repo: ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_GRAPE_MICRO56_CHECKOUT_PATH} + +echo "" + +source scripts/Installation/activate + +./scripts/Experiment_Workflow/1-test_metadata_compression.sh + + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/README.md b/script/reproduce-ieee-acm-micro2023-paper-28/README.md new file mode 100644 index 0000000000..c0b235ba21 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/README.md @@ -0,0 +1,61 @@ +# CM script to run and reproduce experiments + +Original repository: https://github.com/neel-patel-1/XFM_MICRO2023.git + +### Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install reusable MLCommons automations: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` + +### Regenerate Figures via CM interface + +1) Install deps: +```bash +cmr "reproduce project micro-2023 xfm _install_deps" +``` + +2) Run experiments: + +```bash +cmr "reproduce project micro-2023 xfm _run" +``` + +3) Plot results: + +```bash +cmr "reproduce project micro-2023 xfm _plot" +``` + +You should find `XFM_Access_Distribution.png` and `results.csv` in the `results` folder current directory. 
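+
+As a quick optional sanity check (not part of the original artifact), you can inspect the generated
+CSV with pandas, which is already installed as a dependency of this CM script. The path below assumes
+the layout produced by the run step above; the exact column names depend on the artifact's own scripts:
+
+```python
+# Sketch: peek at the collected results after the _run step.
+import pandas as pd
+
+df = pd.read_csv("results/memory_channel_interleave_ratios/results.csv")
+print(df.head())          # first few measurement rows
+print(df.describe())      # simple summary statistics of the numeric columns
+```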
+ +### Regenerate SPEC Workloads Experiments via CM Interface + +* if hosted SPEC 2017 for artifact evaluation purposes is no longer available, provide path to a local install of SPEC: + +1) (Optional) Provide path to local SPEC2017 .iso file +```bash +# if local spec is available, run below to avoid fetching remote SPEC, otherwise skip this step +cmr "download file _url.https://spec2017iso.s3.us-east-2.amazonaws.com/cpu2017-1_0_5.iso" --local_path=/path/to/local/cpu2017-1_0_5.iso +``` + +1) Install deps: +```bash +cmr "reproduce project micro-2023 xfm _install_spec_deps" +``` + +2) run: +```bash +cmr "reproduce project micro-2023 xfm _run_spec" +``` + +You should find `results.txt` in the `results` folder of current directory. \ No newline at end of file diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-28/_cm.yaml new file mode 100644 index 0000000000..e2ed10c86c --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/_cm.yaml @@ -0,0 +1,40 @@ +alias: reproduce-ieee-acm-micro2023-paper-28 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: get,generic-python-lib,_pandas +- tags: get,generic-python-lib,_matplotlib +- tags: get,git,repo,_repo.https://github.com/neel-patel-1/XFM_MICRO2023 +- tags: download,file,url.https://spec2017iso.s3.us-east-2.amazonaws.com/cpu2017-1_0_5.iso + env: + CM_GIT_ENV_KEY: 'XFM' + extra_cache_tags: micro23,artifact,ae,xfm,spec +  force_cache: true +script_name: run +tags: +- reproduce +- project +- paper +- micro +- micro-2023 +- 28 +- xfm +uid: 72c44b58be0e4e16 +variations: + install_deps: + script_name: install_deps + plot: + script_name: plot + run: + script_name: run + install_spec_deps: + script_name: install_spec_deps.sh + run_spec: + script_name: run_spec.sh diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/customize.py b/script/reproduce-ieee-acm-micro2023-paper-28/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-28/install_deps.sh new file mode 100644 index 0000000000..aba23e8d48 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/install_deps.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to XFM repo: ${CM_GIT_REPO_XFM_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_XFM_CHECKOUT_PATH} + +echo "" + +# Done via _cm.yaml +#${CM_PYTHON_BIN_WITH_PATH} -m pip install pandas +#${CM_PYTHON_BIN_WITH_PATH} -m pip install matplotlib + +git submodule update --init --recursive . +test $? -eq 0 || exit 1 + +cd memory_channel_interleave_ratios +test $? -eq 0 || exit 1 + +./build_gzip.sh +test $? -eq 0 || exit 1 + +./fetch_corpus.sh +test $? 
-eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/install_spec_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-28/install_spec_deps.sh new file mode 100644 index 0000000000..46488b66be --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/install_spec_deps.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +CUR_DIR=${PWD} +SPEC_EXP_ROOT=${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment +SPEC_INSTALL=${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment/spec +SPEC_MNT=${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment/spec_mnt + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "" +echo "SPEC ISO PATH:${SPEC_ISO}" +echo "Installing to ${SPEC_INSTALL}" + +mkdir -p ${SPEC_MNT} +test $? -eq 0 || exit 1 + +mkdir -p ${SPEC_INSTALL} +test $? -eq 0 || exit 1 + +sudo mount -t iso9660 -o ro,exec,loop /path/to/cpu2017-1_0_5.iso ${CUR_DIR}/spec_mnt +test $? -eq 0 || exit 1 + +cd ${SPEC_MNT} +./install.sh -d ${SPEC_INSTALL} +test $? -eq 0 || exit 1 + +cp ${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment/config/default.cfg ${SPEC_INSTALL}/config +test $? -eq 0 || exit 1 + +cd ${SPEC_EXP_ROOT} +./fetch_corpus.sh +test $? -eq 0 || exit 1 +cd lzbench +make -j BUILD_STATIC=1 +test $? -eq 0 || exit 1 \ No newline at end of file diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-28/plot.sh new file mode 100644 index 0000000000..c79e247206 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/plot.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to XFM repo: ${CM_GIT_REPO_XFM_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_XFM_CHECKOUT_PATH} + +echo "" + +cd xfm_access_model + +${CM_PYTHON_BIN_WITH_PATH} xfm_access_model.py +test $? -eq 0 || exit 1 + +mkdir -p ${CUR_DIR}/results/XFM_Access_Results + +cp XFM_Access_Distribution.png ${CUR_DIR}/results/XFM_Access_Results diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/run.sh b/script/reproduce-ieee-acm-micro2023-paper-28/run.sh new file mode 100644 index 0000000000..49ca2bc6ff --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/run.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to XFM repo: ${CM_GIT_REPO_XFM_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_XFM_CHECKOUT_PATH} + +echo "" + +cd memory_channel_interleave_ratios + +./run.sh +test $? -eq 0 || exit 1 + +mkdir -p ${CUR_DIR}/results/memory_channel_interleave_ratios +test $? -eq 0 || exit 1 + +cp results.csv ${CUR_DIR}/results/memory_channel_interleave_ratios +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-28/run_spec.sh b/script/reproduce-ieee-acm-micro2023-paper-28/run_spec.sh new file mode 100644 index 0000000000..5de27e2325 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-28/run_spec.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to XFM repo's SPEC2017 Directory: ${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment" +cd ${CM_GIT_REPO_XFM_CHECKOUT_PATH}/spec_workload_experiment + +./run.sh +test $? -eq 0 || exit 1 + +echo "" + +mkdir -p ${CUR_DIR}/results/spec +test $? 
-eq 0 || exit 1 + +./parse.sh | tee ${CUR_DIR}/results/spec/results.txt +test $? -eq 0 || exit 1 + + diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/README.md b/script/reproduce-ieee-acm-micro2023-paper-33/README.md new file mode 100644 index 0000000000..42d9809e9b --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/README.md @@ -0,0 +1,74 @@ +# CM script to run and reproduce experiments + +Original repository: https://github.com/filipmazurek/spa-artifact + +### Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install reusable MLCommons automations: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` + +### Set up and start Docker container + +```bash +cmr "reproduce project m 2023 33 _install_deps" +``` + +You should be within the Docker container now. + +The next step is not yet fully automated by CM and you need to do it manually to set up Conda environment: + +### Set up Conda + +```bash +cd /shared/ +bash ./in-docker-bash-scripts/set-up-conda.sh + +# Use conda with the bash shell +eval "$(/root/miniconda3/bin/conda shell.bash hook)" + +conda activate spa +``` + +### Install CM inside Conda to continue using CM interface + +```bash +python3 -m pip install cmind +cm pull repo mlcommons@ck +cm pull repo ctuning@cm4research +``` + +### Download Ubuntu Image and Kernel + +```bash +cmr "reproduce project m 2023 33 _install_deps_kernel" +``` + +### Copy gem5 PARSEC Binaries + +```bash +cmr "reproduce project m 2023 33 _install_deps_gem5" +``` + +### Run experiments Using gem5 + +```bash +cmr "reproduce project m 2023 33 _run" +``` + +### Collect data and reproduce results + +```bash +cmr "reproduce project m 2023 33 _plot" +``` + +All figures should be available in `/shared/paper-figures/`. 
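+
+If you prefer to drive the last two steps from Python (for example, from a notebook inside the
+container), the sketch below calls the same CM script through the `cmind` Python API using the
+same tags as the `cmr` commands above. This is only an illustrative alternative to the CLI:
+
+```python
+# Sketch: run the gem5 experiments and then reproduce the figures via the CM Python API.
+import cmind
+
+for variation in ("_run", "_plot"):
+    r = cmind.access({'action': 'run',
+                      'automation': 'script',
+                      'tags': f'reproduce,project,m,2023,33,{variation}',
+                      'out': 'con'})
+    if r['return'] > 0:
+        raise RuntimeError(r.get('error', f'step {variation} failed'))
+```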
diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-33/_cm.yaml new file mode 100644 index 0000000000..4db4f45391 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/_cm.yaml @@ -0,0 +1,48 @@ +alias: reproduce-ieee-acm-micro2023-paper-33 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: get,git,repo,_repo.https://github.com/filipmazurek/spa-artifact + env: + CM_GIT_ENV_KEY: 'SPA_ARTIFACT' + extra_cache_tags: micro23,artifact,ae,spa_artifact + skip_if_env: + CM_RUN_INSIDE_DOCKER: + - yes +script_name: run +tags: +- reproduce +- project +- paper +- m +- micro +- micro-2023 +- '2023' +- '33' +uid: 5dad99d41c0b422b +variations: + install_deps: + script_name: install_deps + install_deps_kernel: + script_name: install_deps_kernel + env: + CM_RUN_INSIDE_DOCKER: yes + install_deps_gem5: + script_name: install_deps_gem5 + env: + CM_RUN_INSIDE_DOCKER: yes + plot: + script_name: plot + env: + CM_RUN_INSIDE_DOCKER: yes + run: + script_name: run + env: + CM_RUN_INSIDE_DOCKER: yes diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/customize.py b/script/reproduce-ieee-acm-micro2023-paper-33/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps.sh new file mode 100644 index 0000000000..1fa6f8b86a --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to SPAM repo: ${CM_GIT_REPO_SPA_ARTIFACT_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_SPA_ARTIFACT_CHECKOUT_PATH} + +echo "" + +bash ./artifact-bash-scripts/set-up-docker.sh +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_gem5.sh b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_gem5.sh new file mode 100644 index 0000000000..667f6a7683 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_gem5.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +cd /shared/ +bash ./in-docker-bash-scripts/copy-parsec-binaries.sh + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_kernel.sh b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_kernel.sh new file mode 100644 index 0000000000..973589a921 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/install_deps_kernel.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +cd /shared/ +bash ./in-docker-bash-scripts/download-disk.sh + +test $? 
-eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-33/plot.sh new file mode 100644 index 0000000000..89c33b4856 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/plot.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "" + +cd /shared/python-runners/ +${CM_PYTHON_BIN_WITH_PATH} convert-gem5-results-to-csv.py + +test $? -eq 0 || exit 1 + +cd /shared/paper-figures/ + +${CM_PYTHON_BIN_WITH_PATH} figure-1.py +${CM_PYTHON_BIN_WITH_PATH} figure-2.py +${CM_PYTHON_BIN_WITH_PATH} figure-4.py +${CM_PYTHON_BIN_WITH_PATH} figure-5.py +${CM_PYTHON_BIN_WITH_PATH} figure-6_7.py +${CM_PYTHON_BIN_WITH_PATH} figure-8_9.py +${CM_PYTHON_BIN_WITH_PATH} figure-10_11.py +${CM_PYTHON_BIN_WITH_PATH} figure-12.py +${CM_PYTHON_BIN_WITH_PATH} figure-13.py + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-33/run.sh b/script/reproduce-ieee-acm-micro2023-paper-33/run.sh new file mode 100644 index 0000000000..8e17e45444 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-33/run.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "" + +cd /shared/python-runners/ + +chmod 777 /shared/gem5/build/X86/gem5-mesi.fast +${CM_PYTHON_BIN_WITH_PATH} meta-runner.py + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/README.md b/script/reproduce-ieee-acm-micro2023-paper-38/README.md new file mode 100644 index 0000000000..34ea8ce602 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/README.md @@ -0,0 +1,50 @@ +# CM script to run and reproduce experiments + +Original repository: https://github.com/HieronZhang/G10-Artifact + + +### Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install reusable MLCommons automations: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` + +## Install Python virtual environment via CM + +```bash +cm run script "install python-venv" --name=reproducibility +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=reproducibility" +``` + +### Run G10 via CM interface + +Perform the following steps to evaluate G10 Artifact with MLCommons CM automation language: + +1) This command will install all the dependencies for G10 and requires sudo: + +```bash +cmr "reproduce project micro-2023 G10 _install_deps" +``` + +2) This command will prepare and run all experiments: + +```bash +cmr "reproduce project micro-2023 G10 _run" --max_process_num=[nthreads] +``` + +- The variable `max_process_num` is the maximum allowed number of parallel experiments in the script. Note that user need to specify the `max_process_num` based on their machine's main memory capacity. Each experiment process will need a peak memory of 28.5 GB. (We recommend reserving 30 GB for each process to ensure that the program won't crash. For example, if your machine has 128 GB of main memory, `max_process_num` can be set as 4). 
+ +3) In case of successful execution of a previous command, this command will generate plots to help you validate results from the paper: + +```bash +cmr "reproduce project micro-2023 G10 _plot" +``` diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-38/_cm.yaml new file mode 100644 index 0000000000..a7de67b4e0 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/_cm.yaml @@ -0,0 +1,36 @@ +alias: reproduce-ieee-acm-micro2023-paper-38 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +default_env: + max_process_num: 1 +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: get,git,repo,_repo.https://github.com/HieronZhang/G10-Artifact.git + env: + CM_GIT_ENV_KEY: 'G10' + extra_cache_tags: micro23,artifact,ae,G10 +input_mapping: + max_process_num: max_process_num +script_name: run +tags: +- reproduce +- project +- paper +- micro +- micro-2023 +- g10 +- G10 +uid: b6ec80696a364ff4 +variations: + install_deps: + script_name: install_deps + plot: + script_name: plot + run: + script_name: run diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.bat b/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.bat new file mode 100644 index 0000000000..47f7e7ce26 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.bat @@ -0,0 +1,18 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +if exist "%CM_TMP_CURRENT_SCRIPT_PATH%\requirements.txt" ( + + echo. + echo Installing requirements.txt ... + echo. + + %CM_PYTHON_BIN_WITH_PATH% -m pip install -r %CM_TMP_CURRENT_SCRIPT_PATH%\requirements.txt + IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% +) diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.sh new file mode 100644 index 0000000000..02b1446fca --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/install_deps.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +sudo apt-get update +sudo apt install flex bison tmux python3-pip + +${CM_PYTHON_BIN_WITH_PATH} -m pip install matplotlib networkx pandas PyPDF2 diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/plot.bat b/script/reproduce-ieee-acm-micro2023-paper-38/plot.bat new file mode 100644 index 0000000000..7e786771ae --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/plot.bat @@ -0,0 +1,12 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +rem echo. 
+rem %CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\main.py +rem IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-38/plot.sh new file mode 100644 index 0000000000..6058cb5a32 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/plot.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "Changing to G10 repo: ${CM_GIT_REPO_G10_CHECKOUT_PATH}" +cd "${CM_GIT_REPO_G10_CHECKOUT_PATH}" + +cd src/resources + +# Collect all the numbers, store it in raw_output/data.json +${CM_PYTHON_BIN_WITH_PATH} gatherKernelInfo.py + +# Gather data for figure 11 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepOverallPerformance.py # The gathered data is stored in figure_drawing/overall_performance + +# Gather data for figure 12 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepBreakdown.py # The gathered data is stored in figure_drawing/overall_breakdown + +# Gather data for figure 13 +./figureDrawingDataPrepKernelCDF.sh # The gathered data is stored in figure_drawing/overall_slowdown_cdf + +# Gather data for figure 14 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepTraffic.py # The gathered data is stored in figure_drawing/overall_traffic + +# Gather data for figure 15 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrep.py # The gathered data is stored in figure_drawing/overall_batchsize + +# Gather data for figure 16 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepCPUsensitivity.py # The gathered data is stored in figure_drawing/sensitivity_cpumem + +# Gather data for figure 17 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepCPUSensitivityCombined.py # The gathered data is stored in figure_drawing/sensitivity_cpumem_combined + +# Gather data for figure 18 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepSSD.py # The gathered data is stored in figure_drawing/sensitivity_ssdbw + +# Gather data for figure 19 +${CM_PYTHON_BIN_WITH_PATH} figureDrawingDataPrepVariation.py # The gathered data is stored in figure_drawing/sensitivity_variation + +cd figure_drawing + +# Plot figures for Figure 2-4, and Figure 20-21 (Appendix) + +${CM_PYTHON_BIN_WITH_PATH} plot_mem_consumption.py # Figure 2 is output/dnn_memconsumption.pdf + +${CM_PYTHON_BIN_WITH_PATH} plot_tensor_time_cdf.py # Figure 3 is output/tensor_time_cdf.pdf + +${CM_PYTHON_BIN_WITH_PATH} plot_tensor_period_distribution.py # Figure 4 is output/tensor_periods_distribution.pdf + +${CM_PYTHON_BIN_WITH_PATH} plot_detail_mem_breakdown_live.py # Figure 20 is output/dnn_mem_consumption_breakdown_live.pdf + +${CM_PYTHON_BIN_WITH_PATH} plot_detail_mem_breakdown_active.py # Figure 21 is output/dnn_mem_consumption_breakdown_active.pdf + +# Draw Figure 11 +${CM_PYTHON_BIN_WITH_PATH} overallPerf.py # Figure 11 is output/OverallPerfNew.pdf + +# Draw Figure 12 +${CM_PYTHON_BIN_WITH_PATH} overallBreakdown.py # Figure 12 is output/Breakdown.pdf + +# Draw Figure 13 +${CM_PYTHON_BIN_WITH_PATH} overallSlowdownCDF.py # Figure 13 is output/KernelTimeCDF.pdf + +# Draw Figure 14 +${CM_PYTHON_BIN_WITH_PATH} overallTraffic.py # Figure 14 is output/OverallTraffic.pdf + +# Draw Figure 15 +${CM_PYTHON_BIN_WITH_PATH} overallBatchSize.py # Figure 15 is output/OverallPerfBatchSize.pdf + +# Draw Figure 16 +${CM_PYTHON_BIN_WITH_PATH} sensitivityCPUMem.py # Figure 16 is output/OverallPerfCPUMem.pdf + +# Draw Figure 17 +${CM_PYTHON_BIN_WITH_PATH} 
sensitivityCPUMemCombined.py # Figure 17 is output/OverallPerfCPUMemCombined.pdf + +# Draw Figure 18 +${CM_PYTHON_BIN_WITH_PATH} sensitivitySSDbw.py # Figure 18 is output/OverallPerfSSDBW.pdf + +# Draw Figure 19 +${CM_PYTHON_BIN_WITH_PATH} SensitivityKernelVariation.py # Figure 19 is output/SensitivityVariation.pdf diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/run.bat b/script/reproduce-ieee-acm-micro2023-paper-38/run.bat new file mode 100644 index 0000000000..6c1274ce64 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/run.bat @@ -0,0 +1,12 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +echo. +%CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\main.py +IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% diff --git a/script/reproduce-ieee-acm-micro2023-paper-38/run.sh b/script/reproduce-ieee-acm-micro2023-paper-38/run.sh new file mode 100644 index 0000000000..6475bf30fd --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-38/run.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +if [ -z "$max_process_num" ]; then + printf "\033[0;31m<--max_process_num> is not specified. Please specify it using --max_process_num=[nthreads]\033[0m\n" + exit 1 +fi +echo "Max number of processes: ${max_process_num}" + +echo "Changing to G10 repo: ${CM_GIT_REPO_G10_CHECKOUT_PATH}" +cd "${CM_GIT_REPO_G10_CHECKOUT_PATH}" + +cd src +make clean +make -j"$(nproc)" + +cd resources +${CM_PYTHON_BIN_WITH_PATH} genconfigs.py + +tmux kill-server > /dev/null 2> /dev/null + +# First run experiments for figure 11-14 +./run.sh -p "(BERT\/256|VIT\/1280|Inceptionv3\/1536|ResNet152\/1280|SENet154\/1024)-sim_(deepUM|prefetch_lru|FlashNeuron|G10GDSSSD|G10GDSFULL|lru)\.config" -dr -j $max_process_num +# The time for running this is about 104m33.975s (for max_process_num=6) + +# Then run experiments for figure 15 +./run.sh -p "(BERT\/(128|256|512|768|1024)|VIT\/(256|512|768|1024|1280)|Inceptionv3\/(512|768|1024|1280|1536|1792)|ResNet152\/(256|512|768|1024|1280)|SENet154\/(256|512|768|1024))-sim_(deepUM|prefetch_lru|FlashNeuron|lru)\.config" -dr -j $max_process_num +# The time for running this is about 155m11.104s (for max_process_num=6) + +# Then run experiments for figure 16 +./run.sh -p "(BERT\/(256|384|512|640)|VIT\/(768|1024|1280|1536)|Inceptionv3\/(512|1024|1280|1536)|ResNet152\/(768|1024|1280|1536)|SENet154\/(256|512|768|1024))-sim_prefetch_lru(-cpu(0|16|32|64|96|192|256))?\.config" -dr -j $max_process_num +# The time for running this is about 406m30.954s (for max_process_num=6) + +# Then run experiments for figure 17 +./run.sh -p "(VIT\/1024|Inceptionv3\/1280)-sim_(deepUM|prefetch_lru|FlashNeuron)-cpu(0|16|32|64|256)\.config" -dr -j $max_process_num +# The time for running this is about 24m8.144s (for max_process_num=6) + +# Then run experiments for figure 18 +./run.sh -p "(BERT\/512|VIT\/1280|Inceptionv3\/1536|ResNet152\/1280|SENet154\/1024)-sim_(deepUM|prefetch_lru|FlashNeuron|lru)-ssd(6_4|12_8|19_2|25_6|32)-.*\.config" -dr -j $max_process_num +# The time for running this is about 354m40.747s (for max_process_num=6) + +# Then run experiments for figure 19 +./run.sh -p "(BERT\/256|VIT\/1280|Inceptionv3\/1536|ResNet152\/1280|SENet154\/1024)-sim_prefetch_lru-var0_(05|10|15|20|25)\.config" -dr -j $max_process_num +# The time for running this is about 124m17.909s 
(for max_process_num=6)] diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/README.md b/script/reproduce-ieee-acm-micro2023-paper-5/README.md new file mode 100644 index 0000000000..637717712e --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/README.md @@ -0,0 +1,30 @@ +# CM script to run and reproduce experiments + +## Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install this repository with CM interface for reproduced experiments: + +```bash +cm pull repo ctuning@cm4research +``` + +## Install Python virtual environment via CM + +```bash +cm run script "install python-venv" --name=reproducibility +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=reproducibility" +``` + +## Install dependencies + +```bash +cmr "reproduce paper m2023 5 _install_deps" +``` + +## Run and create graphs + +```bash +cmr "reproduce paper m2023 5" +``` diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-5/_cm.yaml new file mode 100644 index 0000000000..65a520d013 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/_cm.yaml @@ -0,0 +1,20 @@ +alias: reproduce-ieee-acm-micro2023-paper-5 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +tags: +- reproduce +- paper +- project +- micro +- micro-2023 +- m2023 +- '5' +uid: e3a42d0dc64b4f8f +variations: + install_deps: + script_name: install_deps + run: + script_name: run +versions: {} diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/customize.py b/script/reproduce-ieee-acm-micro2023-paper-5/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.bat b/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.bat new file mode 100644 index 0000000000..834ec600df --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.bat @@ -0,0 +1,4 @@ +rem native script + +echo "Windows is not supported yet" +exit /b 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.sh new file mode 100644 index 0000000000..322d4671b9 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/install_deps.sh @@ -0,0 +1,24 @@ +echo "================== Install Docker container (you can skip if already installed)==================" + +sudo apt-get update +sudo apt-get -y install \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + tar + +# Add Docker’s official GPG key +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + +sudo apt-get update +sudo apt-get -y install docker-ce docker-ce-cli containerd.io + +sudo 
usermod -aG docker $USER + +su - $USER diff --git a/script/reproduce-micro-paper-2023-victima/main.py b/script/reproduce-ieee-acm-micro2023-paper-5/main.py similarity index 100% rename from script/reproduce-micro-paper-2023-victima/main.py rename to script/reproduce-ieee-acm-micro2023-paper-5/main.py diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/run.bat b/script/reproduce-ieee-acm-micro2023-paper-5/run.bat new file mode 100644 index 0000000000..834ec600df --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/run.bat @@ -0,0 +1,4 @@ +rem native script + +echo "Windows is not supported yet" +exit /b 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-5/run.sh b/script/reproduce-ieee-acm-micro2023-paper-5/run.sh new file mode 100644 index 0000000000..071e755eb0 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-5/run.sh @@ -0,0 +1,41 @@ +echo "====================== Atifacts Evaluation for MICRO23 paper==========================" +echo "Sparse-DySta: Sparsity-Aware Dynamic and Static Scheduling for Sparse Multi-DNN Workloads" +container="docker" + +echo "================== Run a container test to make sure container works ==================" +${container} run docker.io/hello-world + +echo "=====================================================================================" + +echo "================== Pulling the Docker image to run the experiments ==================" +${container} pull hxfan/spar-dysta-micro23:ae + +echo "================== Creating Container to run the experiments ==================" +sudo ${container} run -it -d --name spar-dysta --gpus all hxfan/spar-dysta-micro23:ae /bin/bash # Create container + + +echo "================== Generate Figure-12, Attention ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/attnn/dysta_comparison_sanger_tradeoff_analysis.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/Sanger_Tradeoff_slo10.0.pdf . +echo "================== Generate Figure-12, CNN ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/cnn/dysta_comparison_eyerissv2_tradeoff_analysis.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/EyerissV2_Tradeoff_slo10.0.pdf . + +echo "================== Generate Figure-13, Attention ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/attnn/effect_sparsity_sanger.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/Sanger_Sparsity_Effect30_sample1000_across_slo10.0_prema.pdf . +echo "================== Generate Figure-13 CNN ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/cnn/effect_sparsity_eyerissv2.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/EyerissV2_Sparsity_Effect3_sample1000_across_slo10.0_prema.pdf . + + +echo "================== Generate Table5 & Figure-14, Attention ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/attnn/dysta_comparison_sanger_across_slo.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/Sanger_Metrics_rate30_sample1000_across_slo.pdf . +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/Sanger_Metrics_rate40_sample1000_across_slo.pdf . 
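+
+# (Editor's sketch, not part of the original artifact flow): optionally verify
+# that the Figure-14 attention plots copied above actually reached the host
+# before continuing with the CNN experiments.
+for f in Sanger_Metrics_rate30_sample1000_across_slo.pdf Sanger_Metrics_rate40_sample1000_across_slo.pdf; do
+    [ -f "${f}" ] || echo "Warning: expected plot ${f} not found in $(pwd)"
+done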
+echo "================== Generate Table5 & Figure-14 CNN ==================" +sudo ${container} exec --workdir /workspace/dysta-sparse/dysta_scheduler spar-dysta script/cnn/dysta_comparison_eyerissv2_across_slo.sh +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/EyerissV2_Metrics_rate3_sample1000_across_slo.pdf . +${container} cp -r spar-dysta:/workspace/dysta-sparse/dysta_scheduler/EyerissV2_Metrics_rate4_sample1000_across_slo.pdf . + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/.gitignore b/script/reproduce-ieee-acm-micro2023-paper-8/.gitignore new file mode 100644 index 0000000000..1377554ebe --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/.gitignore @@ -0,0 +1 @@ +*.swp diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/README.md b/script/reproduce-ieee-acm-micro2023-paper-8/README.md new file mode 100644 index 0000000000..c0f9d185c6 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/README.md @@ -0,0 +1,74 @@ +# CM script to run and reproduce experiments + +Original repository: [https://github.com/FPSG-UIUC/micro23-teaal-artifact](https://github.com/FPSG-UIUC/micro23-teaal-artifact) + +## Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). + +Install this repository with CM interface for reproduced experiments: + +```bash +cm pull repo ctuning@cm4research +``` + +## Install Python virtual environment via CM + +```bash +cm run script "install python-venv" --name=reproducibility +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=reproducibility" +``` + +## Run TeAAL via the CM interface + +To install dependencies, run: + +```bash +cmr "reproduce paper m 2023 8 _install_deps" +``` + +Note that the install script makes its best guess for the correct UID and GID +for the container to be using (the current user's UID and GID). If you would +like to change the UID and/or GID of the container, you can do so in the +artifact repository `/path/to//repo/docker-compose.yaml`. +Instructions for finding this repository are below. + +To check that the environment is correctly set up and evaluate each accelerator +configuration on a small example, run: + +```bash +cmr "reproduce paper m 2023 8 _check" +``` + +To run the real experiments, run: + +```bash +cmr "reproduce paper m 2023 8 _run" +``` + +To plot the results of the real experiments, run +```bash +cmr "reproduce paper m 2023 8 _plot" +``` + +The plots will be stored in the artifact repository at `/path/to//repo/data/plots`. Instructions for finding this repository are below. 
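+
+A quick way to locate the generated plots afterwards is sketched below (it assumes the `cm find cache` tags listed in the section further down and that the command prints the cached checkout path(s); the exact layout inside the cache entry may differ on your system):
+
+```bash
+# List generated PDF plots inside the cached TeAAL artifact repository.
+for entry in $(cm find cache --tags=git,artifact,fpsg,teaal); do
+    find "${entry}" -path "*plots*" -name "*.pdf" 2>/dev/null
+done
+```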
+ +To plot pregenerated results (e.g., if you don't want to run the experiments +yourself), run: + +```bash +cmr "reproduce paper m 2023 8 _plot_pregenerated" +``` + +### Finding the Artifact Repository + +You can also find this directory via CM as follows: +```bash +cm show cache --tags=git,artifact,fpsg,teaal +``` +or +```bash +cm find cache --tags=git,artifact,fpsg,teaal +``` + diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-8/_cm.yaml new file mode 100644 index 0000000000..79cdc1fa33 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/_cm.yaml @@ -0,0 +1,40 @@ +alias: reproduce-ieee-acm-micro2023-paper-8 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +default_env: + CM_EXPERIMENT: '1' +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: get,git,repo,_repo.https://github.com/FPSG-UIUC/micro23-teaal-artifact + env: + CM_GIT_ENV_KEY: 'FPSG_UIUC_TEAAL' + extra_cache_tags: artifact,fpsg,uiuc,teaal +input_mapping: + experiment: CM_EXPERIMENT +tags: +- reproduce +- project +- paper +- m +- micro +- micro-2023 +- '2023' +- '8' +uid: 1f15f5f53c6d469a +variations: + install_deps: + script_name: install_deps + check: + script_name: check + run: + script_name: run + plot: + script_name: plot + plot_pregenerated: + script_name: plot_pregenerated diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/check.sh b/script/reproduce-ieee-acm-micro2023-paper-8/check.sh new file mode 100644 index 0000000000..edec77ffe1 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/check.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH} + +docker-compose run cl scripts/check.sh + +test $? -eq 0 || exit 1 + diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/customize.py b/script/reproduce-ieee-acm-micro2023-paper-8/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.bat b/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.bat new file mode 100644 index 0000000000..47f7e7ce26 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.bat @@ -0,0 +1,18 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +if exist "%CM_TMP_CURRENT_SCRIPT_PATH%\requirements.txt" ( + + echo. + echo Installing requirements.txt ... + echo. 
+ + %CM_PYTHON_BIN_WITH_PATH% -m pip install -r %CM_TMP_CURRENT_SCRIPT_PATH%\requirements.txt + IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% +) diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.sh new file mode 100644 index 0000000000..15c20da89d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/install_deps.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH} + +# We install python venv via CM and cache it inside CM cache +# Path to python from venv will be in ${CM_PYTHON_BIN_WITH_PATH} +#python3 -m venv env +#source env/bin/activate + +${CM_PYTHON_BIN_WITH_PATH} -m pip install -r scripts/cm-requirements.txt + +cd scripts + +${CM_PYTHON_BIN_WITH_PATH} install_deps.py + +docker-compose > /dev/null 2> /dev/null +if [ $? -ne 0 ] +then + sh install_docker.sh +fi + +test $? -eq 0 || exit 1 + diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/main.py b/script/reproduce-ieee-acm-micro2023-paper-8/main.py new file mode 100644 index 0000000000..d851f1450f --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/main.py @@ -0,0 +1,10 @@ +import os + +if __name__ == "__main__": + + print ('') + print ('Main script:') + print ('Experiment: {}'.format(os.environ.get('CM_EXPERIMENT',''))) + print ('') + + exit(0) diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/plot.bat b/script/reproduce-ieee-acm-micro2023-paper-8/plot.bat new file mode 100644 index 0000000000..7e786771ae --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/plot.bat @@ -0,0 +1,12 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +rem echo. +rem %CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\main.py +rem IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-8/plot.sh new file mode 100644 index 0000000000..8c11c44a29 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/plot.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" +echo "ENV CM_EXPERIMENT: ${CM_EXPERIMENT}" + +echo "${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH}" + +docker-compose run cl scripts/plot.sh + +test $? -eq 0 || exit 1 + diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/plot_pregenerated.sh b/script/reproduce-ieee-acm-micro2023-paper-8/plot_pregenerated.sh new file mode 100644 index 0000000000..9980e7ea43 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/plot_pregenerated.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH} + +docker-compose run cl scripts/plot_pregenerated.sh + +test $? 
-eq 0 || exit 1 + diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/run.bat b/script/reproduce-ieee-acm-micro2023-paper-8/run.bat new file mode 100644 index 0000000000..6c1274ce64 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/run.bat @@ -0,0 +1,12 @@ +@echo off + +set CUR_DIR=%cd% + +echo. +echo Current execution path: %CUR_DIR% +echo Path to script: %CM_TMP_CURRENT_SCRIPT_PATH% +echo ENV CM_EXPERIMENT: %CM_EXPERIMENT% + +echo. +%CM_PYTHON_BIN_WITH_PATH% %CM_TMP_CURRENT_SCRIPT_PATH%\main.py +IF %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% diff --git a/script/reproduce-ieee-acm-micro2023-paper-8/run.sh b/script/reproduce-ieee-acm-micro2023-paper-8/run.sh new file mode 100644 index 0000000000..b2c7c1e3c8 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-8/run.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH}" +cd ${CM_GIT_REPO_FPSG_UIUC_TEAAL_CHECKOUT_PATH} + +docker-compose run cl scripts/run.sh + +test $? -eq 0 || exit 1 diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/Dockerfile b/script/reproduce-ieee-acm-micro2023-paper-85/Dockerfile new file mode 100644 index 0000000000..62c2dcdae5 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/Dockerfile @@ -0,0 +1,28 @@ +#Bootstrap: docker +From ubuntu:20.04 + +#%post + RUN mkdir /root/artifact_evaluation + RUN apt-get -y clean + RUN apt-get -y update + RUN apt-get -y install python3 build-essential + RUN apt-get -y install git + RUN apt-get -y install vim pip + RUN pip install numpy + WORKDIR /root/artifact_evaluation + RUN git clone https://github.com/lchangxii/sampled-mgpu-sim.git + RUN git clone https://github.com/lchangxii/akita.git + RUN git clone https://github.com/lchangxii/dnn.git + RUN apt-get -y install wget + RUN wget https://go.dev/dl/go1.20.1.linux-amd64.tar.gz + RUN tar -xvzf go1.20.1.linux-amd64.tar.gz + ENV PATH="/root/artifact_evaluation/go/bin:$PATH" + ENV HOME /root + RUN git clone https://github.com/lchangxii/micro2023_figures.git + RUN pip install pandas + RUN pip install matplotlib + RUN pip install openpyxl +#%environment +#export PATH=/opt/riscv/:$PATH + + diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/README.md b/script/reproduce-ieee-acm-micro2023-paper-85/README.md new file mode 100644 index 0000000000..05954766f3 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/README.md @@ -0,0 +1,40 @@ +# CM script to run and reproduce experiments + +Original repository: https://github.com/lchangxii/photon + + +### Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). 
+ +Install reusable MLCommons automations: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` + +### Run Photon via CM interface + +Perform the following steps to evaluate Photon with MLCommons CM automation language: + +1) This command will install system dependencies for Docker and require sudo (skip it if you have Docker installed): +```bash +cmr "reproduce project m 2023 photon _install_deps" +``` + +2) This command will prepare and run all experiments via Docker: + +```bash +cmr "reproduce project m 2023 photon _run" +``` + +3) In case of successful execution of a previous command, this command will generate plots to help you validate results from the article: + +```bash +cmr "reproduce project m 2023 photon _plot" +``` diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-85/_cm.yaml new file mode 100644 index 0000000000..392e396b7e --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/_cm.yaml @@ -0,0 +1,30 @@ +alias: reproduce-ieee-acm-micro2023-paper-85 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +script_name: run +tags: +- reproduce +- project +- paper +- micro +- micro-2023 +- m +- '2023' +- '85' +- photon +uid: 9e0b8254b62c4349 +variations: + install_deps: + script_name: install_deps + plot: + script_name: plot + run: + script_name: run diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/customize.py b/script/reproduce-ieee-acm-micro2023-paper-85/customize.py new file mode 100644 index 0000000000..d12f9b3e1d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/customize.py @@ -0,0 +1,22 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + return {'return':0} diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-85/install_deps.sh new file mode 100644 index 0000000000..04998192fd --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/install_deps.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + + + +container="docker" + + +if [ "${container}" = "docker" ]; then + + echo "================== Install Docker container (you can skip if already installed)==================" + + sudo apt-get update + sudo apt-get -y install \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + tar + + # Add Docker’s official GPG key + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + + sudo apt-get update + sudo apt-get -y install docker-ce docker-ce-cli containerd.io + + sudo usermod -aG docker $USER + + su - $USER + +else + +echo "================== Install Podman container (you can skip if already 
installed)==================" + +sudo apt-get update +sudo apt-get -y install podman +su - $USER + +fi diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-85/plot.sh new file mode 100644 index 0000000000..b3c8f18d1e --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/plot.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + + + + + + + + +print_colorful_text() { + local text="$1" + local color_code="$2" + echo "\e[${color_code}m${text}\e[0m" +} + +container="docker" +image="micro2023-photon" + +echo "================== Run a container test to make sure container works ==================" + +#${container} run docker.io/hello-world + + +echo "================== Build the Docker image to run the experiments ==================" + +#${container} build -t ${image} -f "${CM_TMP_CURRENT_SCRIPT_PATH}/Dockerfile" . + +echo "================== Get All Results ==================" + +mkdir figures +##get all benchmarks +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testallbench.py -check;cd /root/artifact_evaluation/micro2023_figures/r9nano;./r9nano.py;./r9nanolevels.py;mv *.png /root/figures/;mv *.pdf /root/figures/" + +##get all benchmarks with architecture mi100 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testallbench.py -arch=mi100 -check;cd /root/artifact_evaluation/micro2023_figures/mi100;./mi100.py;mv *.pdf /root/figures/;mv *.png /root/figures" +# +###vgg16 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=vgg16 -check;cd /root/artifact_evaluation/micro2023_figures/vgg16;./vgg16.py;./vgg16speedup.py;mv *.pdf /root/figures/;mv *.png /root/figures" +###vgg19 +echo "Benchmarks MGPUSim-Simtime MGPUSim-Walltime Photon-Simtime Photon-Walltime" +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=vgg19 -check |grep Sum |awk -F Sum '{ printf \"vgg19\";print \$2}' " +###resnet18 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet18 -check |grep Sum|awk -F Sum '{printf \"resnet18\";print \$2}'" +####resnet32 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet32 -check |grep Sum|awk -F Sum '{printf \"resnet32\";print \$2}'" +####resnet50 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet50 -check|grep Sum |awk -F Sum '{printf \"resnet50\";print \$2}'" +####resnet101 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd 
/root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet101 -check|grep Sum|awk -F Sum '{printf \"resnet101\";print \$2}'" +####resnet152 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet152 -check|grep Sum|awk -F Sum '{printf \"resnet152\";print \$2}'" +## +#### +${container} run --rm -v $PWD/gpudata:/root/gpudata/ -v $PWD/figures:/root/figures/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testpagerank.py -check|grep pagerank|grep -v __pagerank" diff --git a/script/reproduce-ieee-acm-micro2023-paper-85/run.sh b/script/reproduce-ieee-acm-micro2023-paper-85/run.sh new file mode 100644 index 0000000000..885b63322a --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-85/run.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + + + +print_colorful_text() { + local text="$1" + local color_code="$2" + echo "\e[${color_code}m${text}\e[0m" +} + +container="docker" +image="micro2023-photon" + +echo "================== Run a container test to make sure container works ==================" + +${container} run docker.io/hello-world + + +echo "================== Build the Docker image to run the experiments ==================" + +${container} build -t ${image} -f "${CM_TMP_CURRENT_SCRIPT_PATH}/Dockerfile" . + +echo "================== Execute all benchmarks ==================" +mkdir gpudata +##run all benchmarks +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testallbench.py" + +##run all benchmarks with architecture mi100 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testallbench.py -arch=mi100" + +##vgg16 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=vgg16" +##vgg19 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=vgg19" +##resnet18 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet18" +##resnet32 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet32" +##resnet50 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet50" +##resnet101 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet101" +##resnet152 +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd /root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testdlapps.py -bench=resnet152" +##pagerank +${container} run --rm -v $PWD/gpudata:/root/gpudata/ ${image} /bin/bash -c "cd 
/root/artifact_evaluation/sampled-mgpu-sim/samples/sampledrunner;./testpagerank.py" + + diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_build_onikiri.sh b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_build_onikiri.sh new file mode 100644 index 0000000000..1cb9d45d60 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_build_onikiri.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsPreliminaryExperiments/ + +cd onikiri2/project/gcc/ +make -j$(nproc) +cd ../../../ diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_create_binary.sh b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_create_binary.sh new file mode 100644 index 0000000000..0a6d2af25e --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_create_binary.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsPreliminaryExperiments/ + +cd raytracing.github.io/build_micro2023_ae/ +sed s@~@../../../../ClockhandsEvaluation/A-riscv@ -i common.mk +make +cd ../../ +cp raytracing.github.io/build_micro2023_ae/InOneWeekend/a.out onikiri2/benchmark/RayTracing/riscv64/bin/InOneWeekend +cp raytracing.github.io/build_micro2023_ae/TheNextWeek/a.out onikiri2/benchmark/RayTracing/riscv64/bin/TheNextWeek +cp raytracing.github.io/build_micro2023_ae/TheRestOfYourLife/a.out onikiri2/benchmark/RayTracing/riscv64/bin/TheRestOfYourLife diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment.sh b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment.sh new file mode 100644 index 0000000000..f4b7c0d2ff --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsPreliminaryExperiments/ + +cd onikiri2/tool/AutoRunTools/ +sed s@/path/to@$(realpath ../../../)@ -i cfg.xml + +# You can change this! +GigaInsns=1 + +echo "Register lifetimes experiment for $GigaInsns giga instructions." +echo "It will take $(echo $GigaInsns \* 4 | bc) minutes." 
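+# Editor's note (assumption derived from the calculation above): the artifact
+# budgets roughly 4 minutes of simulation per giga-instruction, so GigaInsns=1
+# is about 4 minutes and GigaInsns=10 would be about 40 minutes.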
+echo "You can change the number of instructions to evaluate by modifying $BASH_SOURCE" +sed '115 s@".*"@"'"$GigaInsns"'G"@' -i cfg.xml + +perl enqueue.pl -t +cd result/001/sh/exec/ +for i in *.sh; do sh $i & PID="$PID $!"; done +wait $PID +cd ../../../../ +perl summary.pl +cd ../../../ diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment_setup.sh b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment_setup.sh new file mode 100644 index 0000000000..9f70db2ee1 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_experiment_setup.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsPreliminaryExperiments/ + +sed '59,74d' -i onikiri2/tool/AutoRunTools/cfg.xml diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_plot.sh b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_plot.sh new file mode 100644 index 0000000000..cf0ee26fab --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/Preliminary_plot.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsPreliminaryExperiments/ + +echo "" +echo "Please go to $(pwd) and check ClockhandsPreliminaryExperiments*.xlsx ." +echo "The procedure of generating charts are described on them." diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/README.md b/script/reproduce-ieee-acm-micro2023-paper-87/README.md new file mode 100644 index 0000000000..787326bc82 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/README.md @@ -0,0 +1,49 @@ +# CM script to run and reproduce experiments + +Archived artifact: https://zenodo.org/record/8218698 + +## Reusability using MLCommons CM automation language + +Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md). 
+Note that you need run the following command to install CM automation scripts: + +```bash +cm pull repo mlcommons@ck +``` + +Install this repository with CM interface for reproduced experiments: + +```bash +cm pull repo ctuning@cm4research +``` + +## Install deps + +To install dependencies, run: + +```bash +cmr "reproduce paper micro-2023 clockhands _install_deps" +``` + +## Run + +```bash +cmr "reproduce paper micro-2023 clockhands _build_compiler" +cmr "reproduce paper micro-2023 clockhands _create_binary" +cmr "reproduce paper micro-2023 clockhands _build_onikiri" +cmr "reproduce paper micro-2023 clockhands _experiment_setup" +cmr "reproduce paper micro-2023 clockhands _experiment" +cmr "reproduce paper micro-2023 clockhands _Preliminary_build_onikiri" +cmr "reproduce paper micro-2023 clockhands _Preliminary_create_binary" +cmr "reproduce paper micro-2023 clockhands _Preliminary_experiment_setup" +cmr "reproduce paper micro-2023 clockhands _Preliminary_experiment" +``` + +## Plot + +To plot the results of the real experiments, run + +```bash +cmr "reproduce paper micro-2023 clockhands _plot" +cmr "reproduce paper micro-2023 clockhands _Preliminary_plot" +``` diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-87/_cm.yaml new file mode 100644 index 0000000000..869258b3e8 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/_cm.yaml @@ -0,0 +1,55 @@ +alias: reproduce-ieee-acm-micro2023-paper-87 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: false +category: Reproducibility and artifact evaluation +deps: +- tags: detect,os +- names: + - python + - python3 + tags: get,python +- tags: download-and-extract,_extract,_url.https://zenodo.org/record/8218698/files/Clockhands_Artifact_MICRO2023.tar?download=1 + env: + CM_DOWNLOAD_FINAL_ENV_NAME: CM_ARTIFACT_CLOCKHANDS + CM_EXTRACT_FINAL_ENV_NAME: CM_ARTIFACT_CLOCKHANDS_EXTRACTED +# CM_DOWNLOAD_CHECKSUM: + force_cache: true + extra_cache_tags: reproduce,paper,artifact,micro,clockhands +tags: +- reproduce +- project +- paper +- m +- micro +- micro-2023 +- '2023' +- '87' +- clockhands +- Clockhands +uid: bd56037bf32c4b71 +variations: + install_deps: + script_name: install_deps + build_compiler: + script_name: build_compiler + create_binary: + script_name: create_binary + build_onikiri: + script_name: build_onikiri + experiment_setup: + script_name: experiment_setup + experiment: + script_name: experiment + plot: + script_name: plot + Preliminary_build_onikiri: + script_name: Preliminary_build_onikiri + Preliminary_create_binary: + script_name: Preliminary_create_binary + Preliminary_experiment_setup: + script_name: Preliminary_experiment_setup + Preliminary_experiment: + script_name: Preliminary_experiment + Preliminary_plot: + script_name: Preliminary_plot diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/build_compiler.sh b/script/reproduce-ieee-acm-micro2023-paper-87/build_compiler.sh new file mode 100644 index 0000000000..4a43299a05 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/build_compiler.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ + + +cd A-riscv/ + +git clone https://github.com/riscv-collab/riscv-gnu-toolchain +cd riscv-gnu-toolchain/ +git checkout 2022.01.17 +CFLAGS="-O2 
-static" ./configure --prefix=$(realpath ../riscv_gcc111) --with-arch=rv64g +make linux -j$(nproc) +make -j$(nproc) +cd ../ + +cd musl/ +CC=../riscv_gcc111/bin/riscv64-unknown-linux-gnu-gcc CROSS_COMPILE=../riscv_gcc111/bin/riscv64-unknown-linux-gnu- ./configure --prefix=$(realpath ../musl-gcc) --target=riscv64 +make -j$(nproc) +make install +cd ../../ + +wget https://github.com/llvm/llvm-project/releases/download/llvmorg-12.0.1/clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz +tar xf clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz +mv clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu- clang+llvm-12.0.1-x86_64-linux-gnu-ubuntu-16.04 diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/build_onikiri.sh b/script/reproduce-ieee-acm-micro2023-paper-87/build_onikiri.sh new file mode 100644 index 0000000000..cb0de224b8 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/build_onikiri.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ +cd onikiri2/project/gcc/ +make -j$(nproc) +cd ../../../ diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/create_binary.sh b/script/reproduce-ieee-acm-micro2023-paper-87/create_binary.sh new file mode 100644 index 0000000000..aaf0ebb50d --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/create_binary.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ + +sed s@~@..@ -i A-riscv/stuff/make.inc +cd A-riscv/coremark/ +make +cd ../../ + +cd B-straight/toolchain/Test/coremark/ +make +cd ../../../../ + +cd C-clockhands/coremark/ +make +cd ../../ diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/experiment.sh b/script/reproduce-ieee-acm-micro2023-paper-87/experiment.sh new file mode 100644 index 0000000000..669eaa0d2b --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/experiment.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ + +cd evaluation/ +make -j$(nproc) diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/experiment_setup.sh b/script/reproduce-ieee-acm-micro2023-paper-87/experiment_setup.sh new file mode 100644 index 0000000000..c112258a9a --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/experiment_setup.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ + +cp A-riscv/coremark/rvbin/coremark.rvbin evaluation/0.coremark +cp B-straight/toolchain/Test/coremark/stbin/coremark.stbin evaluation/0.coremark +cp C-clockhands/coremark/chbin/coremark.chbin evaluation/0.coremark +cp onikiri2/project/gcc/onikiri2/a.out evaluation/onikiri2 diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/install_deps.sh 
b/script/reproduce-ieee-acm-micro2023-paper-87/install_deps.sh new file mode 100644 index 0000000000..2a8c9c7162 --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/install_deps.sh @@ -0,0 +1,4 @@ +echo "Install dependencies to build riscv-gcc." +sudo apt install autoconf automake autotools-dev curl python3 python3-pip libmpc-dev libmpfr-dev libgmp-dev gawk build-essential bison flex texinfo gperf libtool patchutils bc zlib1g-dev libexpat-dev ninja-build git cmake libglib2.0-dev +echo "Install dependencies to make figures." +sudo apt install gnuplot diff --git a/script/reproduce-ieee-acm-micro2023-paper-87/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-87/plot.sh new file mode 100644 index 0000000000..c6f2910a3e --- /dev/null +++ b/script/reproduce-ieee-acm-micro2023-paper-87/plot.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +CUR_DIR=${PWD} + +echo "" +echo "Current execution path: ${CUR_DIR}" +echo "Path to script: ${CM_TMP_CURRENT_SCRIPT_PATH}" + +echo "${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}" + +cd ${CM_ARTIFACT_CLOCKHANDS_EXTRACTED}/Clockhands_Artifact_MICRO2023/ClockhandsEvaluation/ + +cd evaluation/ + +grep ExecutedCycles way*/*.xml | grep -v way[^v]*chbin | sort -V | sed -e 's/\(way[0-9]*\)-.*coremark./\1 /g' -e 's/bin.xml.*"\(.*\)"/ \1/' | awk 'NR==1{a=$3}NR%3==1{printf($1)}{printf(" "a/$3)}NR%3==0{print""}' > PerformanceImprovement.dat +echo 'set terminal png; set out "PerformanceImprovement.png"; set style histogram clustered; plot [] [0:2] "PerformanceImprovement.dat" using 2:xtic(1) with histogram title "R", "PerformanceImprovement.dat" using 3 with histogram title "S", "PerformanceImprovement.dat" using 4 with histogram title "C";' | gnuplot + +grep Retirer -B3 way8-*/*.xml | grep NumOpCode | grep -v way[^v]*chbin | sed 'y/",/ /' | awk 'NR==1{for(i=3;i<37;++i){a+=$(i)}}{for(i=3;i<37;++i){$(i)/=a}}{print (NR==1?"R":NR==2?"S":"C"),$4+$5,$9,$7,$10+$20,$11+$21,$14+$15,$16+$17,$22+$23+$24+$25+$26+$27+$28+$29,$13,$33,$30+$31}' > InstructionBreakdown.dat +echo 'set terminal png; set out "InstructionBreakdown.png"; set style histogram rowstacked; set key invert; plot "InstructionBreakdown.dat" using 2:xtic(1) with histogram title "Call+Ret", "InstructionBreakdown.dat" using 3 with histogram title "Jump", "InstructionBreakdown.dat" using 4 with histogram title "CondBr", "InstructionBreakdown.dat" using 5 with histogram title "Load", "InstructionBreakdown.dat" using 6 with histogram title "Store", "InstructionBreakdown.dat" using 7 with histogram title "ALU", "InstructionBreakdown.dat" using 8 with histogram title "Mul+Div", "InstructionBreakdown.dat" using 9 with histogram title "FLOPs", "InstructionBreakdown.dat" using 10 with histogram title "Move", "InstructionBreakdown.dat" using 11 with histogram title "NOP", "InstructionBreakdown.dat" using 12 with histogram title "Others";' | gnuplot + +cat <(grep SkippedInsns skip-result/*.chbin.xml) <(grep 'Register.*Frequency' skip-result/*.chbin.xml) | sed 'y/",/ /' | awk 'NR==1{insns=$2}NR!=1{for(s=t=u=v=i=0;i<16;++i){s+=$(2+i);t+=$(18+i);u+=$(34+i);v+=$(50+i)}print (NR==2?"Write":"Read"),s/insns,t/insns,u/insns,v/insns,(NR==2?(insns-s-t-u-v)/insns:0)}' > HandBreakdown.dat +echo 'set terminal png; set out "HandBreakdown.png"; set style histogram rowstacked; set key invert; plot "HandBreakdown.dat" using 2:xtic(1) with histogram title "s hand", "HandBreakdown.dat" using 3 with histogram title "t hand", "HandBreakdown.dat" using 4 with histogram title "u hand", "HandBreakdown.dat" using 5 with histogram title "v hand", "HandBreakdown.dat" 
using 6 with histogram title "no dst hand";' | gnuplot + +cat <(grep SkippedInsns skip-result/*.chbin.xml) <(grep LifetimeDistributionKey skip-result/*.chbin.xml) <(grep LifetimeDistributionCount skip-result/*.chbin.xml) | sed 'y/",/ /' | awk 'NR==1{insns=$2}NR==2{for(i=2;i<700;++i){a[i]=$(i)}}NR>2{sum=1e-300;for(i=699;i>1;--i){sum+=$(i);b[NR][i]=sum/insns}}END{for(i=2;i<700;++i){print a[i],b[3][i],b[4][i],b[5][i],b[6][i]}}' > LifetimeByHand.dat +echo 'set terminal png; set out "LifetimeByHand.png"; set logscale x; set logscale y; plot [1:1e6] [1e-6:1] "LifetimeByHand.dat" using 1:2 with line title "v", "LifetimeByHand.dat" using 1:3 with line title "u", "LifetimeByHand.dat" using 1:4 with line title "t", "LifetimeByHand.dat" using 1:5 with line title "s";' | gnuplot + +cat <(grep SkippedInsns skip-result/*.rvbin.xml) <(grep LifetimeDistributionKey skip-result/*.rvbin.xml) <(grep LifetimeDistributionCountAll skip-result/*.rvbin.xml) | sed 'y/",/ /' | awk 'NR==1{insns=$2}NR==2{for(i=2;i<700;++i){a[i]=$(i)}}NR==3{for(i=699;i>1;--i){sum+=$(i);print a[i],sum/insns}}' > Lifetime-RV.dat +echo 'set terminal png; set out "Lifetime-RV.png"; set logscale x; set logscale y; plot [1:1e6] [1e-6:1] "Lifetime-RV.dat" using 1:2 with line title "RV";' | gnuplot +cat <(grep SkippedInsns skip-result/*.stbin.xml) <(grep LifetimeDistributionKey skip-result/*.stbin.xml) <(grep LifetimeDistributionCountAll skip-result/*.stbin.xml) | sed 'y/",/ /' | awk 'NR==1{insns=$2}NR==2{for(i=2;i<700;++i){a[i]=$(i)}}NR==3{for(i=699;i>1;--i){sum+=$(i);print a[i],sum/insns}}' > Lifetime-ST.dat +echo 'set terminal png; set out "Lifetime-ST.png"; set logscale x; set logscale y; plot [1:1e6] [1e-6:1] "Lifetime-ST.dat" using 1:2 with line title "ST";' | gnuplot +cat <(grep SkippedInsns skip-result/*.chbin.xml) <(grep LifetimeDistributionKey skip-result/*.chbin.xml) <(grep LifetimeDistributionCountAll skip-result/*.chbin.xml) | sed 'y/",/ /' | awk 'NR==1{insns=$2}NR==2{for(i=2;i<700;++i){a[i]=$(i)}}NR==3{for(i=699;i>1;--i){sum+=$(i);print a[i],sum/insns}}' > Lifetime-CH.dat +echo 'set terminal png; set out "Lifetime-CH.png"; set logscale x; set logscale y; plot [1:1e6] [1e-6:1] "Lifetime-CH.dat" using 1:2 with line title "CH";' | gnuplot + +echo "see $(pwd)/*.png!" diff --git a/script/reproduce-micro-paper-2023-victima/README-extra.md b/script/reproduce-ieee-acm-micro2023-paper-96/README.md similarity index 64% rename from script/reproduce-micro-paper-2023-victima/README-extra.md rename to script/reproduce-ieee-acm-micro2023-paper-96/README.md index b4c01e1338..68c190378a 100644 --- a/script/reproduce-micro-paper-2023-victima/README-extra.md +++ b/script/reproduce-ieee-acm-micro2023-paper-96/README.md @@ -10,35 +10,36 @@ Install MLCommmons CM using [this guide](https://github.com/mlcommons/ck/blob/ma Install reusable MLCommons automations: ```bash -cm pull repo mlcommons@cm4mlops --checkout=dev +cm pull repo mlcommons@ck ``` -### Run Victima via CM interface - -The core CM script for Victima will be available under ```/CM/repos/mlcommons@cm4mlops/script/reproduce-micro-2023-paper-victima``` +Install this repository with CM interface for reproduced experiments: +```bash +cm pull repo ctuning@cm4research +``` -It is described by `_cm.yaml` and several native scripts. 
+### Run Victima via CM interface
 Perform the following steps to evaluate Victima with MLCommons CM automation language:
 1) This command will install system dependencies for Docker and require sudo (skip it if you have Docker installed):
 ```bash
-cmr "reproduce paper micro 2023 victima _install_deps"
+cmr "reproduce project m 2023 victima _install_deps"
 ```
 2) This command will prepare and run all experiments via Docker:
 ```bash
-cmr "reproduce paper micro 2023 victima _run"
+cmr "reproduce project m 2023 victima _run"
 ```
 You can specify --job_manager and --container if needed:
 ```bash
-cmr "reproduce paper micro 2023 victima _run" --job_manager=native|slurm --contianer=docker|podman
+cmr "reproduce project m 2023 victima _run" --job_manager=native|slurm --container=docker|podman
 ```
 3) In case of successful execution of a previous command, this command will generate plots to help you validate results from the article:
 ```bash
-cmr "reproduce paper micro 2023 victima _plot"
+cmr "reproduce project m 2023 victima _plot"
 ```
diff --git a/script/reproduce-micro-paper-2023-victima/_cm.yaml b/script/reproduce-ieee-acm-micro2023-paper-96/_cm.yaml
similarity index 84%
rename from script/reproduce-micro-paper-2023-victima/_cm.yaml
rename to script/reproduce-ieee-acm-micro2023-paper-96/_cm.yaml
index d20e5436a4..7daa9e6635 100644
--- a/script/reproduce-micro-paper-2023-victima/_cm.yaml
+++ b/script/reproduce-ieee-acm-micro2023-paper-96/_cm.yaml
@@ -1,4 +1,4 @@
-alias: reproduce-micro-paper-2023-victima
+alias: reproduce-ieee-acm-micro2023-paper-96
 automation_alias: script
 automation_uid: 5b4e0237da074764
 cache: false
@@ -15,7 +15,7 @@ deps:
 - tags: get,git,repo,_repo.https://github.com/CMU-SAFARI/Victima
   env:
     CM_GIT_ENV_KEY: 'CMU_SAFARI_VICTIMA'
-  extra_cache_tags: micro23,artifact,ae,cmu,safari,victima
+  extra_cache_tags: artifact,cmu,safari,victima
 input_mapping:
   job_manager: CM_VICTIMA_JOB_MANAGER
   container: CM_VICTIMA_CONTAINER
@@ -24,8 +24,13 @@ tags:
 - reproduce
 - project
 - paper
+- m
 - micro
 - micro-2023
+- '2023'
+- '96'
+- cmu
+- safari
 - victima
 uid: fc5bee3426174e7b
 variations:
diff --git a/script/reproduce-ieee-acm-micro2023-paper-96/customize.py b/script/reproduce-ieee-acm-micro2023-paper-96/customize.py
new file mode 100644
index 0000000000..d12f9b3e1d
--- /dev/null
+++ b/script/reproduce-ieee-acm-micro2023-paper-96/customize.py
@@ -0,0 +1,22 @@
+from cmind import utils
+import os
+
+def preprocess(i):
+
+    os_info = i['os_info']
+
+    env = i['env']
+
+    meta = i['meta']
+
+    automation = i['automation']
+
+    quiet = (env.get('CM_QUIET', False) == 'yes')
+
+    return {'return':0}
+
+def postprocess(i):
+
+    env = i['env']
+
+    return {'return':0}
diff --git a/script/reproduce-micro-paper-2023-victima/install_deps.sh b/script/reproduce-ieee-acm-micro2023-paper-96/install_deps.sh
similarity index 100%
rename from script/reproduce-micro-paper-2023-victima/install_deps.sh
rename to script/reproduce-ieee-acm-micro2023-paper-96/install_deps.sh
diff --git a/script/reproduce-ieee-acm-micro2023-paper-96/main.py b/script/reproduce-ieee-acm-micro2023-paper-96/main.py
new file mode 100644
index 0000000000..d851f1450f
--- /dev/null
+++ b/script/reproduce-ieee-acm-micro2023-paper-96/main.py
@@ -0,0 +1,10 @@
+import os
+
+if __name__ == "__main__":
+
+    print ('')
+    print ('Main script:')
+    print ('Experiment: {}'.format(os.environ.get('CM_EXPERIMENT','')))
+    print ('')
+
+    exit(0)
diff --git a/script/reproduce-micro-paper-2023-victima/plot.sh b/script/reproduce-ieee-acm-micro2023-paper-96/plot.sh
similarity index 100%
rename from script/reproduce-micro-paper-2023-victima/plot.sh
rename to script/reproduce-ieee-acm-micro2023-paper-96/plot.sh
diff --git a/script/reproduce-micro-paper-2023-victima/run.sh b/script/reproduce-ieee-acm-micro2023-paper-96/run.sh
similarity index 100%
rename from script/reproduce-micro-paper-2023-victima/run.sh
rename to script/reproduce-ieee-acm-micro2023-paper-96/run.sh
diff --git a/script/reproduce-micro-paper-2023-victima/README.md b/script/reproduce-micro-paper-2023-victima/README.md
deleted file mode 100644
index fa2adfdf7a..0000000000
--- a/script/reproduce-micro-paper-2023-victima/README.md
+++ /dev/null
@@ -1 +0,0 @@
-Please see [https://docs.mlcommons.org/cm4mlops/scripts/Reproducibility-and-artifact-evaluation/reproduce-micro-paper-2023-victima](https://docs.mlcommons.org/cm4mlops/scripts/Reproducibility-and-artifact-evaluation/reproduce-micro-paper-2023-victima) for the documentation of this CM script.
diff --git a/script/reproduce-micro-paper-2023-xyz/README.md b/script/reproduce-micro-paper-2023-xyz/README.md
deleted file mode 100644
index 0b4f7dcaad..0000000000
--- a/script/reproduce-micro-paper-2023-xyz/README.md
+++ /dev/null
@@ -1,178 +0,0 @@
-<details>
-<summary>Click here to see the table of contents.</summary>
-
-* [About](#about)
-* [Summary](#summary)
-* [Reuse this script in your project](#reuse-this-script-in-your-project)
-  * [ Install CM automation language](#install-cm-automation-language)
-  * [ Check CM script flags](#check-cm-script-flags)
-  * [ Run this script from command line](#run-this-script-from-command-line)
-  * [ Run this script from Python](#run-this-script-from-python)
-  * [ Run this script via GUI](#run-this-script-via-gui)
-  * [ Run this script via Docker (beta)](#run-this-script-via-docker-(beta))
-* [Customization](#customization)
-  * [ Variations](#variations)
-  * [ Script flags mapped to environment](#script-flags-mapped-to-environment)
-  * [ Default environment](#default-environment)
-* [Script workflow, dependencies and native scripts](#script-workflow-dependencies-and-native-scripts)
-* [Script output](#script-output)
-* [New environment keys (filter)](#new-environment-keys-(filter))
-* [New environment keys auto-detected from customize](#new-environment-keys-auto-detected-from-customize)
-* [Maintainers](#maintainers)
-
-</details>
-
-*Note that this README is automatically generated - don't edit!*
-
-### About
-
-
-See extra [notes](README-extra.md) from the authors and contributors.
-
-#### Summary
-
-* CM GitHub repository: *[mlcommons@cm4mlops](https://github.com/mlcommons/cm4mlops)*
-* GitHub directory for this script: *[GitHub](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz)*
-* CM meta description for this script: *[_cm.yaml](_cm.yaml)*
-* CM "database" tags to find this script: *reproduce,paper,micro,micro-2023,victima*
-* Output cached? *False*
-___
-### Reuse this script in your project
-
-#### Install CM automation language
-
-* [Installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md)
-* [CM intro](https://doi.org/10.5281/zenodo.8105339)
-
-#### Pull CM repository with this automation
-
-```cm pull repo mlcommons@cm4mlops --checkout=dev```
-
-
-#### Run this script from command line
-
-1. `cm run script --tags=reproduce,paper,micro,micro-2023,victima[,variations] [--input_flags]`
-
-2. `cmr "reproduce paper micro micro-2023 victima[ variations]" [--input_flags]`
-
-* `variations` can be seen [here](#variations)
-
-* `input_flags` can be seen [here](#script-flags-mapped-to-environment)
-
-#### Run this script from Python
-
-<details>
-<summary>Click here to expand this section.</summary>
-
-```python
-
-import cmind
-
-r = cmind.access({'action':'run'
-                  'automation':'script',
-                  'tags':'reproduce,paper,micro,micro-2023,victima'
-                  'out':'con',
-                  ...
-                  (other input keys for this script)
-                  ...
-                 })
-
-if r['return']>0:
-    print (r['error'])
-
-```
-
-</details>
-
-
-#### Run this script via GUI
-
-```cmr "cm gui" --script="reproduce,paper,micro,micro-2023,victima"```
-
-Use this [online GUI](https://cKnowledge.org/cm-gui/?tags=reproduce,paper,micro,micro-2023,victima) to generate CM CMD.
-
-#### Run this script via Docker (beta)
-
-`cm docker script "reproduce paper micro micro-2023 victima[ variations]" [--input_flags]`
-
-___
-### Customization
-
-
-#### Variations
-
-  * *No group (any variation can be selected)*
-<details>
-<summary>Click here to expand this section.</summary>
-
-  * `_install_deps`
-    - Workflow:
-  * `_plot`
-    - Workflow:
-  * `_run`
-    - Workflow:
-
-</details>
-
-
-#### Script flags mapped to environment
-<details>
-<summary>Click here to expand this section.</summary>
-
-* `--container=value`  →  `CM_VICTIMA_CONTAINER=value`
-* `--job_manager=value`  →  `CM_VICTIMA_JOB_MANAGER=value`
-
-**Above CLI flags can be used in the Python CM API as follows:**
-
-```python
-r=cm.access({... , "container":...}
-```
-
-</details>
-
-#### Default environment
-
-<details>
-<summary>Click here to expand this section.</summary>
-
-These keys can be updated via `--env.KEY=VALUE` or `env` dictionary in `@input.json` or using script flags.
-
-* CM_VICTIMA_JOB_MANAGER: `native`
-* CM_VICTIMA_CONTAINER: `docker`
-
-</details>
-
-___
-### Script workflow, dependencies and native scripts
-
-<details>
-<summary>Click here to expand this section.</summary>
-
-  1. ***Read "deps" on other CM scripts from [meta](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/_cm.yaml)***
-     * detect,os
-       - CM script: [detect-os](https://github.com/mlcommons/cm4mlops/tree/main/script/detect-os)
-     * get,python
-       * CM names: `--adr.['python', 'python3']...`
-       - CM script: [get-python3](https://github.com/mlcommons/cm4mlops/tree/main/script/get-python3)
-     * get,git,repo,_repo.https://github.com/CMU-SAFARI/Victima
-       - CM script: [get-git-repo](https://github.com/mlcommons/cm4mlops/tree/main/script/get-git-repo)
-  1. ***Run "preprocess" function from [customize.py](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/customize.py)***
-  1. Read "prehook_deps" on other CM scripts from [meta](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/_cm.yaml)
-  1. ***Run native script if exists***
-     * [run.sh](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/run.sh)
-  1. Read "posthook_deps" on other CM scripts from [meta](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/_cm.yaml)
-  1. ***Run "postrocess" function from [customize.py](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/customize.py)***
-  1. Read "post_deps" on other CM scripts from [meta](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz/_cm.yaml)
-</details>
-
-___
-### Script output
-`cmr "reproduce paper micro micro-2023 victima[,variations]" [--input_flags] -j`
-#### New environment keys (filter)
-
-#### New environment keys auto-detected from customize
-
-___
-### Maintainers
-
-* [Open MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md)
\ No newline at end of file