From 832e55b48ebf10f9b35a3833ec571de5c9d5631f Mon Sep 17 00:00:00 2001 From: Maciej Urbanski Date: Fri, 23 Aug 2024 14:25:20 +0200 Subject: [PATCH] reference experiment results --- .../experiment.yaml | 6 + .../output.yaml | 8 + .../run.local.log | 15 + .../stdout.txt | 19 + .../sysinfo.yaml | 563 ++++++++++++++++ .../experiment.yaml | 6 + .../output.yaml | 8 + .../run.local.log | 15 + .../stdout.txt | 34 + .../sysinfo.yaml | 560 ++++++++++++++++ .../experiment.yaml | 6 + .../output.yaml | 8 + .../run.local.log | 15 + .../stdout.txt | 49 ++ .../sysinfo.yaml | 558 ++++++++++++++++ .../experiment.yaml | 6 + .../output.yaml | 8 + .../run.local.log | 15 + .../stdout.txt | 36 + .../sysinfo.yaml | 544 +++++++++++++++ .../experiment.yaml | 6 + .../output.yaml | 8 + .../run.local.log | 15 + .../stdout.txt | 19 + .../sysinfo.yaml | 541 +++++++++++++++ .../experiment.yaml | 6 + .../2024-08-22_12-54-54_1x_a100x/output.yaml | 8 + .../run.local.log | 15 + .../2024-08-22_12-54-54_1x_a100x/stdout.txt | 19 + .../2024-08-22_12-54-54_1x_a100x/sysinfo.yaml | 542 +++++++++++++++ .../experiment.yaml | 6 + .../output.yaml | 8 + .../run.local.log | 12 + .../stdout.txt | 19 + .../sysinfo.yaml | 510 +++++++++++++++ .../experiment.yaml | 6 + .../output.yaml | 8 + .../run.local.log | 15 + .../stdout.txt | 19 + .../sysinfo.yaml | 606 +++++++++++++++++ .../experiment.yaml | 6 + .../output.yaml | 8 + .../run.local.log | 15 + .../stdout.txt | 19 + .../sysinfo.yaml | 617 ++++++++++++++++++ .../experiment.yaml | 6 + .../output.yaml | 8 + .../run.local.log | 15 + .../stdout.txt | 19 + .../sysinfo.yaml | 606 +++++++++++++++++ .../experiment.yaml | 6 + .../output.yaml | 8 + .../run.local.log | 15 + .../stdout.txt | 19 + .../sysinfo.yaml | 510 +++++++++++++++ 55 files changed, 6744 insertions(+) create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/sysinfo.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/sysinfo.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/sysinfo.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/sysinfo.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/sysinfo.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/sysinfo.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/sysinfo.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/sysinfo.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/sysinfo.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/sysinfo.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/experiment.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output.yaml create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/run.local.log create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/stdout.txt create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/sysinfo.yaml diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/experiment.yaml new file mode 100644 index 0000000..94bc969 --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/experiment.yaml @@ -0,0 +1,6 @@ +comment: 1x A100 SXM4 80GB +experiment: vllm_llama_3_70b_instruct_awq +experiment_hash: exp_hash_v1:7aa490 +run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb +slug: 1x_a100_sxm4_80gb +timestamp: 2024-08-22_12-16-19 diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output.yaml new file mode 100644 index 0000000..88b431d --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output.yaml @@ -0,0 +1,8 @@ +Count to 1000, skip unpopular numbers: 5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e +Describe justice system in UK vs USA in 2000-5000 words: 83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede +Describe schooling system in UK vs USA in 2000-5000 words: f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1 +Explain me some random problem for me in 2000-5000 words: 143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c +Tell me entire history of USA: 210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518 +Write a ballad. Pick a random theme.: 21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3 +Write an epic story about a dragon and a knight: 81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94 +Write an essay about being a Senior developer.: 0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/run.local.log new file mode 100644 index 0000000..331afd3 --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/run.local.log @@ -0,0 +1,15 @@ +2024-08-22 12:16:19,452 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x A100 SXM4 80GB +2024-08-22 12:16:19,455 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/run.local.log +2024-08-22 12:16:19,564 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1) +2024-08-22 12:16:19,769 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n' +2024-08-22 12:16:19,772 - paramiko.transport - INFO - Authentication (publickey) successful! +2024-08-22 12:16:19,774 - __main__ - INFO - Syncing files to remote +2024-08-22 12:16:19,961 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output' stdout: '' stderr: '' status_code: 0 +2024-08-22 12:16:22,432 - __main__ - INFO - Setting up remote environment +2024-08-22 12:16:25,588 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n curl -LsSf https://astral.sh/uv/install.sh | sh\n export PATH=$HOME/.cargo/bin:$PATH\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n uv venv -p python3.11 --python-preference managed\n source .venv/bin/activate \n uv pip install ./deterministic_ml*.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\n ' stdout: "installing to /root/.cargo/bin\n uv\n uvx\neverything's installed!\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-16-19_1x_a100_sxm4_80gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-16-19_1x_a100_sxm4_80gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-16-19_1x_a100_sxm4_80gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 57ms\nPrepared 1 package in 2ms\nInstalled 108 packages in 473ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0 +2024-08-22 12:16:25,608 - __main__ - INFO - Gathering system info +2024-08-22 12:16:28,471 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-16-19_1x_a100_sxm4_80gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-16-19_1x_a100_sxm4_80gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-16-19_1x_a100_sxm4_80gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 12:16:28,485 - __main__ - INFO - Running experiment code on remote +2024-08-22 12:20:56,768 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 10:16:34 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:16:34 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 10:16:35 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 10:16:36 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 10:17:10 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 10:17:16 gpu_executor.py:102] # GPU blocks: 6068, # CPU blocks: 819\nmodel loading took 46.38 seconds\nStarting 8 responses generation\n8 responses generation took 213.59 seconds\n{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e',\n 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1',\n 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c',\n 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518',\n 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3',\n 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94',\n 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-16-19_1x_a100_sxm4_80gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-16-19_1x_a100_sxm4_80gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-16-19_1x_a100_sxm4_80gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output/stdout.txt\n\rLoading safetensors checkpoint shards: 0% Completed | 0/9 [00:00 ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-23-42_2x_a100_sxm4_40gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-23-42_2x_a100_sxm4_40gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-23-42_2x_a100_sxm4_40gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 12:24:25,130 - __main__ - INFO - Running experiment code on remote +2024-08-22 12:30:14,815 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output/stdout.txt' stdout: "gpu_count=2\nStarting model loading\nINFO 08-22 10:24:31 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:24:31 config.py:729] Defaulting to use mp for distributed inference\nINFO 08-22 10:24:31 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nWARNING 08-22 10:24:32 multiproc_gpu_executor.py:59] Reducing Torch parallelism from 32 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.\nINFO 08-22 10:24:32 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:32 multiproc_worker_utils.py:215] Worker ready; awaiting tasks\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:33 utils.py:841] Found nccl from library libnccl.so.2\nINFO 08-22 10:24:33 utils.py:841] Found nccl from library libnccl.so.2\nINFO 08-22 10:24:33 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:33 pynccl.py:63] vLLM is using nccl==2.20.5\nINFO 08-22 10:24:33 custom_all_reduce_utils.py:203] generating GPU P2P access cache in /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\nINFO 08-22 10:24:40 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:40 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\nINFO 08-22 10:24:40 shm_broadcast.py:235] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=, local_subscribe_port=48445, remote_subscribe_port=None)\nINFO 08-22 10:24:40 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:40 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 10:24:40 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:40 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 10:27:21 model_runner.py:732] Loading model weights took 18.5516 GB\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:27:22 model_runner.py:732] Loading model weights took 18.5516 GB\nINFO 08-22 10:27:27 distributed_gpu_executor.py:56] # GPU blocks: 5022, # CPU blocks: 1638\nmodel loading took 178.84 seconds\nStarting 8 responses generation\n8 responses generation took 160.77 seconds\n{'Count to 1000, skip unpopular numbers': 'ceff25d8303b1a21729c9c6685541976d5236852cac0ef5626e5cdf54f76e7be4504d7e21d8c6805258794ffb0ec1a6635486797c1ac098666be3f1605650b70',\n 'Describe justice system in UK vs USA in 2000-5000 words': '5efab605ab72f01ce1f06b22898d59584fe9395fe478029d3120c282332331dbdad870ce6a77771eddbfacc28bc09e58de88a53ae623610bf7dfda7f56d91b6e',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd',\n 'Explain me some random problem for me in 2000-5000 words': 'c504642a6f6c90f2ad6d46b9e43ebfd8d055f76613df17609c360734069a9acd7e0c9cbc2f49b636d80c510adb84b04d962cb5d5d23a179fbf60873ed66fefa7',\n 'Tell me entire history of USA': '24bc6463cf8635c4ed91234966fe4c1013318f86de7e1071d44c3d84ca1dda2a891998d5ad2454ef6701e9173fa66f3d4a2655d541d9b08ff057b0bfa15e56fa',\n 'Write a ballad. Pick a random theme.': 'cbe4bcec1e75a20c03396126efc338bb10b49fa75e86494095b71d985f2f071ea611b3ca76781abe52ca9b14c3720f15459d00ec755e02ac1113adeb91cd73e5',\n 'Write an epic story about a dragon and a knight': '2fc7b6d29f3c45a25c2aa80295bb038239e6641b65eec7e108e560d399378b69556dfbfeb728a2e7fd7efd0173e1b0fd62ae7240f69f5c810c2a6671f26e197e',\n 'Write an essay about being a Senior developer.': '36cd178f3af54c90392779118a679a8905e3aeea8e4f471469886b61ab047b1c05beaef7e04d6f22eefe5dc6b0ca5ea1b1bbbe31a1961f7a3138b087734f5b09'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-23-42_2x_a100_sxm4_40gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-23-42_2x_a100_sxm4_40gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-23-42_2x_a100_sxm4_40gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output/stdout.txt\n\rLoading safetensors checkpoint shards: 0% Completed | 0/9 [00:00, local_subscribe_port=48445, remote_subscribe_port=None) +INFO 08-22 10:24:40 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq... +(VllmWorkerProcess pid=842) INFO 08-22 10:24:40 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq... +INFO 08-22 10:24:40 weight_utils.py:225] Using model weights format ['*.safetensors'] +(VllmWorkerProcess pid=842) INFO 08-22 10:24:40 weight_utils.py:225] Using model weights format ['*.safetensors'] +INFO 08-22 10:27:21 model_runner.py:732] Loading model weights took 18.5516 GB +(VllmWorkerProcess pid=842) INFO 08-22 10:27:22 model_runner.py:732] Loading model weights took 18.5516 GB +INFO 08-22 10:27:27 distributed_gpu_executor.py:56] # GPU blocks: 5022, # CPU blocks: 1638 +model loading took 178.84 seconds +Starting 8 responses generation +8 responses generation took 160.77 seconds +{'Count to 1000, skip unpopular numbers': 'ceff25d8303b1a21729c9c6685541976d5236852cac0ef5626e5cdf54f76e7be4504d7e21d8c6805258794ffb0ec1a6635486797c1ac098666be3f1605650b70', + 'Describe justice system in UK vs USA in 2000-5000 words': '5efab605ab72f01ce1f06b22898d59584fe9395fe478029d3120c282332331dbdad870ce6a77771eddbfacc28bc09e58de88a53ae623610bf7dfda7f56d91b6e', + 'Describe schooling system in UK vs USA in 2000-5000 words': 'b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd', + 'Explain me some random problem for me in 2000-5000 words': 'c504642a6f6c90f2ad6d46b9e43ebfd8d055f76613df17609c360734069a9acd7e0c9cbc2f49b636d80c510adb84b04d962cb5d5d23a179fbf60873ed66fefa7', + 'Tell me entire history of USA': '24bc6463cf8635c4ed91234966fe4c1013318f86de7e1071d44c3d84ca1dda2a891998d5ad2454ef6701e9173fa66f3d4a2655d541d9b08ff057b0bfa15e56fa', + 'Write a ballad. Pick a random theme.': 'cbe4bcec1e75a20c03396126efc338bb10b49fa75e86494095b71d985f2f071ea611b3ca76781abe52ca9b14c3720f15459d00ec755e02ac1113adeb91cd73e5', + 'Write an epic story about a dragon and a knight': '2fc7b6d29f3c45a25c2aa80295bb038239e6641b65eec7e108e560d399378b69556dfbfeb728a2e7fd7efd0173e1b0fd62ae7240f69f5c810c2a6671f26e197e', + 'Write an essay about being a Senior developer.': '36cd178f3af54c90392779118a679a8905e3aeea8e4f471469886b61ab047b1c05beaef7e04d6f22eefe5dc6b0ca5ea1b1bbbe31a1961f7a3138b087734f5b09'} diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/sysinfo.yaml new file mode 100644 index 0000000..07f1b3d --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/sysinfo.yaml @@ -0,0 +1,560 @@ +cuda: + cuda: '12.1' + cudnn: 90100 +machine: + cpu: + clocks: + - 1500.0 + - 1500.0 + - 2122.33 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 3719.05 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1487.609 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1486.393 + - 1500.0 + - 1500.0 + - 1500.0 + - 2116.689 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 3100.0 + - 1500.0 + - 1500.0 + - 1497.615 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 3100.0 + - 1500.0 + - 1500.0 + - 3100.0 + - 1500.0 + - 1500.0 + - 3100.0 + - 3709.581 + - 2121.854 + - 2200.0 + - 1500.0 + - 3100.0 + - 1500.0 + - 1500.0 + - 1500.0 + - 3100.0 + - 2170.196 + - 1500.0 + - 1486.641 + count: 64 + model: AMD EPYC 9384X 32-Core Processor + docker_support: + nvidia: false + runc: false + gpu: + count: 2 + details: + - capacity: '40960' + cuda: '8.0' + driver: 560.28.03 + graphics_speed: '210' + memory_speed: '1215' + name: NVIDIA A100-SXM4-40GB + power_limit: '400.00' + - capacity: '40960' + cuda: '8.0' + driver: 560.28.03 + graphics_speed: '210' + memory_speed: '1215' + name: NVIDIA A100-SXM4-40GB + power_limit: '400.00' + hard_disk: + free: 77360568 + total: 83886080 + used: 6525512 + os: Ubuntu 22.04.4 LTS + ram: + available: 387095904 + free: 129260144 + total: 395784328 + used: 266524184 +python: + packages: + - aiohappyeyeballs==2.4.0 + - aiohttp==3.10.5 + - aiosignal==1.3.1 + - annotated-types==0.7.0 + - anyio==4.4.0 + - attrs==24.2.0 + - certifi==2024.7.4 + - charset-normalizer==3.3.2 + - click==8.1.7 + - cloudpickle==3.0.0 + - cmake==3.30.2 + - datasets==2.21.0 + - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl + - dill==0.3.8 + - diskcache==5.6.3 + - distro==1.9.0 + - fastapi==0.112.1 + - filelock==3.15.4 + - frozenlist==1.4.1 + - fsspec==2024.6.1 + - h11==0.14.0 + - httpcore==1.0.5 + - httptools==0.6.1 + - httpx==0.27.0 + - huggingface-hub==0.24.6 + - idna==3.7 + - interegular==0.3.3 + - jinja2==3.1.4 + - jiter==0.5.0 + - jsonschema==4.23.0 + - jsonschema-specifications==2023.12.1 + - lark==1.2.2 + - llvmlite==0.43.0 + - lm-format-enforcer==0.10.3 + - markupsafe==2.1.5 + - mpmath==1.3.0 + - msgpack==1.0.8 + - multidict==6.0.5 + - multiprocess==0.70.16 + - nest-asyncio==1.6.0 + - networkx==3.3 + - ninja==1.11.1.1 + - numba==0.60.0 + - numpy==1.26.4 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==9.1.0.70 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-ml-py==12.560.30 + - nvidia-nccl-cu12==2.20.5 + - nvidia-nvjitlink-cu12==12.6.20 + - nvidia-nvtx-cu12==12.1.105 + - openai==1.42.0 + - outlines==0.0.46 + - packaging==24.1 + - pandas==2.2.2 + - pillow==10.4.0 + - prometheus-client==0.20.0 + - prometheus-fastapi-instrumentator==7.0.0 + - protobuf==5.27.3 + - psutil==6.0.0 + - py-cpuinfo==9.0.0 + - pyairports==2.1.1 + - pyarrow==17.0.0 + - pycountry==24.6.1 + - pydantic==2.8.2 + - pydantic-core==2.20.1 + - python-dateutil==2.9.0.post0 + - python-dotenv==1.0.1 + - pytz==2024.1 + - pyyaml==6.0.2 + - pyzmq==26.2.0 + - ray==2.34.0 + - referencing==0.35.1 + - regex==2024.7.24 + - requests==2.32.3 + - rpds-py==0.20.0 + - safetensors==0.4.4 + - sentencepiece==0.2.0 + - setuptools==73.0.1 + - six==1.16.0 + - sniffio==1.3.1 + - starlette==0.38.2 + - sympy==1.13.2 + - tiktoken==0.7.0 + - tokenizers==0.19.1 + - torch==2.4.0 + - torchvision==0.19.0 + - tqdm==4.66.5 + - transformers==4.44.1 + - triton==3.0.0 + - typing-extensions==4.12.2 + - tzdata==2024.1 + - urllib3==2.2.2 + - uvicorn==0.30.6 + - uvloop==0.20.0 + - vllm==0.5.4 + - vllm-flash-attn==2.6.1 + - watchfiles==0.23.0 + - websockets==13.0 + - xformers==0.0.27.post2 + - xxhash==3.5.0 + - yarl==1.9.4 + version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ] +system: + dpkg_packages: + - adduser==3.118ubuntu5 + - apt==2.4.12 + - base-files==12ubuntu4.6 + - base-passwd==3.5.52build1 + - bash==5.1-6ubuntu1.1 + - binutils==2.38-4ubuntu2.6 + - binutils-common==2.38-4ubuntu2.6 + - binutils-x86-64-linux-gnu==2.38-4ubuntu2.6 + - bsdutils==1:2.37.2-4ubuntu3.4 + - build-essential==12.9ubuntu3 + - bzip2==1.0.8-5build1 + - ca-certificates==20230311ubuntu0.22.04.1 + - coreutils==8.32-4.1ubuntu1.2 + - cpp==4:11.2.0-1ubuntu1 + - cpp-11==11.4.0-1ubuntu1~22.04 + - cuda-cccl-12-5==12.5.39-1 + - cuda-command-line-tools-12-5==12.5.1-1 + - cuda-compat-12-5==555.42.06-1 + - cuda-compiler-12-5==12.5.1-1 + - cuda-crt-12-5==12.5.82-1 + - cuda-cudart-12-5==12.5.82-1 + - cuda-cudart-dev-12-5==12.5.82-1 + - cuda-cuobjdump-12-5==12.5.39-1 + - cuda-cupti-12-5==12.5.82-1 + - cuda-cupti-dev-12-5==12.5.82-1 + - cuda-cuxxfilt-12-5==12.5.82-1 + - cuda-driver-dev-12-5==12.5.82-1 + - cuda-gdb-12-5==12.5.82-1 + - cuda-keyring==1.1-1 + - cuda-libraries-12-5==12.5.1-1 + - cuda-libraries-dev-12-5==12.5.1-1 + - cuda-minimal-build-12-5==12.5.1-1 + - cuda-nsight-compute-12-5==12.5.1-1 + - cuda-nvcc-12-5==12.5.82-1 + - cuda-nvdisasm-12-5==12.5.39-1 + - cuda-nvml-dev-12-5==12.5.82-1 + - cuda-nvprof-12-5==12.5.82-1 + - cuda-nvprune-12-5==12.5.82-1 + - cuda-nvrtc-12-5==12.5.82-1 + - cuda-nvrtc-dev-12-5==12.5.82-1 + - cuda-nvtx-12-5==12.5.82-1 + - cuda-nvvm-12-5==12.5.82-1 + - cuda-opencl-12-5==12.5.39-1 + - cuda-opencl-dev-12-5==12.5.39-1 + - cuda-profiler-api-12-5==12.5.39-1 + - cuda-sanitizer-12-5==12.5.81-1 + - cuda-toolkit-12-5-config-common==12.5.82-1 + - cuda-toolkit-12-config-common==12.5.82-1 + - cuda-toolkit-config-common==12.5.82-1 + - curl==7.81.0-1ubuntu1.17 + - dash==0.5.11+git20210903+057cd650a4ed-3build1 + - dbus==1.12.20-2ubuntu4.1 + - debconf==1.5.79ubuntu1 + - debianutils==5.5-1ubuntu2 + - diffutils==1:3.8-0ubuntu2 + - dirmngr==2.2.27-3ubuntu2.1 + - distro-info-data==0.52ubuntu0.7 + - dpkg==1.21.1ubuntu2.3 + - dpkg-dev==1.21.1ubuntu2.3 + - e2fsprogs==1.46.5-2ubuntu1.1 + - findutils==4.8.0-1ubuntu3 + - g++==4:11.2.0-1ubuntu1 + - g++-11==11.4.0-1ubuntu1~22.04 + - gcc==4:11.2.0-1ubuntu1 + - gcc-11==11.4.0-1ubuntu1~22.04 + - gcc-11-base==11.4.0-1ubuntu1~22.04 + - gcc-12-base==12.3.0-1ubuntu1~22.04 + - gir1.2-glib-2.0==1.72.0-1 + - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2 + - git==1:2.34.1-1ubuntu1.11 + - git-man==1:2.34.1-1ubuntu1.11 + - gnupg==2.2.27-3ubuntu2.1 + - gnupg-l10n==2.2.27-3ubuntu2.1 + - gnupg-utils==2.2.27-3ubuntu2.1 + - gnupg2==2.2.27-3ubuntu2.1 + - gpg==2.2.27-3ubuntu2.1 + - gpg-agent==2.2.27-3ubuntu2.1 + - gpg-wks-client==2.2.27-3ubuntu2.1 + - gpg-wks-server==2.2.27-3ubuntu2.1 + - gpgconf==2.2.27-3ubuntu2.1 + - gpgsm==2.2.27-3ubuntu2.1 + - gpgv==2.2.27-3ubuntu2.1 + - grep==3.7-1build1 + - gzip==1.10-4ubuntu4.1 + - hostname==3.23ubuntu2 + - init-system-helpers==1.62 + - iso-codes==4.9.0-1 + - less==590-1ubuntu0.22.04.3 + - libacl1==2.3.1-1 + - libapparmor1==3.0.4-2ubuntu2.3 + - libappstream4==0.15.2-2 + - libapt-pkg6.0==2.4.12 + - libargon2-1==0~20171227-0.3 + - libasan6==11.4.0-1ubuntu1~22.04 + - libassuan0==2.5.5-1build1 + - libatomic1==12.3.0-1ubuntu1~22.04 + - libattr1==1:2.5.1-1build1 + - libaudit-common==1:3.0.7-1build1 + - libaudit1==1:3.0.7-1build1 + - libbinutils==2.38-4ubuntu2.6 + - libblkid1==2.37.2-4ubuntu3.4 + - libbrotli1==1.0.9-2build6 + - libbsd0==0.11.5-1 + - libbz2-1.0==1.0.8-5build1 + - libc-bin==2.35-0ubuntu3.8 + - libc-dev-bin==2.35-0ubuntu3.8 + - libc6==2.35-0ubuntu3.8 + - libc6-dev==2.35-0ubuntu3.8 + - libcap-ng0==0.7.9-2.2build3 + - libcap2==1:2.44-1ubuntu0.22.04.1 + - libcap2-bin==1:2.44-1ubuntu0.22.04.1 + - libcbor0.8==0.8.0-2ubuntu1 + - libcc1-0==12.3.0-1ubuntu1~22.04 + - libcom-err2==1.46.5-2ubuntu1.1 + - libcrypt-dev==1:4.4.27-1 + - libcrypt1==1:4.4.27-1 + - libcryptsetup12==2:2.4.3-1ubuntu1.2 + - libctf-nobfd0==2.38-4ubuntu2.6 + - libctf0==2.38-4ubuntu2.6 + - libcublas-12-5==12.5.3.2-1 + - libcublas-dev-12-5==12.5.3.2-1 + - libcudnn9-cuda-12==9.2.1.18-1 + - libcudnn9-dev-cuda-12==9.2.1.18-1 + - libcufft-12-5==11.2.3.61-1 + - libcufft-dev-12-5==11.2.3.61-1 + - libcufile-12-5==1.10.1.7-1 + - libcufile-dev-12-5==1.10.1.7-1 + - libcurand-12-5==10.3.6.82-1 + - libcurand-dev-12-5==10.3.6.82-1 + - libcurl3-gnutls==7.81.0-1ubuntu1.17 + - libcurl4==7.81.0-1ubuntu1.17 + - libcusolver-12-5==11.6.3.83-1 + - libcusolver-dev-12-5==11.6.3.83-1 + - libcusparse-12-5==12.5.1.3-1 + - libcusparse-dev-12-5==12.5.1.3-1 + - libdb5.3==5.3.28+dfsg1-0.8ubuntu3 + - libdbus-1-3==1.12.20-2ubuntu4.1 + - libdebconfclient0==0.261ubuntu1 + - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4 + - libdpkg-perl==1.21.1ubuntu2.3 + - libdw1==0.186-1build1 + - libedit2==3.1-20210910-1build1 + - libelf1==0.186-1build1 + - liberror-perl==0.17029-1 + - libevent-core-2.1-7==2.1.12-stable-1build3 + - libexpat1==2.4.7-1ubuntu0.3 + - libext2fs2==1.46.5-2ubuntu1.1 + - libffi8==3.4.2-4 + - libfido2-1==1.10.0-1 + - libgcc-11-dev==11.4.0-1ubuntu1~22.04 + - libgcc-s1==12.3.0-1ubuntu1~22.04 + - libgcrypt20==1.9.4-3ubuntu3 + - libgdbm-compat4==1.23-1 + - libgdbm6==1.23-1 + - libgirepository-1.0-1==1.72.0-1 + - libglib2.0-0==2.72.4-0ubuntu2.3 + - libglib2.0-bin==2.72.4-0ubuntu2.3 + - libglib2.0-data==2.72.4-0ubuntu2.3 + - libgmp10==2:6.2.1+dfsg-3ubuntu1 + - libgnutls30==3.7.3-4ubuntu1.5 + - libgomp1==12.3.0-1ubuntu1~22.04 + - libgpg-error0==1.43-3 + - libgssapi-krb5-2==1.19.2-2ubuntu0.3 + - libgstreamer1.0-0==1.20.3-0ubuntu1 + - libhogweed6==3.7.3-1build2 + - libicu70==70.1-2 + - libidn2-0==2.3.2-2build1 + - libip4tc2==1.8.7-1ubuntu5.2 + - libisl23==0.24-2build1 + - libitm1==12.3.0-1ubuntu1~22.04 + - libjson-c5==0.15-3~ubuntu1.22.04.2 + - libk5crypto3==1.19.2-2ubuntu0.3 + - libkeyutils1==1.6.1-2ubuntu3 + - libkmod2==29-1ubuntu1 + - libkrb5-3==1.19.2-2ubuntu0.3 + - libkrb5support0==1.19.2-2ubuntu0.3 + - libksba8==1.6.0-2ubuntu0.2 + - libldap-2.5-0==2.5.17+dfsg-0ubuntu0.22.04.1 + - liblsan0==12.3.0-1ubuntu1~22.04 + - liblz4-1==1.9.3-2build2 + - liblzma5==5.2.5-2ubuntu1 + - libmd0==1.0.4-1build1 + - libmount1==2.37.2-4ubuntu3.4 + - libmpc3==1.2.1-2build1 + - libmpdec3==2.5.1-2build2 + - libmpfr6==4.1.0-3build3 + - libnccl-dev==2.22.3-1+cuda12.5 + - libnccl2==2.22.3-1+cuda12.5 + - libncurses6==6.3-2ubuntu0.1 + - libncursesw6==6.3-2ubuntu0.1 + - libnettle8==3.7.3-1build2 + - libnghttp2-14==1.43.0-1ubuntu0.2 + - libnpp-12-5==12.3.0.159-1 + - libnpp-dev-12-5==12.3.0.159-1 + - libnpth0==1.6-3build2 + - libnsl-dev==1.3.0-2build2 + - libnsl2==1.3.0-2build2 + - libnvfatbin-12-5==12.5.82-1 + - libnvfatbin-dev-12-5==12.5.82-1 + - libnvjitlink-12-5==12.5.82-1 + - libnvjitlink-dev-12-5==12.5.82-1 + - libnvjpeg-12-5==12.3.2.81-1 + - libnvjpeg-dev-12-5==12.3.2.81-1 + - libp11-kit0==0.24.0-6build1 + - libpackagekit-glib2-18==1.2.5-2ubuntu2 + - libpam-modules==1.4.0-11ubuntu2.4 + - libpam-modules-bin==1.4.0-11ubuntu2.4 + - libpam-runtime==1.4.0-11ubuntu2.4 + - libpam-systemd==249.11-0ubuntu3.12 + - libpam0g==1.4.0-11ubuntu2.4 + - libpcre2-8-0==10.39-3ubuntu0.1 + - libpcre3==2:8.39-13ubuntu0.22.04.1 + - libperl5.34==5.34.0-3ubuntu1.3 + - libpolkit-agent-1-0==0.105-33 + - libpolkit-gobject-1-0==0.105-33 + - libpopt0==1.18-3build1 + - libprocps8==2:3.3.17-6ubuntu2.1 + - libpsl5==0.21.0-1.2build2 + - libpython3-stdlib==3.10.6-1~22.04 + - libpython3.10-minimal==3.10.12-1~22.04.5 + - libpython3.10-stdlib==3.10.12-1~22.04.5 + - libquadmath0==12.3.0-1ubuntu1~22.04 + - libreadline8==8.1.2-1 + - librtmp1==2.4+20151223.gitfa8646d.1-2build4 + - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2 + - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2 + - libseccomp2==2.5.3-2ubuntu2 + - libselinux1==3.3-1build2 + - libsemanage-common==3.3-1build2 + - libsemanage2==3.3-1build2 + - libsepol2==3.3-1build1 + - libsmartcols1==2.37.2-4ubuntu3.4 + - libsqlite3-0==3.37.2-2ubuntu0.3 + - libss2==1.46.5-2ubuntu1.1 + - libssh-4==0.9.6-2ubuntu0.22.04.3 + - libssl3==3.0.2-0ubuntu1.16 + - libstdc++-11-dev==11.4.0-1ubuntu1~22.04 + - libstdc++6==12.3.0-1ubuntu1~22.04 + - libstemmer0d==2.2.0-1build1 + - libsystemd0==249.11-0ubuntu3.12 + - libtasn1-6==4.18.0-4build1 + - libtinfo6==6.3-2ubuntu0.1 + - libtirpc-common==1.3.2-2ubuntu0.1 + - libtirpc-dev==1.3.2-2ubuntu0.1 + - libtirpc3==1.3.2-2ubuntu0.1 + - libtsan0==11.4.0-1ubuntu1~22.04 + - libubsan1==12.3.0-1ubuntu1~22.04 + - libudev1==249.11-0ubuntu3.12 + - libunistring2==1.0-1 + - libunwind8==1.3.2-2build2.1 + - libutempter0==1.2.1-2build2 + - libuuid1==2.37.2-4ubuntu3.4 + - libwrap0==7.6.q-31build2 + - libxml2==2.9.13+dfsg-1ubuntu0.4 + - libxmlb2==0.3.6-2build1 + - libxxhash0==0.8.1-1 + - libyaml-0-2==0.2.2-1build2 + - libzstd1==1.4.8+dfsg-3build1 + - linux-libc-dev==5.15.0-113.123 + - locales==2.35-0ubuntu3.8 + - login==1:4.8.1-2ubuntu2.2 + - logsave==1.46.5-2ubuntu1.1 + - lsb-base==11.1.0ubuntu4 + - lsb-release==11.1.0ubuntu4 + - lto-disabled-list==24 + - make==4.3-4.1build1 + - mawk==1.3.4.20200120-3 + - media-types==7.0.0 + - mount==2.37.2-4ubuntu3.4 + - ncurses-base==6.3-2ubuntu0.1 + - ncurses-bin==6.3-2ubuntu0.1 + - nsight-compute-2024.2.1==2024.2.1.2-1 + - openssh-client==1:8.9p1-3ubuntu0.10 + - openssh-server==1:8.9p1-3ubuntu0.10 + - openssh-sftp-server==1:8.9p1-3ubuntu0.10 + - openssl==3.0.2-0ubuntu1.16 + - packagekit==1.2.5-2ubuntu2 + - passwd==1:4.8.1-2ubuntu2.2 + - patch==2.7.6-7build2 + - perl==5.34.0-3ubuntu1.3 + - perl-base==5.34.0-3ubuntu1.3 + - perl-modules-5.34==5.34.0-3ubuntu1.3 + - pinentry-curses==1.1.1-1build2 + - pkexec==0.105-33 + - policykit-1==0.105-33 + - polkitd==0.105-33 + - procps==2:3.3.17-6ubuntu2.1 + - python-apt-common==2.4.0ubuntu3 + - python3==3.10.6-1~22.04 + - python3-apt==2.4.0ubuntu3 + - python3-blinker==1.4+dfsg1-0.4 + - python3-cffi-backend==1.15.0-1build2 + - python3-cryptography==3.4.8-1ubuntu2.2 + - python3-dbus==1.2.18-3build1 + - python3-distro==1.7.0-1 + - python3-distutils==3.10.8-1~22.04 + - python3-gi==3.42.1-0ubuntu1 + - python3-httplib2==0.20.2-2 + - python3-importlib-metadata==4.6.4-1 + - python3-jeepney==0.7.1-3 + - python3-jwt==2.3.0-1ubuntu0.2 + - python3-keyring==23.5.0-1 + - python3-launchpadlib==1.10.16-1 + - python3-lazr.restfulclient==0.14.4-1 + - python3-lazr.uri==1.0.6-2 + - python3-lib2to3==3.10.8-1~22.04 + - python3-minimal==3.10.6-1~22.04 + - python3-more-itertools==8.10.0-2 + - python3-oauthlib==3.2.0-1ubuntu0.1 + - python3-pip==22.0.2+dfsg-1ubuntu0.4 + - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1 + - python3-pyparsing==2.4.7-1 + - python3-secretstorage==3.3.1-1 + - python3-setuptools==59.6.0-1.2ubuntu0.22.04.1 + - python3-six==1.16.0-3ubuntu1 + - python3-software-properties==0.99.22.9 + - python3-wadllib==1.3.6-1 + - python3-wheel==0.37.1-2ubuntu0.22.04.1 + - python3-zipp==1.0.0-3ubuntu0.1 + - python3.10==3.10.12-1~22.04.5 + - python3.10-minimal==3.10.12-1~22.04.5 + - readline-common==8.1.2-1 + - rpcsvc-proto==1.4.2-0ubuntu6 + - rsync==3.2.7-0ubuntu0.22.04.2 + - sed==4.8-1ubuntu2 + - sensible-utils==0.0.17 + - software-properties-common==0.99.22.9 + - sudo==1.9.9-1ubuntu2.4 + - systemd==249.11-0ubuntu3.12 + - systemd-sysv==249.11-0ubuntu3.12 + - sysvinit-utils==3.01-1ubuntu1 + - tar==1.34+dfsg-1ubuntu0.1.22.04.2 + - tmux==3.2a-4ubuntu0.2 + - ubuntu-keyring==2021.03.26 + - ucf==3.0043 + - usrmerge==25ubuntu2 + - util-linux==2.37.2-4ubuntu3.4 + - wget==1.21.2-2ubuntu1.1 + - xz-utils==5.2.5-2ubuntu1 + - zlib1g==1:1.2.11.dfsg-2ubuntu9.2 + machine: x86_64 + os: Linux + os_version: '#40~22.04.3-Ubuntu SMP PREEMPT_DYNAMIC Tue Jul 30 17:30:19 UTC 2' + processor: x86_64 + release: 6.8.0-40-generic diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/experiment.yaml new file mode 100644 index 0000000..78aa6aa --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/experiment.yaml @@ -0,0 +1,6 @@ +comment: 4x RTX 4000Ada +experiment: vllm_llama_3_70b_instruct_awq +experiment_hash: exp_hash_v1:7aa490 +run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada +slug: 4x_rtx_4000ada +timestamp: 2024-08-22_12-26-03 diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output.yaml new file mode 100644 index 0000000..443d83a --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output.yaml @@ -0,0 +1,8 @@ +Count to 1000, skip unpopular numbers: e01a486cc144586ae8b3b56ac3ea584290fbe07834a67a8dbc9ef98c66015d87d9abd0bcee16e90850ca183cdc948abcf208fc1d38a3ee2f8e4851cac05c10d9 +Describe justice system in UK vs USA in 2000-5000 words: f12666eaf529cb993f9b5a24a9f3f9a336e0492c6fb45030acc46117776656ff5fff12fe03ba63ba431ffd32dfe68bc75a146059756f3925f0fbd1b39e01f1f8 +Describe schooling system in UK vs USA in 2000-5000 words: 291023c3134a2fc4dc6f00507a428d9c7a6e166e1a0a7f73d74b0b4b60e460d6a50d143ea21b9cb9c26c10dd96002f208b0f8750dfc1b07cb5c742ff3c398fd2 +Explain me some random problem for me in 2000-5000 words: 6c10b7cfd03339881798d66c02e1be1f99e5536746d82517435c3ab26bb5f6b377540fb2d374af62bacc1557de85f0d70b7f753ec074bde161d150c94382a833 +Tell me entire history of USA: 1d193ab043b6dd23922e8258d6e134f390cebae90131340d47bf46510a2f34a4f93a5112b1e9160fe51219d2169576cda7948d605b4cb0d603d24388ee862687 +Write a ballad. Pick a random theme.: 53aa9308f203c0f71abf485420b4a87411b63ea75535d2c708226963ddf29b926db30b7f21c690af5bb914ab6b4f659685d1bda1d14899813dffd2de5fcdef7f +Write an epic story about a dragon and a knight: e36cfba48cfa0862ad305c3f54543b0d7e9c44f89bdb6fb7d74168a8f1d3a5140b20644c62eda22795099daf3d5db93b8bd39fbb6394c5d6d5c41761cc253ce6 +Write an essay about being a Senior developer.: 33deb94b55d7c18d7b3a2b564c0413a25a2eeacd152519a5884a3fa7c8f078a01edea1d7536f05e582647dfdbb218630579071030e5ea016e8a957ac5e4057d0 diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/run.local.log new file mode 100644 index 0000000..1b3a57a --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/run.local.log @@ -0,0 +1,15 @@ +2024-08-22 12:26:03,711 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 4x RTX 4000Ada +2024-08-22 12:26:03,714 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/run.local.log +2024-08-22 12:26:03,882 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1) +2024-08-22 12:26:04,053 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n' +2024-08-22 12:26:04,057 - paramiko.transport - INFO - Authentication (publickey) successful! +2024-08-22 12:26:04,062 - __main__ - INFO - Syncing files to remote +2024-08-22 12:26:04,290 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output' stdout: '' stderr: '' status_code: 0 +2024-08-22 12:26:06,997 - __main__ - INFO - Setting up remote environment +2024-08-22 12:26:45,244 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n curl -LsSf https://astral.sh/uv/install.sh | sh\n export PATH=$HOME/.cargo/bin:$PATH\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n uv venv -p python3.11 --python-preference managed\n source .venv/bin/activate \n uv pip install ./deterministic_ml*.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\n ' stdout: "installing to /root/.cargo/bin\n uv\n uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n source $HOME/.cargo/env (sh, bash, zsh)\n source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-03_4x_rtx_4000ada '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-03_4x_rtx_4000ada\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-03_4x_rtx_4000ada) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 885ms\nPrepared 108 packages in 31.88s\nInstalled 108 packages in 489ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0 +2024-08-22 12:26:45,264 - __main__ - INFO - Gathering system info +2024-08-22 12:26:49,096 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-03_4x_rtx_4000ada '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-03_4x_rtx_4000ada\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-03_4x_rtx_4000ada) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 12:26:49,110 - __main__ - INFO - Running experiment code on remote +2024-08-22 12:35:57,340 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output/stdout.txt' stdout: "gpu_count=4\nStarting model loading\nINFO 08-22 10:26:56 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:26:56 config.py:729] Defaulting to use mp for distributed inference\nINFO 08-22 10:26:56 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=4, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nWARNING 08-22 10:26:58 multiproc_gpu_executor.py:59] Reducing Torch parallelism from 24 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.\nINFO 08-22 10:26:58 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:26:58 multiproc_worker_utils.py:215] Worker ready; awaiting tasks\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:26:58 multiproc_worker_utils.py:215] Worker ready; awaiting tasks\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:26:58 multiproc_worker_utils.py:215] Worker ready; awaiting tasks\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2\nINFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2\nINFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m WARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m WARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.\nWARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m WARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.\nINFO 08-22 10:27:00 shm_broadcast.py:235] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1, 2, 3], buffer=, local_subscribe_port=40373, remote_subscribe_port=None)\nINFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 10:27:01 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB\nINFO 08-22 10:30:36 model_runner.py:732] Loading model weights took 9.2867 GB\nINFO 08-22 10:30:44 distributed_gpu_executor.py:56] # GPU blocks: 3207, # CPU blocks: 3276\nmodel loading took 233.71 seconds\nStarting 8 responses generation\n8 responses generation took 298.52 seconds\n{'Count to 1000, skip unpopular numbers': 'e01a486cc144586ae8b3b56ac3ea584290fbe07834a67a8dbc9ef98c66015d87d9abd0bcee16e90850ca183cdc948abcf208fc1d38a3ee2f8e4851cac05c10d9',\n 'Describe justice system in UK vs USA in 2000-5000 words': 'f12666eaf529cb993f9b5a24a9f3f9a336e0492c6fb45030acc46117776656ff5fff12fe03ba63ba431ffd32dfe68bc75a146059756f3925f0fbd1b39e01f1f8',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '291023c3134a2fc4dc6f00507a428d9c7a6e166e1a0a7f73d74b0b4b60e460d6a50d143ea21b9cb9c26c10dd96002f208b0f8750dfc1b07cb5c742ff3c398fd2',\n 'Explain me some random problem for me in 2000-5000 words': '6c10b7cfd03339881798d66c02e1be1f99e5536746d82517435c3ab26bb5f6b377540fb2d374af62bacc1557de85f0d70b7f753ec074bde161d150c94382a833',\n 'Tell me entire history of USA': '1d193ab043b6dd23922e8258d6e134f390cebae90131340d47bf46510a2f34a4f93a5112b1e9160fe51219d2169576cda7948d605b4cb0d603d24388ee862687',\n 'Write a ballad. Pick a random theme.': '53aa9308f203c0f71abf485420b4a87411b63ea75535d2c708226963ddf29b926db30b7f21c690af5bb914ab6b4f659685d1bda1d14899813dffd2de5fcdef7f',\n 'Write an epic story about a dragon and a knight': 'e36cfba48cfa0862ad305c3f54543b0d7e9c44f89bdb6fb7d74168a8f1d3a5140b20644c62eda22795099daf3d5db93b8bd39fbb6394c5d6d5c41761cc253ce6',\n 'Write an essay about being a Senior developer.': '33deb94b55d7c18d7b3a2b564c0413a25a2eeacd152519a5884a3fa7c8f078a01edea1d7536f05e582647dfdbb218630579071030e5ea016e8a957ac5e4057d0'}\nERROR 08-22 10:35:51 multiproc_worker_utils.py:120] Worker VllmWorkerProcess pid 788 died, exit code: -15\nINFO 08-22 10:35:51 multiproc_worker_utils.py:123] Killing local vLLM worker processes\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-03_4x_rtx_4000ada '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-03_4x_rtx_4000ada\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-03_4x_rtx_4000ada) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output/stdout.txt\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m Error while downloading from https://cdn-lfs-us-1.huggingface.co/repos/5d/51/5d5111f0b358d39407f5182b8ea3ee71a6b1ed7942bd42d40a40c060adb2c2fb/dd7daa0a6f7e9a11ac7c28bd6dbbd6974b99efbe329afb15cc506fb705e6e407?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00002-of-00009.safetensors%3B+filename%3D%22model-00002-of-00009.safetensors%22%3B&Expires=1724581621&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNDU4MTYyMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzVkLzUxLzVkNTExMWYwYjM1OGQzOTQwN2Y1MTgyYjhlYTNlZTcxYTZiMWVkNzk0MmJkNDJkNDBhNDBjMDYwYWRiMmMyZmIvZGQ3ZGFhMGE2ZjdlOWExMWFjN2MyOGJkNmRiYmQ2OTc0Yjk5ZWZiZTMyOWFmYjE1Y2M1MDZmYjcwNWU2ZTQwNz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=nlfFzTyogK4EsH6wbheWxniQwOIRKC6WNSOaFQQSQyXV0lvX2CpbwIv6JZwmqC8grxmPJHeKpXFUxg-5nW3sghqJGu756q2niYpV%7EC8HgwkUWFhI0uSV%7EdAYY4kQ%7E7c4b7dmY8hxqx3RsWZqyXNfr1R0l39Q39G0sTn6yiqLDq7f%7Ezpexl2xH1pzJ3EJFDRd1QBiLJVMd7Rh4yzHiBtmsNYuBgV5VCX%7Ezl-bc33zNonQfV%7E7k1VhTJ2sTsLGVIbW0CaboZZyqo8opAL1MMfYvI9hTMM10FQI-gMWRN9xcmWe7EWK%7EB2M-x6KkKK-5IimWoKd5fsYvF8jZmie6epALA__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m Trying to resume download...\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m Error while downloading from https://cdn-lfs-us-1.huggingface.co/repos/5d/51/5d5111f0b358d39407f5182b8ea3ee71a6b1ed7942bd42d40a40c060adb2c2fb/bf8de6ef3f4e527721c9f03c5bec9dd6219b58623d11feef6c213a6fee79d759?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00003-of-00009.safetensors%3B+filename%3D%22model-00003-of-00009.safetensors%22%3B&Expires=1724581621&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNDU4MTYyMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzVkLzUxLzVkNTExMWYwYjM1OGQzOTQwN2Y1MTgyYjhlYTNlZTcxYTZiMWVkNzk0MmJkNDJkNDBhNDBjMDYwYWRiMmMyZmIvYmY4ZGU2ZWYzZjRlNTI3NzIxYzlmMDNjNWJlYzlkZDYyMTliNTg2MjNkMTFmZWVmNmMyMTNhNmZlZTc5ZDc1OT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=McjODK0UDiZHwiqwC4q6oncp3zJEfm4K9rLznT7rZW2IYk2Y1Rv7vACZbm9xpH5kOK%7Er1Qh3bTVmMDB29rKPSmswjBNQYVW-IDpCSS3hWNvzHBE7HvdQVB9%7Ej5vm%7EkWKgYJUUDbeSePmJ-vt%7EfHxBmHpm5UfsfZJZtNNl62s1ss1XL5kNPmwIaeZHWpmuVK7rXaeJQZMlGYSvnnFMs0eDviVbr0-6pGjHZgiC4HoiF7290GO-TgPISRUsVPK-iVhjOGFafUDzZAeJAqdS%7ENJiX47VR5012YvTGdTX0mWcZUSMEt-vaNsImbSro642d%7EWVxTCsyX%7E2wRLWlUtPOOkQA__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m Trying to resume download...\n\rLoading safetensors checkpoint shards: 0% Completed | 0/9 [00:00, local_subscribe_port=40373, remote_subscribe_port=None) +INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq... +(VllmWorkerProcess pid=788) INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq... +(VllmWorkerProcess pid=787) INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq... +(VllmWorkerProcess pid=789) INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq... +(VllmWorkerProcess pid=788) INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors'] +(VllmWorkerProcess pid=789) INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors'] +(VllmWorkerProcess pid=787) INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors'] +INFO 08-22 10:27:01 weight_utils.py:225] Using model weights format ['*.safetensors'] +(VllmWorkerProcess pid=788) INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB +(VllmWorkerProcess pid=789) INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB +(VllmWorkerProcess pid=787) INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB +INFO 08-22 10:30:36 model_runner.py:732] Loading model weights took 9.2867 GB +INFO 08-22 10:30:44 distributed_gpu_executor.py:56] # GPU blocks: 3207, # CPU blocks: 3276 +model loading took 233.71 seconds +Starting 8 responses generation +8 responses generation took 298.52 seconds +{'Count to 1000, skip unpopular numbers': 'e01a486cc144586ae8b3b56ac3ea584290fbe07834a67a8dbc9ef98c66015d87d9abd0bcee16e90850ca183cdc948abcf208fc1d38a3ee2f8e4851cac05c10d9', + 'Describe justice system in UK vs USA in 2000-5000 words': 'f12666eaf529cb993f9b5a24a9f3f9a336e0492c6fb45030acc46117776656ff5fff12fe03ba63ba431ffd32dfe68bc75a146059756f3925f0fbd1b39e01f1f8', + 'Describe schooling system in UK vs USA in 2000-5000 words': '291023c3134a2fc4dc6f00507a428d9c7a6e166e1a0a7f73d74b0b4b60e460d6a50d143ea21b9cb9c26c10dd96002f208b0f8750dfc1b07cb5c742ff3c398fd2', + 'Explain me some random problem for me in 2000-5000 words': '6c10b7cfd03339881798d66c02e1be1f99e5536746d82517435c3ab26bb5f6b377540fb2d374af62bacc1557de85f0d70b7f753ec074bde161d150c94382a833', + 'Tell me entire history of USA': '1d193ab043b6dd23922e8258d6e134f390cebae90131340d47bf46510a2f34a4f93a5112b1e9160fe51219d2169576cda7948d605b4cb0d603d24388ee862687', + 'Write a ballad. Pick a random theme.': '53aa9308f203c0f71abf485420b4a87411b63ea75535d2c708226963ddf29b926db30b7f21c690af5bb914ab6b4f659685d1bda1d14899813dffd2de5fcdef7f', + 'Write an epic story about a dragon and a knight': 'e36cfba48cfa0862ad305c3f54543b0d7e9c44f89bdb6fb7d74168a8f1d3a5140b20644c62eda22795099daf3d5db93b8bd39fbb6394c5d6d5c41761cc253ce6', + 'Write an essay about being a Senior developer.': '33deb94b55d7c18d7b3a2b564c0413a25a2eeacd152519a5884a3fa7c8f078a01edea1d7536f05e582647dfdbb218630579071030e5ea016e8a957ac5e4057d0'} +ERROR 08-22 10:35:51 multiproc_worker_utils.py:120] Worker VllmWorkerProcess pid 788 died, exit code: -15 +INFO 08-22 10:35:51 multiproc_worker_utils.py:123] Killing local vLLM worker processes diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/sysinfo.yaml new file mode 100644 index 0000000..d6af4cb --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/sysinfo.yaml @@ -0,0 +1,558 @@ +cuda: + cuda: '12.1' + cudnn: 90100 +machine: + cpu: + clocks: + - 1500.0 + - 1500.0 + - 2800.0 + - 2800.0 + - 2400.0 + - 2800.0 + - 2800.0 + - 1500.0 + - 3349.951 + - 1500.0 + - 1799.199 + - 1500.0 + - 2800.0 + - 2800.0 + - 1500.0 + - 2193.01 + - 2589.516 + - 1500.0 + - 1500.0 + - 1500.0 + - 2800.0 + - 2800.0 + - 1500.0 + - 3333.545 + - 2311.349 + - 1496.058 + - 2400.0 + - 2800.0 + - 1500.0 + - 2784.201 + - 1500.0 + - 2800.0 + - 1500.0 + - 2800.0 + - 2400.0 + - 2800.0 + - 1500.0 + - 1788.284 + - 1799.926 + - 1500.0 + - 1500.0 + - 2800.0 + - 1500.0 + - 2800.0 + - 2028.381 + - 3165.351 + - 2800.0 + - 2800.0 + count: 48 + model: AMD EPYC 7402P 24-Core Processor + docker_support: + nvidia: false + runc: false + gpu: + count: 4 + details: + - capacity: '20475' + cuda: '8.9' + driver: 555.58.02 + graphics_speed: '210' + memory_speed: '405' + name: NVIDIA RTX 4000 Ada Generation + power_limit: '130.00' + - capacity: '20475' + cuda: '8.9' + driver: 555.58.02 + graphics_speed: '210' + memory_speed: '405' + name: NVIDIA RTX 4000 Ada Generation + power_limit: '130.00' + - capacity: '20475' + cuda: '8.9' + driver: 555.58.02 + graphics_speed: '210' + memory_speed: '405' + name: NVIDIA RTX 4000 Ada Generation + power_limit: '130.00' + - capacity: '20475' + cuda: '8.9' + driver: 555.58.02 + graphics_speed: '210' + memory_speed: '405' + name: NVIDIA RTX 4000 Ada Generation + power_limit: '130.00' + hard_disk: + free: 77359212 + total: 83886080 + used: 6526868 + os: Ubuntu 22.04.4 LTS + ram: + available: 249409076 + free: 121386568 + total: 263771272 + used: 142384704 +python: + packages: + - aiohappyeyeballs==2.4.0 + - aiohttp==3.10.5 + - aiosignal==1.3.1 + - annotated-types==0.7.0 + - anyio==4.4.0 + - attrs==24.2.0 + - certifi==2024.7.4 + - charset-normalizer==3.3.2 + - click==8.1.7 + - cloudpickle==3.0.0 + - cmake==3.30.2 + - datasets==2.21.0 + - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl + - dill==0.3.8 + - diskcache==5.6.3 + - distro==1.9.0 + - fastapi==0.112.1 + - filelock==3.15.4 + - frozenlist==1.4.1 + - fsspec==2024.6.1 + - h11==0.14.0 + - httpcore==1.0.5 + - httptools==0.6.1 + - httpx==0.27.0 + - huggingface-hub==0.24.6 + - idna==3.7 + - interegular==0.3.3 + - jinja2==3.1.4 + - jiter==0.5.0 + - jsonschema==4.23.0 + - jsonschema-specifications==2023.12.1 + - lark==1.2.2 + - llvmlite==0.43.0 + - lm-format-enforcer==0.10.3 + - markupsafe==2.1.5 + - mpmath==1.3.0 + - msgpack==1.0.8 + - multidict==6.0.5 + - multiprocess==0.70.16 + - nest-asyncio==1.6.0 + - networkx==3.3 + - ninja==1.11.1.1 + - numba==0.60.0 + - numpy==1.26.4 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==9.1.0.70 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-ml-py==12.560.30 + - nvidia-nccl-cu12==2.20.5 + - nvidia-nvjitlink-cu12==12.6.20 + - nvidia-nvtx-cu12==12.1.105 + - openai==1.42.0 + - outlines==0.0.46 + - packaging==24.1 + - pandas==2.2.2 + - pillow==10.4.0 + - prometheus-client==0.20.0 + - prometheus-fastapi-instrumentator==7.0.0 + - protobuf==5.27.3 + - psutil==6.0.0 + - py-cpuinfo==9.0.0 + - pyairports==2.1.1 + - pyarrow==17.0.0 + - pycountry==24.6.1 + - pydantic==2.8.2 + - pydantic-core==2.20.1 + - python-dateutil==2.9.0.post0 + - python-dotenv==1.0.1 + - pytz==2024.1 + - pyyaml==6.0.2 + - pyzmq==26.2.0 + - ray==2.34.0 + - referencing==0.35.1 + - regex==2024.7.24 + - requests==2.32.3 + - rpds-py==0.20.0 + - safetensors==0.4.4 + - sentencepiece==0.2.0 + - setuptools==73.0.1 + - six==1.16.0 + - sniffio==1.3.1 + - starlette==0.38.2 + - sympy==1.13.2 + - tiktoken==0.7.0 + - tokenizers==0.19.1 + - torch==2.4.0 + - torchvision==0.19.0 + - tqdm==4.66.5 + - transformers==4.44.1 + - triton==3.0.0 + - typing-extensions==4.12.2 + - tzdata==2024.1 + - urllib3==2.2.2 + - uvicorn==0.30.6 + - uvloop==0.20.0 + - vllm==0.5.4 + - vllm-flash-attn==2.6.1 + - watchfiles==0.23.0 + - websockets==13.0 + - xformers==0.0.27.post2 + - xxhash==3.5.0 + - yarl==1.9.4 + version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ] +system: + dpkg_packages: + - adduser==3.118ubuntu5 + - apt==2.4.12 + - base-files==12ubuntu4.6 + - base-passwd==3.5.52build1 + - bash==5.1-6ubuntu1.1 + - binutils==2.38-4ubuntu2.6 + - binutils-common==2.38-4ubuntu2.6 + - binutils-x86-64-linux-gnu==2.38-4ubuntu2.6 + - bsdutils==1:2.37.2-4ubuntu3.4 + - build-essential==12.9ubuntu3 + - bzip2==1.0.8-5build1 + - ca-certificates==20230311ubuntu0.22.04.1 + - coreutils==8.32-4.1ubuntu1.2 + - cpp==4:11.2.0-1ubuntu1 + - cpp-11==11.4.0-1ubuntu1~22.04 + - cuda-cccl-12-5==12.5.39-1 + - cuda-command-line-tools-12-5==12.5.1-1 + - cuda-compat-12-5==555.42.06-1 + - cuda-compiler-12-5==12.5.1-1 + - cuda-crt-12-5==12.5.82-1 + - cuda-cudart-12-5==12.5.82-1 + - cuda-cudart-dev-12-5==12.5.82-1 + - cuda-cuobjdump-12-5==12.5.39-1 + - cuda-cupti-12-5==12.5.82-1 + - cuda-cupti-dev-12-5==12.5.82-1 + - cuda-cuxxfilt-12-5==12.5.82-1 + - cuda-driver-dev-12-5==12.5.82-1 + - cuda-gdb-12-5==12.5.82-1 + - cuda-keyring==1.1-1 + - cuda-libraries-12-5==12.5.1-1 + - cuda-libraries-dev-12-5==12.5.1-1 + - cuda-minimal-build-12-5==12.5.1-1 + - cuda-nsight-compute-12-5==12.5.1-1 + - cuda-nvcc-12-5==12.5.82-1 + - cuda-nvdisasm-12-5==12.5.39-1 + - cuda-nvml-dev-12-5==12.5.82-1 + - cuda-nvprof-12-5==12.5.82-1 + - cuda-nvprune-12-5==12.5.82-1 + - cuda-nvrtc-12-5==12.5.82-1 + - cuda-nvrtc-dev-12-5==12.5.82-1 + - cuda-nvtx-12-5==12.5.82-1 + - cuda-nvvm-12-5==12.5.82-1 + - cuda-opencl-12-5==12.5.39-1 + - cuda-opencl-dev-12-5==12.5.39-1 + - cuda-profiler-api-12-5==12.5.39-1 + - cuda-sanitizer-12-5==12.5.81-1 + - cuda-toolkit-12-5-config-common==12.5.82-1 + - cuda-toolkit-12-config-common==12.5.82-1 + - cuda-toolkit-config-common==12.5.82-1 + - curl==7.81.0-1ubuntu1.17 + - dash==0.5.11+git20210903+057cd650a4ed-3build1 + - dbus==1.12.20-2ubuntu4.1 + - debconf==1.5.79ubuntu1 + - debianutils==5.5-1ubuntu2 + - diffutils==1:3.8-0ubuntu2 + - dirmngr==2.2.27-3ubuntu2.1 + - distro-info-data==0.52ubuntu0.7 + - dpkg==1.21.1ubuntu2.3 + - dpkg-dev==1.21.1ubuntu2.3 + - e2fsprogs==1.46.5-2ubuntu1.1 + - findutils==4.8.0-1ubuntu3 + - g++==4:11.2.0-1ubuntu1 + - g++-11==11.4.0-1ubuntu1~22.04 + - gcc==4:11.2.0-1ubuntu1 + - gcc-11==11.4.0-1ubuntu1~22.04 + - gcc-11-base==11.4.0-1ubuntu1~22.04 + - gcc-12-base==12.3.0-1ubuntu1~22.04 + - gir1.2-glib-2.0==1.72.0-1 + - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2 + - git==1:2.34.1-1ubuntu1.11 + - git-man==1:2.34.1-1ubuntu1.11 + - gnupg==2.2.27-3ubuntu2.1 + - gnupg-l10n==2.2.27-3ubuntu2.1 + - gnupg-utils==2.2.27-3ubuntu2.1 + - gnupg2==2.2.27-3ubuntu2.1 + - gpg==2.2.27-3ubuntu2.1 + - gpg-agent==2.2.27-3ubuntu2.1 + - gpg-wks-client==2.2.27-3ubuntu2.1 + - gpg-wks-server==2.2.27-3ubuntu2.1 + - gpgconf==2.2.27-3ubuntu2.1 + - gpgsm==2.2.27-3ubuntu2.1 + - gpgv==2.2.27-3ubuntu2.1 + - grep==3.7-1build1 + - gzip==1.10-4ubuntu4.1 + - hostname==3.23ubuntu2 + - init-system-helpers==1.62 + - iso-codes==4.9.0-1 + - less==590-1ubuntu0.22.04.3 + - libacl1==2.3.1-1 + - libapparmor1==3.0.4-2ubuntu2.3 + - libappstream4==0.15.2-2 + - libapt-pkg6.0==2.4.12 + - libargon2-1==0~20171227-0.3 + - libasan6==11.4.0-1ubuntu1~22.04 + - libassuan0==2.5.5-1build1 + - libatomic1==12.3.0-1ubuntu1~22.04 + - libattr1==1:2.5.1-1build1 + - libaudit-common==1:3.0.7-1build1 + - libaudit1==1:3.0.7-1build1 + - libbinutils==2.38-4ubuntu2.6 + - libblkid1==2.37.2-4ubuntu3.4 + - libbrotli1==1.0.9-2build6 + - libbsd0==0.11.5-1 + - libbz2-1.0==1.0.8-5build1 + - libc-bin==2.35-0ubuntu3.8 + - libc-dev-bin==2.35-0ubuntu3.8 + - libc6==2.35-0ubuntu3.8 + - libc6-dev==2.35-0ubuntu3.8 + - libcap-ng0==0.7.9-2.2build3 + - libcap2==1:2.44-1ubuntu0.22.04.1 + - libcap2-bin==1:2.44-1ubuntu0.22.04.1 + - libcbor0.8==0.8.0-2ubuntu1 + - libcc1-0==12.3.0-1ubuntu1~22.04 + - libcom-err2==1.46.5-2ubuntu1.1 + - libcrypt-dev==1:4.4.27-1 + - libcrypt1==1:4.4.27-1 + - libcryptsetup12==2:2.4.3-1ubuntu1.2 + - libctf-nobfd0==2.38-4ubuntu2.6 + - libctf0==2.38-4ubuntu2.6 + - libcublas-12-5==12.5.3.2-1 + - libcublas-dev-12-5==12.5.3.2-1 + - libcudnn9-cuda-12==9.2.1.18-1 + - libcudnn9-dev-cuda-12==9.2.1.18-1 + - libcufft-12-5==11.2.3.61-1 + - libcufft-dev-12-5==11.2.3.61-1 + - libcufile-12-5==1.10.1.7-1 + - libcufile-dev-12-5==1.10.1.7-1 + - libcurand-12-5==10.3.6.82-1 + - libcurand-dev-12-5==10.3.6.82-1 + - libcurl3-gnutls==7.81.0-1ubuntu1.17 + - libcurl4==7.81.0-1ubuntu1.17 + - libcusolver-12-5==11.6.3.83-1 + - libcusolver-dev-12-5==11.6.3.83-1 + - libcusparse-12-5==12.5.1.3-1 + - libcusparse-dev-12-5==12.5.1.3-1 + - libdb5.3==5.3.28+dfsg1-0.8ubuntu3 + - libdbus-1-3==1.12.20-2ubuntu4.1 + - libdebconfclient0==0.261ubuntu1 + - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4 + - libdpkg-perl==1.21.1ubuntu2.3 + - libdw1==0.186-1build1 + - libedit2==3.1-20210910-1build1 + - libelf1==0.186-1build1 + - liberror-perl==0.17029-1 + - libevent-core-2.1-7==2.1.12-stable-1build3 + - libexpat1==2.4.7-1ubuntu0.3 + - libext2fs2==1.46.5-2ubuntu1.1 + - libffi8==3.4.2-4 + - libfido2-1==1.10.0-1 + - libgcc-11-dev==11.4.0-1ubuntu1~22.04 + - libgcc-s1==12.3.0-1ubuntu1~22.04 + - libgcrypt20==1.9.4-3ubuntu3 + - libgdbm-compat4==1.23-1 + - libgdbm6==1.23-1 + - libgirepository-1.0-1==1.72.0-1 + - libglib2.0-0==2.72.4-0ubuntu2.3 + - libglib2.0-bin==2.72.4-0ubuntu2.3 + - libglib2.0-data==2.72.4-0ubuntu2.3 + - libgmp10==2:6.2.1+dfsg-3ubuntu1 + - libgnutls30==3.7.3-4ubuntu1.5 + - libgomp1==12.3.0-1ubuntu1~22.04 + - libgpg-error0==1.43-3 + - libgssapi-krb5-2==1.19.2-2ubuntu0.3 + - libgstreamer1.0-0==1.20.3-0ubuntu1 + - libhogweed6==3.7.3-1build2 + - libicu70==70.1-2 + - libidn2-0==2.3.2-2build1 + - libip4tc2==1.8.7-1ubuntu5.2 + - libisl23==0.24-2build1 + - libitm1==12.3.0-1ubuntu1~22.04 + - libjson-c5==0.15-3~ubuntu1.22.04.2 + - libk5crypto3==1.19.2-2ubuntu0.3 + - libkeyutils1==1.6.1-2ubuntu3 + - libkmod2==29-1ubuntu1 + - libkrb5-3==1.19.2-2ubuntu0.3 + - libkrb5support0==1.19.2-2ubuntu0.3 + - libksba8==1.6.0-2ubuntu0.2 + - libldap-2.5-0==2.5.17+dfsg-0ubuntu0.22.04.1 + - liblsan0==12.3.0-1ubuntu1~22.04 + - liblz4-1==1.9.3-2build2 + - liblzma5==5.2.5-2ubuntu1 + - libmd0==1.0.4-1build1 + - libmount1==2.37.2-4ubuntu3.4 + - libmpc3==1.2.1-2build1 + - libmpdec3==2.5.1-2build2 + - libmpfr6==4.1.0-3build3 + - libnccl-dev==2.22.3-1+cuda12.5 + - libnccl2==2.22.3-1+cuda12.5 + - libncurses6==6.3-2ubuntu0.1 + - libncursesw6==6.3-2ubuntu0.1 + - libnettle8==3.7.3-1build2 + - libnghttp2-14==1.43.0-1ubuntu0.2 + - libnpp-12-5==12.3.0.159-1 + - libnpp-dev-12-5==12.3.0.159-1 + - libnpth0==1.6-3build2 + - libnsl-dev==1.3.0-2build2 + - libnsl2==1.3.0-2build2 + - libnvfatbin-12-5==12.5.82-1 + - libnvfatbin-dev-12-5==12.5.82-1 + - libnvjitlink-12-5==12.5.82-1 + - libnvjitlink-dev-12-5==12.5.82-1 + - libnvjpeg-12-5==12.3.2.81-1 + - libnvjpeg-dev-12-5==12.3.2.81-1 + - libp11-kit0==0.24.0-6build1 + - libpackagekit-glib2-18==1.2.5-2ubuntu2 + - libpam-modules==1.4.0-11ubuntu2.4 + - libpam-modules-bin==1.4.0-11ubuntu2.4 + - libpam-runtime==1.4.0-11ubuntu2.4 + - libpam-systemd==249.11-0ubuntu3.12 + - libpam0g==1.4.0-11ubuntu2.4 + - libpcre2-8-0==10.39-3ubuntu0.1 + - libpcre3==2:8.39-13ubuntu0.22.04.1 + - libperl5.34==5.34.0-3ubuntu1.3 + - libpolkit-agent-1-0==0.105-33 + - libpolkit-gobject-1-0==0.105-33 + - libpopt0==1.18-3build1 + - libprocps8==2:3.3.17-6ubuntu2.1 + - libpsl5==0.21.0-1.2build2 + - libpython3-stdlib==3.10.6-1~22.04 + - libpython3.10-minimal==3.10.12-1~22.04.5 + - libpython3.10-stdlib==3.10.12-1~22.04.5 + - libquadmath0==12.3.0-1ubuntu1~22.04 + - libreadline8==8.1.2-1 + - librtmp1==2.4+20151223.gitfa8646d.1-2build4 + - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2 + - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2 + - libseccomp2==2.5.3-2ubuntu2 + - libselinux1==3.3-1build2 + - libsemanage-common==3.3-1build2 + - libsemanage2==3.3-1build2 + - libsepol2==3.3-1build1 + - libsmartcols1==2.37.2-4ubuntu3.4 + - libsqlite3-0==3.37.2-2ubuntu0.3 + - libss2==1.46.5-2ubuntu1.1 + - libssh-4==0.9.6-2ubuntu0.22.04.3 + - libssl3==3.0.2-0ubuntu1.16 + - libstdc++-11-dev==11.4.0-1ubuntu1~22.04 + - libstdc++6==12.3.0-1ubuntu1~22.04 + - libstemmer0d==2.2.0-1build1 + - libsystemd0==249.11-0ubuntu3.12 + - libtasn1-6==4.18.0-4build1 + - libtinfo6==6.3-2ubuntu0.1 + - libtirpc-common==1.3.2-2ubuntu0.1 + - libtirpc-dev==1.3.2-2ubuntu0.1 + - libtirpc3==1.3.2-2ubuntu0.1 + - libtsan0==11.4.0-1ubuntu1~22.04 + - libubsan1==12.3.0-1ubuntu1~22.04 + - libudev1==249.11-0ubuntu3.12 + - libunistring2==1.0-1 + - libunwind8==1.3.2-2build2.1 + - libutempter0==1.2.1-2build2 + - libuuid1==2.37.2-4ubuntu3.4 + - libwrap0==7.6.q-31build2 + - libxml2==2.9.13+dfsg-1ubuntu0.4 + - libxmlb2==0.3.6-2build1 + - libxxhash0==0.8.1-1 + - libyaml-0-2==0.2.2-1build2 + - libzstd1==1.4.8+dfsg-3build1 + - linux-libc-dev==5.15.0-113.123 + - locales==2.35-0ubuntu3.8 + - login==1:4.8.1-2ubuntu2.2 + - logsave==1.46.5-2ubuntu1.1 + - lsb-base==11.1.0ubuntu4 + - lsb-release==11.1.0ubuntu4 + - lto-disabled-list==24 + - make==4.3-4.1build1 + - mawk==1.3.4.20200120-3 + - media-types==7.0.0 + - mount==2.37.2-4ubuntu3.4 + - ncurses-base==6.3-2ubuntu0.1 + - ncurses-bin==6.3-2ubuntu0.1 + - nsight-compute-2024.2.1==2024.2.1.2-1 + - openssh-client==1:8.9p1-3ubuntu0.10 + - openssh-server==1:8.9p1-3ubuntu0.10 + - openssh-sftp-server==1:8.9p1-3ubuntu0.10 + - openssl==3.0.2-0ubuntu1.16 + - packagekit==1.2.5-2ubuntu2 + - passwd==1:4.8.1-2ubuntu2.2 + - patch==2.7.6-7build2 + - perl==5.34.0-3ubuntu1.3 + - perl-base==5.34.0-3ubuntu1.3 + - perl-modules-5.34==5.34.0-3ubuntu1.3 + - pinentry-curses==1.1.1-1build2 + - pkexec==0.105-33 + - policykit-1==0.105-33 + - polkitd==0.105-33 + - procps==2:3.3.17-6ubuntu2.1 + - python-apt-common==2.4.0ubuntu3 + - python3==3.10.6-1~22.04 + - python3-apt==2.4.0ubuntu3 + - python3-blinker==1.4+dfsg1-0.4 + - python3-cffi-backend==1.15.0-1build2 + - python3-cryptography==3.4.8-1ubuntu2.2 + - python3-dbus==1.2.18-3build1 + - python3-distro==1.7.0-1 + - python3-distutils==3.10.8-1~22.04 + - python3-gi==3.42.1-0ubuntu1 + - python3-httplib2==0.20.2-2 + - python3-importlib-metadata==4.6.4-1 + - python3-jeepney==0.7.1-3 + - python3-jwt==2.3.0-1ubuntu0.2 + - python3-keyring==23.5.0-1 + - python3-launchpadlib==1.10.16-1 + - python3-lazr.restfulclient==0.14.4-1 + - python3-lazr.uri==1.0.6-2 + - python3-lib2to3==3.10.8-1~22.04 + - python3-minimal==3.10.6-1~22.04 + - python3-more-itertools==8.10.0-2 + - python3-oauthlib==3.2.0-1ubuntu0.1 + - python3-pip==22.0.2+dfsg-1ubuntu0.4 + - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1 + - python3-pyparsing==2.4.7-1 + - python3-secretstorage==3.3.1-1 + - python3-setuptools==59.6.0-1.2ubuntu0.22.04.1 + - python3-six==1.16.0-3ubuntu1 + - python3-software-properties==0.99.22.9 + - python3-wadllib==1.3.6-1 + - python3-wheel==0.37.1-2ubuntu0.22.04.1 + - python3-zipp==1.0.0-3ubuntu0.1 + - python3.10==3.10.12-1~22.04.5 + - python3.10-minimal==3.10.12-1~22.04.5 + - readline-common==8.1.2-1 + - rpcsvc-proto==1.4.2-0ubuntu6 + - rsync==3.2.7-0ubuntu0.22.04.2 + - sed==4.8-1ubuntu2 + - sensible-utils==0.0.17 + - software-properties-common==0.99.22.9 + - sudo==1.9.9-1ubuntu2.4 + - systemd==249.11-0ubuntu3.12 + - systemd-sysv==249.11-0ubuntu3.12 + - sysvinit-utils==3.01-1ubuntu1 + - tar==1.34+dfsg-1ubuntu0.1.22.04.2 + - tmux==3.2a-4ubuntu0.2 + - ubuntu-keyring==2021.03.26 + - ucf==3.0043 + - usrmerge==25ubuntu2 + - util-linux==2.37.2-4ubuntu3.4 + - wget==1.21.2-2ubuntu1.1 + - xz-utils==5.2.5-2ubuntu1 + - zlib1g==1:1.2.11.dfsg-2ubuntu9.2 + machine: x86_64 + os: Linux + os_version: '#45~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Mon Jul 15 16:40:02 UTC 2' + processor: x86_64 + release: 6.5.0-45-generic diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/experiment.yaml new file mode 100644 index 0000000..8766afb --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/experiment.yaml @@ -0,0 +1,6 @@ +comment: 2x RTX A6000 +experiment: vllm_llama_3_70b_instruct_awq +experiment_hash: exp_hash_v1:7aa490 +run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000 +slug: 2x_rtx_a6000 +timestamp: 2024-08-22_12-26-40 diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output.yaml new file mode 100644 index 0000000..38f582a --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output.yaml @@ -0,0 +1,8 @@ +Count to 1000, skip unpopular numbers: 90e9f7cd91517e389cadb86c442cef8db2957198de38d8f8754ba022477e395d3a35b5f4704510ed3ee93fc368b11850f3e4e3751bb52b4fcb0258bc954ceeeb +Describe justice system in UK vs USA in 2000-5000 words: aaeca816cbedbdedf9193ee7b57795823ba4564e64f0283da95738e2f27c0f02d92977ef21701be345b01d667305b77987fec9439c2600216cc4e1be9e856db8 +Describe schooling system in UK vs USA in 2000-5000 words: b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd +Explain me some random problem for me in 2000-5000 words: 011261f17df06f112cc781eda2fb1f1ffcef58a9247c8e1d83fd8e61a16e18b1953018d9b0f9b6224b38a69d5f3f7ecc0cf305cf0e66c9bd26ec5224ed404fad +Tell me entire history of USA: 9a83ac06c7986da8448587ab09727bd297a96c59055fac300a2541b23cc2b88b4cdf035babf1a85a6b9aed6c18ba5659947de2774308ee50c911c515359f8cae +Write a ballad. Pick a random theme.: f1a4f47af63fbb1c6333a6afbf187a89505731d60a3dd97d352e1e5261aaeb9bb79e4d3a6cdb2c251b4d3866eea9654bbd35248b2824fefb8be0e97d90b68ffc +Write an epic story about a dragon and a knight: 72f9f5a5419718e907814f68e849907d2c941aa21498cbcb95b0e94a33f8989a622a072550fc04165505b1ac96278ae2b82928adb59059e136b4f2dea945faa4 +Write an essay about being a Senior developer.: 4277b57bea5b502a7810fc42e1d3055d34d47b9d4d77e03312ed42cb0fbaf045a203087ab110088ba9c6c88ae9a7233cf39d9b7fc4040bc05708101f3e7d2eec diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/run.local.log new file mode 100644 index 0000000..f15fc24 --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/run.local.log @@ -0,0 +1,15 @@ +2024-08-22 12:26:40,850 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 2x RTX A6000 +2024-08-22 12:26:40,853 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/run.local.log +2024-08-22 12:26:41,096 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1) +2024-08-22 12:26:41,588 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n' +2024-08-22 12:26:41,596 - paramiko.transport - INFO - Authentication (publickey) successful! +2024-08-22 12:26:41,598 - __main__ - INFO - Syncing files to remote +2024-08-22 12:26:41,991 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output' stdout: '' stderr: '' status_code: 0 +2024-08-22 12:26:47,230 - __main__ - INFO - Setting up remote environment +2024-08-22 12:27:21,898 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n curl -LsSf https://astral.sh/uv/install.sh | sh\n export PATH=$HOME/.cargo/bin:$PATH\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n uv venv -p python3.11 --python-preference managed\n source .venv/bin/activate \n uv pip install ./deterministic_ml*.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\n ' stdout: "installing to /root/.cargo/bin\n uv\n uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n source $HOME/.cargo/env (sh, bash, zsh)\n source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-40_2x_rtx_a6000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-40_2x_rtx_a6000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-40_2x_rtx_a6000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 928ms\nPrepared 108 packages in 29.93s\nInstalled 108 packages in 394ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0 +2024-08-22 12:27:21,927 - __main__ - INFO - Gathering system info +2024-08-22 12:27:25,735 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-40_2x_rtx_a6000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-40_2x_rtx_a6000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-40_2x_rtx_a6000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 12:27:25,743 - __main__ - INFO - Running experiment code on remote +2024-08-22 12:36:14,863 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output/stdout.txt' stdout: "gpu_count=2\nStarting model loading\nINFO 08-22 10:27:33 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:27:33 config.py:729] Defaulting to use mp for distributed inference\nINFO 08-22 10:27:33 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nWARNING 08-22 10:27:33 multiproc_gpu_executor.py:59] Reducing Torch parallelism from 24 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.\nINFO 08-22 10:27:33 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:34 multiproc_worker_utils.py:215] Worker ready; awaiting tasks\nINFO 08-22 10:27:34 utils.py:841] Found nccl from library libnccl.so.2\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:34 utils.py:841] Found nccl from library libnccl.so.2\nINFO 08-22 10:27:34 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:34 pynccl.py:63] vLLM is using nccl==2.20.5\nINFO 08-22 10:27:35 custom_all_reduce_utils.py:203] generating GPU P2P access cache in /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\nINFO 08-22 10:27:45 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\nWARNING 08-22 10:27:45 custom_all_reduce.py:127] Custom allreduce is disabled because your platform lacks GPU P2P capability or P2P test failed. To silence this warning, specify disable_custom_all_reduce=True explicitly.\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:45 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m WARNING 08-22 10:27:45 custom_all_reduce.py:127] Custom allreduce is disabled because your platform lacks GPU P2P capability or P2P test failed. To silence this warning, specify disable_custom_all_reduce=True explicitly.\nINFO 08-22 10:27:45 shm_broadcast.py:235] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=, local_subscribe_port=45163, remote_subscribe_port=None)\nINFO 08-22 10:27:45 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:45 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 10:27:45 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:45 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:29:21 model_runner.py:732] Loading model weights took 18.5518 GB\nINFO 08-22 10:29:23 model_runner.py:732] Loading model weights took 18.5518 GB\nINFO 08-22 10:29:35 distributed_gpu_executor.py:56] # GPU blocks: 6941, # CPU blocks: 1638\nmodel loading took 126.15 seconds\nStarting 8 responses generation\n8 responses generation took 392.59 seconds\n{'Count to 1000, skip unpopular numbers': '90e9f7cd91517e389cadb86c442cef8db2957198de38d8f8754ba022477e395d3a35b5f4704510ed3ee93fc368b11850f3e4e3751bb52b4fcb0258bc954ceeeb',\n 'Describe justice system in UK vs USA in 2000-5000 words': 'aaeca816cbedbdedf9193ee7b57795823ba4564e64f0283da95738e2f27c0f02d92977ef21701be345b01d667305b77987fec9439c2600216cc4e1be9e856db8',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd',\n 'Explain me some random problem for me in 2000-5000 words': '011261f17df06f112cc781eda2fb1f1ffcef58a9247c8e1d83fd8e61a16e18b1953018d9b0f9b6224b38a69d5f3f7ecc0cf305cf0e66c9bd26ec5224ed404fad',\n 'Tell me entire history of USA': '9a83ac06c7986da8448587ab09727bd297a96c59055fac300a2541b23cc2b88b4cdf035babf1a85a6b9aed6c18ba5659947de2774308ee50c911c515359f8cae',\n 'Write a ballad. Pick a random theme.': 'f1a4f47af63fbb1c6333a6afbf187a89505731d60a3dd97d352e1e5261aaeb9bb79e4d3a6cdb2c251b4d3866eea9654bbd35248b2824fefb8be0e97d90b68ffc',\n 'Write an epic story about a dragon and a knight': '72f9f5a5419718e907814f68e849907d2c941aa21498cbcb95b0e94a33f8989a622a072550fc04165505b1ac96278ae2b82928adb59059e136b4f2dea945faa4',\n 'Write an essay about being a Senior developer.': '4277b57bea5b502a7810fc42e1d3055d34d47b9d4d77e03312ed42cb0fbaf045a203087ab110088ba9c6c88ae9a7233cf39d9b7fc4040bc05708101f3e7d2eec'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-40_2x_rtx_a6000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-40_2x_rtx_a6000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-40_2x_rtx_a6000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output/stdout.txt\n\rLoading safetensors checkpoint shards: 0% Completed | 0/9 [00:00, local_subscribe_port=45163, remote_subscribe_port=None) +INFO 08-22 10:27:45 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq... +(VllmWorkerProcess pid=761) INFO 08-22 10:27:45 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq... +INFO 08-22 10:27:45 weight_utils.py:225] Using model weights format ['*.safetensors'] +(VllmWorkerProcess pid=761) INFO 08-22 10:27:45 weight_utils.py:225] Using model weights format ['*.safetensors'] +(VllmWorkerProcess pid=761) INFO 08-22 10:29:21 model_runner.py:732] Loading model weights took 18.5518 GB +INFO 08-22 10:29:23 model_runner.py:732] Loading model weights took 18.5518 GB +INFO 08-22 10:29:35 distributed_gpu_executor.py:56] # GPU blocks: 6941, # CPU blocks: 1638 +model loading took 126.15 seconds +Starting 8 responses generation +8 responses generation took 392.59 seconds +{'Count to 1000, skip unpopular numbers': '90e9f7cd91517e389cadb86c442cef8db2957198de38d8f8754ba022477e395d3a35b5f4704510ed3ee93fc368b11850f3e4e3751bb52b4fcb0258bc954ceeeb', + 'Describe justice system in UK vs USA in 2000-5000 words': 'aaeca816cbedbdedf9193ee7b57795823ba4564e64f0283da95738e2f27c0f02d92977ef21701be345b01d667305b77987fec9439c2600216cc4e1be9e856db8', + 'Describe schooling system in UK vs USA in 2000-5000 words': 'b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd', + 'Explain me some random problem for me in 2000-5000 words': '011261f17df06f112cc781eda2fb1f1ffcef58a9247c8e1d83fd8e61a16e18b1953018d9b0f9b6224b38a69d5f3f7ecc0cf305cf0e66c9bd26ec5224ed404fad', + 'Tell me entire history of USA': '9a83ac06c7986da8448587ab09727bd297a96c59055fac300a2541b23cc2b88b4cdf035babf1a85a6b9aed6c18ba5659947de2774308ee50c911c515359f8cae', + 'Write a ballad. Pick a random theme.': 'f1a4f47af63fbb1c6333a6afbf187a89505731d60a3dd97d352e1e5261aaeb9bb79e4d3a6cdb2c251b4d3866eea9654bbd35248b2824fefb8be0e97d90b68ffc', + 'Write an epic story about a dragon and a knight': '72f9f5a5419718e907814f68e849907d2c941aa21498cbcb95b0e94a33f8989a622a072550fc04165505b1ac96278ae2b82928adb59059e136b4f2dea945faa4', + 'Write an essay about being a Senior developer.': '4277b57bea5b502a7810fc42e1d3055d34d47b9d4d77e03312ed42cb0fbaf045a203087ab110088ba9c6c88ae9a7233cf39d9b7fc4040bc05708101f3e7d2eec'} diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/sysinfo.yaml new file mode 100644 index 0000000..fb349ee --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/sysinfo.yaml @@ -0,0 +1,544 @@ +cuda: + cuda: '12.1' + cudnn: 90100 +machine: + cpu: + clocks: + - 2700.0 + - 1200.011 + - 1200.0 + - 1300.0 + - 1200.0 + - 1200.0 + - 2500.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 2698.678 + - 2299.646 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 2700.627 + - 2401.573 + - 2057.748 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1499.94 + - 2400.157 + - 1500.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1922.86 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + - 1200.0 + count: 48 + model: Intel(R) Xeon(R) Gold 6226 CPU @ 2.70GHz + docker_support: + nvidia: false + runc: false + gpu: + count: 2 + details: + - capacity: '46068' + cuda: '8.6' + driver: 555.58.02 + graphics_speed: '0' + memory_speed: '405' + name: NVIDIA RTX A6000 + power_limit: '300.00' + - capacity: '46068' + cuda: '8.6' + driver: 555.58.02 + graphics_speed: '0' + memory_speed: '405' + name: NVIDIA RTX A6000 + power_limit: '300.00' + hard_disk: + free: 77359368 + total: 83886080 + used: 6526712 + os: Ubuntu 22.04.4 LTS + ram: + available: 189181908 + free: 125037648 + total: 197637528 + used: 72599880 +python: + packages: + - aiohappyeyeballs==2.4.0 + - aiohttp==3.10.5 + - aiosignal==1.3.1 + - annotated-types==0.7.0 + - anyio==4.4.0 + - attrs==24.2.0 + - certifi==2024.7.4 + - charset-normalizer==3.3.2 + - click==8.1.7 + - cloudpickle==3.0.0 + - cmake==3.30.2 + - datasets==2.21.0 + - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl + - dill==0.3.8 + - diskcache==5.6.3 + - distro==1.9.0 + - fastapi==0.112.1 + - filelock==3.15.4 + - frozenlist==1.4.1 + - fsspec==2024.6.1 + - h11==0.14.0 + - httpcore==1.0.5 + - httptools==0.6.1 + - httpx==0.27.0 + - huggingface-hub==0.24.6 + - idna==3.7 + - interegular==0.3.3 + - jinja2==3.1.4 + - jiter==0.5.0 + - jsonschema==4.23.0 + - jsonschema-specifications==2023.12.1 + - lark==1.2.2 + - llvmlite==0.43.0 + - lm-format-enforcer==0.10.3 + - markupsafe==2.1.5 + - mpmath==1.3.0 + - msgpack==1.0.8 + - multidict==6.0.5 + - multiprocess==0.70.16 + - nest-asyncio==1.6.0 + - networkx==3.3 + - ninja==1.11.1.1 + - numba==0.60.0 + - numpy==1.26.4 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==9.1.0.70 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-ml-py==12.560.30 + - nvidia-nccl-cu12==2.20.5 + - nvidia-nvjitlink-cu12==12.6.20 + - nvidia-nvtx-cu12==12.1.105 + - openai==1.42.0 + - outlines==0.0.46 + - packaging==24.1 + - pandas==2.2.2 + - pillow==10.4.0 + - prometheus-client==0.20.0 + - prometheus-fastapi-instrumentator==7.0.0 + - protobuf==5.27.3 + - psutil==6.0.0 + - py-cpuinfo==9.0.0 + - pyairports==2.1.1 + - pyarrow==17.0.0 + - pycountry==24.6.1 + - pydantic==2.8.2 + - pydantic-core==2.20.1 + - python-dateutil==2.9.0.post0 + - python-dotenv==1.0.1 + - pytz==2024.1 + - pyyaml==6.0.2 + - pyzmq==26.2.0 + - ray==2.34.0 + - referencing==0.35.1 + - regex==2024.7.24 + - requests==2.32.3 + - rpds-py==0.20.0 + - safetensors==0.4.4 + - sentencepiece==0.2.0 + - setuptools==73.0.1 + - six==1.16.0 + - sniffio==1.3.1 + - starlette==0.38.2 + - sympy==1.13.2 + - tiktoken==0.7.0 + - tokenizers==0.19.1 + - torch==2.4.0 + - torchvision==0.19.0 + - tqdm==4.66.5 + - transformers==4.44.1 + - triton==3.0.0 + - typing-extensions==4.12.2 + - tzdata==2024.1 + - urllib3==2.2.2 + - uvicorn==0.30.6 + - uvloop==0.20.0 + - vllm==0.5.4 + - vllm-flash-attn==2.6.1 + - watchfiles==0.23.0 + - websockets==13.0 + - xformers==0.0.27.post2 + - xxhash==3.5.0 + - yarl==1.9.4 + version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ] +system: + dpkg_packages: + - adduser==3.118ubuntu5 + - apt==2.4.12 + - base-files==12ubuntu4.6 + - base-passwd==3.5.52build1 + - bash==5.1-6ubuntu1.1 + - binutils==2.38-4ubuntu2.6 + - binutils-common==2.38-4ubuntu2.6 + - binutils-x86-64-linux-gnu==2.38-4ubuntu2.6 + - bsdutils==1:2.37.2-4ubuntu3.4 + - build-essential==12.9ubuntu3 + - bzip2==1.0.8-5build1 + - ca-certificates==20230311ubuntu0.22.04.1 + - coreutils==8.32-4.1ubuntu1.2 + - cpp==4:11.2.0-1ubuntu1 + - cpp-11==11.4.0-1ubuntu1~22.04 + - cuda-cccl-12-5==12.5.39-1 + - cuda-command-line-tools-12-5==12.5.1-1 + - cuda-compat-12-5==555.42.06-1 + - cuda-compiler-12-5==12.5.1-1 + - cuda-crt-12-5==12.5.82-1 + - cuda-cudart-12-5==12.5.82-1 + - cuda-cudart-dev-12-5==12.5.82-1 + - cuda-cuobjdump-12-5==12.5.39-1 + - cuda-cupti-12-5==12.5.82-1 + - cuda-cupti-dev-12-5==12.5.82-1 + - cuda-cuxxfilt-12-5==12.5.82-1 + - cuda-driver-dev-12-5==12.5.82-1 + - cuda-gdb-12-5==12.5.82-1 + - cuda-keyring==1.1-1 + - cuda-libraries-12-5==12.5.1-1 + - cuda-libraries-dev-12-5==12.5.1-1 + - cuda-minimal-build-12-5==12.5.1-1 + - cuda-nsight-compute-12-5==12.5.1-1 + - cuda-nvcc-12-5==12.5.82-1 + - cuda-nvdisasm-12-5==12.5.39-1 + - cuda-nvml-dev-12-5==12.5.82-1 + - cuda-nvprof-12-5==12.5.82-1 + - cuda-nvprune-12-5==12.5.82-1 + - cuda-nvrtc-12-5==12.5.82-1 + - cuda-nvrtc-dev-12-5==12.5.82-1 + - cuda-nvtx-12-5==12.5.82-1 + - cuda-nvvm-12-5==12.5.82-1 + - cuda-opencl-12-5==12.5.39-1 + - cuda-opencl-dev-12-5==12.5.39-1 + - cuda-profiler-api-12-5==12.5.39-1 + - cuda-sanitizer-12-5==12.5.81-1 + - cuda-toolkit-12-5-config-common==12.5.82-1 + - cuda-toolkit-12-config-common==12.5.82-1 + - cuda-toolkit-config-common==12.5.82-1 + - curl==7.81.0-1ubuntu1.17 + - dash==0.5.11+git20210903+057cd650a4ed-3build1 + - dbus==1.12.20-2ubuntu4.1 + - debconf==1.5.79ubuntu1 + - debianutils==5.5-1ubuntu2 + - diffutils==1:3.8-0ubuntu2 + - dirmngr==2.2.27-3ubuntu2.1 + - distro-info-data==0.52ubuntu0.7 + - dpkg==1.21.1ubuntu2.3 + - dpkg-dev==1.21.1ubuntu2.3 + - e2fsprogs==1.46.5-2ubuntu1.1 + - findutils==4.8.0-1ubuntu3 + - g++==4:11.2.0-1ubuntu1 + - g++-11==11.4.0-1ubuntu1~22.04 + - gcc==4:11.2.0-1ubuntu1 + - gcc-11==11.4.0-1ubuntu1~22.04 + - gcc-11-base==11.4.0-1ubuntu1~22.04 + - gcc-12-base==12.3.0-1ubuntu1~22.04 + - gir1.2-glib-2.0==1.72.0-1 + - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2 + - git==1:2.34.1-1ubuntu1.11 + - git-man==1:2.34.1-1ubuntu1.11 + - gnupg==2.2.27-3ubuntu2.1 + - gnupg-l10n==2.2.27-3ubuntu2.1 + - gnupg-utils==2.2.27-3ubuntu2.1 + - gnupg2==2.2.27-3ubuntu2.1 + - gpg==2.2.27-3ubuntu2.1 + - gpg-agent==2.2.27-3ubuntu2.1 + - gpg-wks-client==2.2.27-3ubuntu2.1 + - gpg-wks-server==2.2.27-3ubuntu2.1 + - gpgconf==2.2.27-3ubuntu2.1 + - gpgsm==2.2.27-3ubuntu2.1 + - gpgv==2.2.27-3ubuntu2.1 + - grep==3.7-1build1 + - gzip==1.10-4ubuntu4.1 + - hostname==3.23ubuntu2 + - init-system-helpers==1.62 + - iso-codes==4.9.0-1 + - less==590-1ubuntu0.22.04.3 + - libacl1==2.3.1-1 + - libapparmor1==3.0.4-2ubuntu2.3 + - libappstream4==0.15.2-2 + - libapt-pkg6.0==2.4.12 + - libargon2-1==0~20171227-0.3 + - libasan6==11.4.0-1ubuntu1~22.04 + - libassuan0==2.5.5-1build1 + - libatomic1==12.3.0-1ubuntu1~22.04 + - libattr1==1:2.5.1-1build1 + - libaudit-common==1:3.0.7-1build1 + - libaudit1==1:3.0.7-1build1 + - libbinutils==2.38-4ubuntu2.6 + - libblkid1==2.37.2-4ubuntu3.4 + - libbrotli1==1.0.9-2build6 + - libbsd0==0.11.5-1 + - libbz2-1.0==1.0.8-5build1 + - libc-bin==2.35-0ubuntu3.8 + - libc-dev-bin==2.35-0ubuntu3.8 + - libc6==2.35-0ubuntu3.8 + - libc6-dev==2.35-0ubuntu3.8 + - libcap-ng0==0.7.9-2.2build3 + - libcap2==1:2.44-1ubuntu0.22.04.1 + - libcap2-bin==1:2.44-1ubuntu0.22.04.1 + - libcbor0.8==0.8.0-2ubuntu1 + - libcc1-0==12.3.0-1ubuntu1~22.04 + - libcom-err2==1.46.5-2ubuntu1.1 + - libcrypt-dev==1:4.4.27-1 + - libcrypt1==1:4.4.27-1 + - libcryptsetup12==2:2.4.3-1ubuntu1.2 + - libctf-nobfd0==2.38-4ubuntu2.6 + - libctf0==2.38-4ubuntu2.6 + - libcublas-12-5==12.5.3.2-1 + - libcublas-dev-12-5==12.5.3.2-1 + - libcudnn9-cuda-12==9.2.1.18-1 + - libcudnn9-dev-cuda-12==9.2.1.18-1 + - libcufft-12-5==11.2.3.61-1 + - libcufft-dev-12-5==11.2.3.61-1 + - libcufile-12-5==1.10.1.7-1 + - libcufile-dev-12-5==1.10.1.7-1 + - libcurand-12-5==10.3.6.82-1 + - libcurand-dev-12-5==10.3.6.82-1 + - libcurl3-gnutls==7.81.0-1ubuntu1.17 + - libcurl4==7.81.0-1ubuntu1.17 + - libcusolver-12-5==11.6.3.83-1 + - libcusolver-dev-12-5==11.6.3.83-1 + - libcusparse-12-5==12.5.1.3-1 + - libcusparse-dev-12-5==12.5.1.3-1 + - libdb5.3==5.3.28+dfsg1-0.8ubuntu3 + - libdbus-1-3==1.12.20-2ubuntu4.1 + - libdebconfclient0==0.261ubuntu1 + - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4 + - libdpkg-perl==1.21.1ubuntu2.3 + - libdw1==0.186-1build1 + - libedit2==3.1-20210910-1build1 + - libelf1==0.186-1build1 + - liberror-perl==0.17029-1 + - libevent-core-2.1-7==2.1.12-stable-1build3 + - libexpat1==2.4.7-1ubuntu0.3 + - libext2fs2==1.46.5-2ubuntu1.1 + - libffi8==3.4.2-4 + - libfido2-1==1.10.0-1 + - libgcc-11-dev==11.4.0-1ubuntu1~22.04 + - libgcc-s1==12.3.0-1ubuntu1~22.04 + - libgcrypt20==1.9.4-3ubuntu3 + - libgdbm-compat4==1.23-1 + - libgdbm6==1.23-1 + - libgirepository-1.0-1==1.72.0-1 + - libglib2.0-0==2.72.4-0ubuntu2.3 + - libglib2.0-bin==2.72.4-0ubuntu2.3 + - libglib2.0-data==2.72.4-0ubuntu2.3 + - libgmp10==2:6.2.1+dfsg-3ubuntu1 + - libgnutls30==3.7.3-4ubuntu1.5 + - libgomp1==12.3.0-1ubuntu1~22.04 + - libgpg-error0==1.43-3 + - libgssapi-krb5-2==1.19.2-2ubuntu0.3 + - libgstreamer1.0-0==1.20.3-0ubuntu1 + - libhogweed6==3.7.3-1build2 + - libicu70==70.1-2 + - libidn2-0==2.3.2-2build1 + - libip4tc2==1.8.7-1ubuntu5.2 + - libisl23==0.24-2build1 + - libitm1==12.3.0-1ubuntu1~22.04 + - libjson-c5==0.15-3~ubuntu1.22.04.2 + - libk5crypto3==1.19.2-2ubuntu0.3 + - libkeyutils1==1.6.1-2ubuntu3 + - libkmod2==29-1ubuntu1 + - libkrb5-3==1.19.2-2ubuntu0.3 + - libkrb5support0==1.19.2-2ubuntu0.3 + - libksba8==1.6.0-2ubuntu0.2 + - libldap-2.5-0==2.5.17+dfsg-0ubuntu0.22.04.1 + - liblsan0==12.3.0-1ubuntu1~22.04 + - liblz4-1==1.9.3-2build2 + - liblzma5==5.2.5-2ubuntu1 + - libmd0==1.0.4-1build1 + - libmount1==2.37.2-4ubuntu3.4 + - libmpc3==1.2.1-2build1 + - libmpdec3==2.5.1-2build2 + - libmpfr6==4.1.0-3build3 + - libnccl-dev==2.22.3-1+cuda12.5 + - libnccl2==2.22.3-1+cuda12.5 + - libncurses6==6.3-2ubuntu0.1 + - libncursesw6==6.3-2ubuntu0.1 + - libnettle8==3.7.3-1build2 + - libnghttp2-14==1.43.0-1ubuntu0.2 + - libnpp-12-5==12.3.0.159-1 + - libnpp-dev-12-5==12.3.0.159-1 + - libnpth0==1.6-3build2 + - libnsl-dev==1.3.0-2build2 + - libnsl2==1.3.0-2build2 + - libnvfatbin-12-5==12.5.82-1 + - libnvfatbin-dev-12-5==12.5.82-1 + - libnvjitlink-12-5==12.5.82-1 + - libnvjitlink-dev-12-5==12.5.82-1 + - libnvjpeg-12-5==12.3.2.81-1 + - libnvjpeg-dev-12-5==12.3.2.81-1 + - libp11-kit0==0.24.0-6build1 + - libpackagekit-glib2-18==1.2.5-2ubuntu2 + - libpam-modules==1.4.0-11ubuntu2.4 + - libpam-modules-bin==1.4.0-11ubuntu2.4 + - libpam-runtime==1.4.0-11ubuntu2.4 + - libpam-systemd==249.11-0ubuntu3.12 + - libpam0g==1.4.0-11ubuntu2.4 + - libpcre2-8-0==10.39-3ubuntu0.1 + - libpcre3==2:8.39-13ubuntu0.22.04.1 + - libperl5.34==5.34.0-3ubuntu1.3 + - libpolkit-agent-1-0==0.105-33 + - libpolkit-gobject-1-0==0.105-33 + - libpopt0==1.18-3build1 + - libprocps8==2:3.3.17-6ubuntu2.1 + - libpsl5==0.21.0-1.2build2 + - libpython3-stdlib==3.10.6-1~22.04 + - libpython3.10-minimal==3.10.12-1~22.04.5 + - libpython3.10-stdlib==3.10.12-1~22.04.5 + - libquadmath0==12.3.0-1ubuntu1~22.04 + - libreadline8==8.1.2-1 + - librtmp1==2.4+20151223.gitfa8646d.1-2build4 + - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2 + - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2 + - libseccomp2==2.5.3-2ubuntu2 + - libselinux1==3.3-1build2 + - libsemanage-common==3.3-1build2 + - libsemanage2==3.3-1build2 + - libsepol2==3.3-1build1 + - libsmartcols1==2.37.2-4ubuntu3.4 + - libsqlite3-0==3.37.2-2ubuntu0.3 + - libss2==1.46.5-2ubuntu1.1 + - libssh-4==0.9.6-2ubuntu0.22.04.3 + - libssl3==3.0.2-0ubuntu1.16 + - libstdc++-11-dev==11.4.0-1ubuntu1~22.04 + - libstdc++6==12.3.0-1ubuntu1~22.04 + - libstemmer0d==2.2.0-1build1 + - libsystemd0==249.11-0ubuntu3.12 + - libtasn1-6==4.18.0-4build1 + - libtinfo6==6.3-2ubuntu0.1 + - libtirpc-common==1.3.2-2ubuntu0.1 + - libtirpc-dev==1.3.2-2ubuntu0.1 + - libtirpc3==1.3.2-2ubuntu0.1 + - libtsan0==11.4.0-1ubuntu1~22.04 + - libubsan1==12.3.0-1ubuntu1~22.04 + - libudev1==249.11-0ubuntu3.12 + - libunistring2==1.0-1 + - libunwind8==1.3.2-2build2.1 + - libutempter0==1.2.1-2build2 + - libuuid1==2.37.2-4ubuntu3.4 + - libwrap0==7.6.q-31build2 + - libxml2==2.9.13+dfsg-1ubuntu0.4 + - libxmlb2==0.3.6-2build1 + - libxxhash0==0.8.1-1 + - libyaml-0-2==0.2.2-1build2 + - libzstd1==1.4.8+dfsg-3build1 + - linux-libc-dev==5.15.0-113.123 + - locales==2.35-0ubuntu3.8 + - login==1:4.8.1-2ubuntu2.2 + - logsave==1.46.5-2ubuntu1.1 + - lsb-base==11.1.0ubuntu4 + - lsb-release==11.1.0ubuntu4 + - lto-disabled-list==24 + - make==4.3-4.1build1 + - mawk==1.3.4.20200120-3 + - media-types==7.0.0 + - mount==2.37.2-4ubuntu3.4 + - ncurses-base==6.3-2ubuntu0.1 + - ncurses-bin==6.3-2ubuntu0.1 + - nsight-compute-2024.2.1==2024.2.1.2-1 + - openssh-client==1:8.9p1-3ubuntu0.10 + - openssh-server==1:8.9p1-3ubuntu0.10 + - openssh-sftp-server==1:8.9p1-3ubuntu0.10 + - openssl==3.0.2-0ubuntu1.16 + - packagekit==1.2.5-2ubuntu2 + - passwd==1:4.8.1-2ubuntu2.2 + - patch==2.7.6-7build2 + - perl==5.34.0-3ubuntu1.3 + - perl-base==5.34.0-3ubuntu1.3 + - perl-modules-5.34==5.34.0-3ubuntu1.3 + - pinentry-curses==1.1.1-1build2 + - pkexec==0.105-33 + - policykit-1==0.105-33 + - polkitd==0.105-33 + - procps==2:3.3.17-6ubuntu2.1 + - python-apt-common==2.4.0ubuntu3 + - python3==3.10.6-1~22.04 + - python3-apt==2.4.0ubuntu3 + - python3-blinker==1.4+dfsg1-0.4 + - python3-cffi-backend==1.15.0-1build2 + - python3-cryptography==3.4.8-1ubuntu2.2 + - python3-dbus==1.2.18-3build1 + - python3-distro==1.7.0-1 + - python3-distutils==3.10.8-1~22.04 + - python3-gi==3.42.1-0ubuntu1 + - python3-httplib2==0.20.2-2 + - python3-importlib-metadata==4.6.4-1 + - python3-jeepney==0.7.1-3 + - python3-jwt==2.3.0-1ubuntu0.2 + - python3-keyring==23.5.0-1 + - python3-launchpadlib==1.10.16-1 + - python3-lazr.restfulclient==0.14.4-1 + - python3-lazr.uri==1.0.6-2 + - python3-lib2to3==3.10.8-1~22.04 + - python3-minimal==3.10.6-1~22.04 + - python3-more-itertools==8.10.0-2 + - python3-oauthlib==3.2.0-1ubuntu0.1 + - python3-pip==22.0.2+dfsg-1ubuntu0.4 + - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1 + - python3-pyparsing==2.4.7-1 + - python3-secretstorage==3.3.1-1 + - python3-setuptools==59.6.0-1.2ubuntu0.22.04.1 + - python3-six==1.16.0-3ubuntu1 + - python3-software-properties==0.99.22.9 + - python3-wadllib==1.3.6-1 + - python3-wheel==0.37.1-2ubuntu0.22.04.1 + - python3-zipp==1.0.0-3ubuntu0.1 + - python3.10==3.10.12-1~22.04.5 + - python3.10-minimal==3.10.12-1~22.04.5 + - readline-common==8.1.2-1 + - rpcsvc-proto==1.4.2-0ubuntu6 + - rsync==3.2.7-0ubuntu0.22.04.2 + - sed==4.8-1ubuntu2 + - sensible-utils==0.0.17 + - software-properties-common==0.99.22.9 + - sudo==1.9.9-1ubuntu2.4 + - systemd==249.11-0ubuntu3.12 + - systemd-sysv==249.11-0ubuntu3.12 + - sysvinit-utils==3.01-1ubuntu1 + - tar==1.34+dfsg-1ubuntu0.1.22.04.2 + - tmux==3.2a-4ubuntu0.2 + - ubuntu-keyring==2021.03.26 + - ucf==3.0043 + - usrmerge==25ubuntu2 + - util-linux==2.37.2-4ubuntu3.4 + - wget==1.21.2-2ubuntu1.1 + - xz-utils==5.2.5-2ubuntu1 + - zlib1g==1:1.2.11.dfsg-2ubuntu9.2 + machine: x86_64 + os: Linux + os_version: '#40~22.04.3-Ubuntu SMP PREEMPT_DYNAMIC Tue Jul 30 17:30:19 UTC 2' + processor: x86_64 + release: 6.8.0-40-generic diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/experiment.yaml new file mode 100644 index 0000000..5435cd7 --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/experiment.yaml @@ -0,0 +1,6 @@ +comment: 1x H100 NVL +experiment: vllm_llama_3_70b_instruct_awq +experiment_hash: exp_hash_v1:7aa490 +run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl +slug: 1x_h100_nvl +timestamp: 2024-08-22_12-49-14 diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output.yaml new file mode 100644 index 0000000..de10221 --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output.yaml @@ -0,0 +1,8 @@ +Count to 1000, skip unpopular numbers: 40b945bf30ccd2a3a9996c3579a6f503195da6c271b76082bc514c827d86d87eeea8677a001e8c0cf9b4d53bc24cd575f4be62d1f8d754077a272da0df073762 +Describe justice system in UK vs USA in 2000-5000 words: 2b6f5ba49c027271b4ef651e7bf25a03f2389e708f9fc833d1b8fe0018f2c1c150fbd3dd81e2dc14fe3ffe0ef53abf7be18236f30338feff2353e8b9713ca973 +Describe schooling system in UK vs USA in 2000-5000 words: 3a7b4d5edc2a14a525a1d0677e36570fdb22d31521947c85d626788a0688e66494a7e9414799602236a6bb9455a525029e84202446871b54613a19b2fb3aca66 +Explain me some random problem for me in 2000-5000 words: 323525095bf264a962795c48d2111bc18e790bcf9ac6ac229817d6d2ceaed9c5d71147039284037d64cc4792fe1af452540729701c6ec1ec84e4a835edfd8827 +Tell me entire history of USA: d2a958bb90a040a9eab4c9fe0f08524c4bd4047b33fb838ea05612d98d2f7a2df7c6c2c86b70065b7e0f3b4f3a96bc7aeacd00de1cbad3bcc3385587b90be77e +Write a ballad. Pick a random theme.: 8ff349a1b32d048df8dc8520552b5321dc65386c0812a2d0a2100d6a23ea493559872c861583b3581aca858f3f71c427c5ef031d860370809098bd9a2d87b98b +Write an epic story about a dragon and a knight: a009390cba1b724f1d842064dd4400751745157bfa4e2498189719fe705df094ec4dc393f01f4a21f3c66eea9104ef4c162af115f23d4f9a8cf08b4b3a844f77 +Write an essay about being a Senior developer.: 426f3bc4a0c96f12a23df460712ab3a0b29e5594d98fe72f474d233472612599a4c47e6dab3e13cf9a2d83a46a9a43b689ccb3f607d0af9b6d8a80d478f17e43 diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/run.local.log new file mode 100644 index 0000000..275824a --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/run.local.log @@ -0,0 +1,15 @@ +2024-08-22 12:49:14,487 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x H100 NVL +2024-08-22 12:49:14,490 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/run.local.log +2024-08-22 12:49:14,851 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1) +2024-08-22 12:49:15,574 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n' +2024-08-22 12:49:15,582 - paramiko.transport - INFO - Authentication (publickey) successful! +2024-08-22 12:49:15,584 - __main__ - INFO - Syncing files to remote +2024-08-22 12:49:16,157 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output' stdout: '' stderr: '' status_code: 0 +2024-08-22 12:49:23,557 - __main__ - INFO - Setting up remote environment +2024-08-22 12:50:03,266 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n curl -LsSf https://astral.sh/uv/install.sh | sh\n export PATH=$HOME/.cargo/bin:$PATH\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n uv venv -p python3.11 --python-preference managed\n source .venv/bin/activate \n uv pip install ./deterministic_ml*.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\n ' stdout: "installing to /root/.cargo/bin\n uv\n uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n source $HOME/.cargo/env (sh, bash, zsh)\n source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-49-14_1x_h100_nvl '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-49-14_1x_h100_nvl\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-49-14_1x_h100_nvl) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 958ms\nPrepared 108 packages in 33.96s\nInstalled 108 packages in 559ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0 +2024-08-22 12:50:03,285 - __main__ - INFO - Gathering system info +2024-08-22 12:50:07,008 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-49-14_1x_h100_nvl '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-49-14_1x_h100_nvl\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-49-14_1x_h100_nvl) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 12:50:07,015 - __main__ - INFO - Running experiment code on remote +2024-08-22 13:02:02,793 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 10:50:14 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:50:14 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 10:50:16 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 10:50:17 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 10:58:40 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 10:58:46 gpu_executor.py:102] # GPU blocks: 8601, # CPU blocks: 819\nmodel loading took 529.89 seconds\nStarting 8 responses generation\n8 responses generation took 175.68 seconds\n{'Count to 1000, skip unpopular numbers': '40b945bf30ccd2a3a9996c3579a6f503195da6c271b76082bc514c827d86d87eeea8677a001e8c0cf9b4d53bc24cd575f4be62d1f8d754077a272da0df073762',\n 'Describe justice system in UK vs USA in 2000-5000 words': '2b6f5ba49c027271b4ef651e7bf25a03f2389e708f9fc833d1b8fe0018f2c1c150fbd3dd81e2dc14fe3ffe0ef53abf7be18236f30338feff2353e8b9713ca973',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '3a7b4d5edc2a14a525a1d0677e36570fdb22d31521947c85d626788a0688e66494a7e9414799602236a6bb9455a525029e84202446871b54613a19b2fb3aca66',\n 'Explain me some random problem for me in 2000-5000 words': '323525095bf264a962795c48d2111bc18e790bcf9ac6ac229817d6d2ceaed9c5d71147039284037d64cc4792fe1af452540729701c6ec1ec84e4a835edfd8827',\n 'Tell me entire history of USA': 'd2a958bb90a040a9eab4c9fe0f08524c4bd4047b33fb838ea05612d98d2f7a2df7c6c2c86b70065b7e0f3b4f3a96bc7aeacd00de1cbad3bcc3385587b90be77e',\n 'Write a ballad. Pick a random theme.': '8ff349a1b32d048df8dc8520552b5321dc65386c0812a2d0a2100d6a23ea493559872c861583b3581aca858f3f71c427c5ef031d860370809098bd9a2d87b98b',\n 'Write an epic story about a dragon and a knight': 'a009390cba1b724f1d842064dd4400751745157bfa4e2498189719fe705df094ec4dc393f01f4a21f3c66eea9104ef4c162af115f23d4f9a8cf08b4b3a844f77',\n 'Write an essay about being a Senior developer.': '426f3bc4a0c96f12a23df460712ab3a0b29e5594d98fe72f474d233472612599a4c47e6dab3e13cf9a2d83a46a9a43b689ccb3f607d0af9b6d8a80d478f17e43'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-49-14_1x_h100_nvl '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-49-14_1x_h100_nvl\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-49-14_1x_h100_nvl) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output/stdout.txt\n\rLoading safetensors checkpoint shards: 0% Completed | 0/9 [00:00 ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-54-54_1x_a100x '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-54-54_1x_a100x\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-54-54_1x_a100x) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 12:55:45,672 - __main__ - INFO - Running experiment code on remote +2024-08-22 13:07:21,903 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 10:55:53 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:55:53 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 10:55:55 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 10:55:55 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 11:02:44 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 11:02:53 gpu_executor.py:102] # GPU blocks: 6049, # CPU blocks: 819\nmodel loading took 423.10 seconds\nStarting 8 responses generation\n8 responses generation took 263.25 seconds\n{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e',\n 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1',\n 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c',\n 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518',\n 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3',\n 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94',\n 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-54-54_1x_a100x '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-54-54_1x_a100x\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-54-54_1x_a100x) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output/stdout.txt\n\rLoading safetensors checkpoint shards: 0% Completed | 0/9 [00:00 ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-58-24_1x_a100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-58-24_1x_a100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-58-24_1x_a100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 12:59:12,874 - __main__ - INFO - Running experiment code on remote +2024-08-22 13:45:08,985 - __main__ - INFO - Syncing output back to local diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/stdout.txt new file mode 100644 index 0000000..2be7483 --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/stdout.txt @@ -0,0 +1,19 @@ +gpu_count=1 +Starting model loading +INFO 08-22 10:59:18 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel. +INFO 08-22 10:59:18 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False) +INFO 08-22 10:59:20 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq... +INFO 08-22 10:59:21 weight_utils.py:225] Using model weights format ['*.safetensors'] +INFO 08-22 11:05:30 model_runner.py:732] Loading model weights took 37.0561 GB +INFO 08-22 11:05:38 gpu_executor.py:102] # GPU blocks: 6049, # CPU blocks: 819 +model loading took 382.21 seconds +Starting 8 responses generation +8 responses generation took 184.56 seconds +{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e', + 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede', + 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1', + 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c', + 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518', + 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3', + 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94', + 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'} diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/sysinfo.yaml new file mode 100644 index 0000000..0e330bc --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/sysinfo.yaml @@ -0,0 +1,510 @@ +cuda: + cuda: '12.1' + cudnn: 90100 +machine: + cpu: + clocks: + - 1796.352 + - 1800.0 + - 2900.0 + - 4000.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + - 3632.855 + - 4539.065 + - 3634.414 + - 1800.0 + - 1800.0 + - 1800.0 + - 1800.0 + count: 32 + model: AMD Ryzen Threadripper PRO 5955WX 16-Cores + docker_support: + nvidia: false + runc: false + gpu: + count: 1 + details: + - capacity: '81920' + cuda: '8.0' + driver: 535.54.03 + graphics_speed: '210' + memory_speed: '1512' + name: NVIDIA A100 80GB PCIe + power_limit: '300.00' + hard_disk: + free: 56277300 + total: 62914560 + used: 6637260 + os: Ubuntu 22.04.3 LTS + ram: + available: 460197228 + free: 162540176 + total: 527997164 + used: 365456988 +python: + packages: + - aiohappyeyeballs==2.4.0 + - aiohttp==3.10.5 + - aiosignal==1.3.1 + - annotated-types==0.7.0 + - anyio==4.4.0 + - attrs==24.2.0 + - certifi==2024.7.4 + - charset-normalizer==3.3.2 + - click==8.1.7 + - cloudpickle==3.0.0 + - cmake==3.30.2 + - datasets==2.21.0 + - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl + - dill==0.3.8 + - diskcache==5.6.3 + - distro==1.9.0 + - fastapi==0.112.1 + - filelock==3.15.4 + - frozenlist==1.4.1 + - fsspec==2024.6.1 + - h11==0.14.0 + - httpcore==1.0.5 + - httptools==0.6.1 + - httpx==0.27.0 + - huggingface-hub==0.24.6 + - idna==3.7 + - interegular==0.3.3 + - jinja2==3.1.4 + - jiter==0.5.0 + - jsonschema==4.23.0 + - jsonschema-specifications==2023.12.1 + - lark==1.2.2 + - llvmlite==0.43.0 + - lm-format-enforcer==0.10.3 + - markupsafe==2.1.5 + - mpmath==1.3.0 + - msgpack==1.0.8 + - multidict==6.0.5 + - multiprocess==0.70.16 + - nest-asyncio==1.6.0 + - networkx==3.3 + - ninja==1.11.1.1 + - numba==0.60.0 + - numpy==1.26.4 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==9.1.0.70 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-ml-py==12.560.30 + - nvidia-nccl-cu12==2.20.5 + - nvidia-nvjitlink-cu12==12.6.20 + - nvidia-nvtx-cu12==12.1.105 + - openai==1.42.0 + - outlines==0.0.46 + - packaging==24.1 + - pandas==2.2.2 + - pillow==10.4.0 + - prometheus-client==0.20.0 + - prometheus-fastapi-instrumentator==7.0.0 + - protobuf==5.27.3 + - psutil==6.0.0 + - py-cpuinfo==9.0.0 + - pyairports==2.1.1 + - pyarrow==17.0.0 + - pycountry==24.6.1 + - pydantic==2.8.2 + - pydantic-core==2.20.1 + - python-dateutil==2.9.0.post0 + - python-dotenv==1.0.1 + - pytz==2024.1 + - pyyaml==6.0.2 + - pyzmq==26.2.0 + - ray==2.34.0 + - referencing==0.35.1 + - regex==2024.7.24 + - requests==2.32.3 + - rpds-py==0.20.0 + - safetensors==0.4.4 + - sentencepiece==0.2.0 + - setuptools==73.0.1 + - six==1.16.0 + - sniffio==1.3.1 + - starlette==0.38.2 + - sympy==1.13.2 + - tiktoken==0.7.0 + - tokenizers==0.19.1 + - torch==2.4.0 + - torchvision==0.19.0 + - tqdm==4.66.5 + - transformers==4.44.1 + - triton==3.0.0 + - typing-extensions==4.12.2 + - tzdata==2024.1 + - urllib3==2.2.2 + - uvicorn==0.30.6 + - uvloop==0.20.0 + - vllm==0.5.4 + - vllm-flash-attn==2.6.1 + - watchfiles==0.23.0 + - websockets==13.0 + - xformers==0.0.27.post2 + - xxhash==3.5.0 + - yarl==1.9.4 + version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ] +system: + dpkg_packages: + - adduser==3.118ubuntu5 + - apt==2.4.10 + - base-files==12ubuntu4.4 + - base-passwd==3.5.52build1 + - bash==5.1-6ubuntu1 + - binutils==2.38-4ubuntu2.3 + - binutils-common==2.38-4ubuntu2.3 + - binutils-x86-64-linux-gnu==2.38-4ubuntu2.3 + - bsdutils==1:2.37.2-4ubuntu3 + - build-essential==12.9ubuntu3 + - bzip2==1.0.8-5build1 + - ca-certificates==20230311ubuntu0.22.04.1 + - coreutils==8.32-4.1ubuntu1 + - cpp==4:11.2.0-1ubuntu1 + - cpp-11==11.4.0-1ubuntu1~22.04 + - cuda-cccl-12-0==12.0.140-1 + - cuda-command-line-tools-12-0==12.0.1-1 + - cuda-compat-12-0==525.147.05-1 + - cuda-compiler-12-0==12.0.1-1 + - cuda-cudart-12-0==12.0.146-1 + - cuda-cudart-dev-12-0==12.0.146-1 + - cuda-cuobjdump-12-0==12.0.140-1 + - cuda-cupti-12-0==12.0.146-1 + - cuda-cupti-dev-12-0==12.0.146-1 + - cuda-cuxxfilt-12-0==12.0.140-1 + - cuda-driver-dev-12-0==12.0.146-1 + - cuda-gdb-12-0==12.0.140-1 + - cuda-keyring==1.0-1 + - cuda-libraries-12-0==12.0.1-1 + - cuda-libraries-dev-12-0==12.0.1-1 + - cuda-minimal-build-12-0==12.0.1-1 + - cuda-nsight-compute-12-0==12.0.1-1 + - cuda-nvcc-12-0==12.0.140-1 + - cuda-nvdisasm-12-0==12.0.140-1 + - cuda-nvml-dev-12-0==12.0.140-1 + - cuda-nvprof-12-0==12.0.146-1 + - cuda-nvprune-12-0==12.0.140-1 + - cuda-nvrtc-12-0==12.0.140-1 + - cuda-nvrtc-dev-12-0==12.0.140-1 + - cuda-nvtx-12-0==12.0.140-1 + - cuda-opencl-12-0==12.0.140-1 + - cuda-opencl-dev-12-0==12.0.140-1 + - cuda-profiler-api-12-0==12.0.140-1 + - cuda-sanitizer-12-0==12.0.140-1 + - cuda-toolkit-12-0-config-common==12.0.146-1 + - cuda-toolkit-12-config-common==12.3.52-1 + - cuda-toolkit-config-common==12.3.52-1 + - curl==7.81.0-1ubuntu1.17 + - dash==0.5.11+git20210903+057cd650a4ed-3build1 + - dbus==1.12.20-2ubuntu4.1 + - debconf==1.5.79ubuntu1 + - debianutils==5.5-1ubuntu2 + - diffutils==1:3.8-0ubuntu2 + - dirmngr==2.2.27-3ubuntu2.1 + - distro-info-data==0.52ubuntu0.7 + - dpkg==1.21.1ubuntu2.2 + - dpkg-dev==1.21.1ubuntu2.2 + - e2fsprogs==1.46.5-2ubuntu1.1 + - findutils==4.8.0-1ubuntu3 + - g++==4:11.2.0-1ubuntu1 + - g++-11==11.4.0-1ubuntu1~22.04 + - gcc==4:11.2.0-1ubuntu1 + - gcc-11==11.4.0-1ubuntu1~22.04 + - gcc-11-base==11.4.0-1ubuntu1~22.04 + - gcc-12-base==12.3.0-1ubuntu1~22.04 + - gir1.2-glib-2.0==1.72.0-1 + - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2 + - git==1:2.34.1-1ubuntu1.11 + - git-man==1:2.34.1-1ubuntu1.11 + - gnupg==2.2.27-3ubuntu2.1 + - gnupg-l10n==2.2.27-3ubuntu2.1 + - gnupg-utils==2.2.27-3ubuntu2.1 + - gnupg2==2.2.27-3ubuntu2.1 + - gpg==2.2.27-3ubuntu2.1 + - gpg-agent==2.2.27-3ubuntu2.1 + - gpg-wks-client==2.2.27-3ubuntu2.1 + - gpg-wks-server==2.2.27-3ubuntu2.1 + - gpgconf==2.2.27-3ubuntu2.1 + - gpgsm==2.2.27-3ubuntu2.1 + - gpgv==2.2.27-3ubuntu2.1 + - grep==3.7-1build1 + - gzip==1.10-4ubuntu4.1 + - hostname==3.23ubuntu2 + - init-system-helpers==1.62 + - iso-codes==4.9.0-1 + - less==590-1ubuntu0.22.04.3 + - libacl1==2.3.1-1 + - libapparmor1==3.0.4-2ubuntu2.3 + - libappstream4==0.15.2-2 + - libapt-pkg6.0==2.4.10 + - libargon2-1==0~20171227-0.3 + - libasan6==11.4.0-1ubuntu1~22.04 + - libassuan0==2.5.5-1build1 + - libatomic1==12.3.0-1ubuntu1~22.04 + - libattr1==1:2.5.1-1build1 + - libaudit-common==1:3.0.7-1build1 + - libaudit1==1:3.0.7-1build1 + - libbinutils==2.38-4ubuntu2.3 + - libblkid1==2.37.2-4ubuntu3 + - libbrotli1==1.0.9-2build6 + - libbsd0==0.11.5-1 + - libbz2-1.0==1.0.8-5build1 + - libc-bin==2.35-0ubuntu3.4 + - libc-dev-bin==2.35-0ubuntu3.4 + - libc6==2.35-0ubuntu3.4 + - libc6-dev==2.35-0ubuntu3.4 + - libcap-ng0==0.7.9-2.2build3 + - libcap2==1:2.44-1ubuntu0.22.04.1 + - libcap2-bin==1:2.44-1ubuntu0.22.04.1 + - libcbor0.8==0.8.0-2ubuntu1 + - libcc1-0==12.3.0-1ubuntu1~22.04 + - libcom-err2==1.46.5-2ubuntu1.1 + - libcrypt-dev==1:4.4.27-1 + - libcrypt1==1:4.4.27-1 + - libcryptsetup12==2:2.4.3-1ubuntu1.2 + - libctf-nobfd0==2.38-4ubuntu2.3 + - libctf0==2.38-4ubuntu2.3 + - libcublas-12-0==12.0.2.224-1 + - libcublas-dev-12-0==12.0.2.224-1 + - libcufft-12-0==11.0.1.95-1 + - libcufft-dev-12-0==11.0.1.95-1 + - libcufile-12-0==1.5.1.14-1 + - libcufile-dev-12-0==1.5.1.14-1 + - libcurand-12-0==10.3.1.124-1 + - libcurand-dev-12-0==10.3.1.124-1 + - libcurl3-gnutls==7.81.0-1ubuntu1.17 + - libcurl4==7.81.0-1ubuntu1.17 + - libcusolver-12-0==11.4.3.1-1 + - libcusolver-dev-12-0==11.4.3.1-1 + - libcusparse-12-0==12.0.1.140-1 + - libcusparse-dev-12-0==12.0.1.140-1 + - libdb5.3==5.3.28+dfsg1-0.8ubuntu3 + - libdbus-1-3==1.12.20-2ubuntu4.1 + - libdebconfclient0==0.261ubuntu1 + - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4 + - libdpkg-perl==1.21.1ubuntu2.2 + - libdw1==0.186-1build1 + - libedit2==3.1-20210910-1build1 + - libelf1==0.186-1build1 + - liberror-perl==0.17029-1 + - libevent-core-2.1-7==2.1.12-stable-1build3 + - libexpat1==2.4.7-1ubuntu0.3 + - libext2fs2==1.46.5-2ubuntu1.1 + - libffi8==3.4.2-4 + - libfido2-1==1.10.0-1 + - libgcc-11-dev==11.4.0-1ubuntu1~22.04 + - libgcc-s1==12.3.0-1ubuntu1~22.04 + - libgcrypt20==1.9.4-3ubuntu3 + - libgdbm-compat4==1.23-1 + - libgdbm6==1.23-1 + - libgirepository-1.0-1==1.72.0-1 + - libglib2.0-0==2.72.4-0ubuntu2.3 + - libglib2.0-bin==2.72.4-0ubuntu2.3 + - libglib2.0-data==2.72.4-0ubuntu2.3 + - libgmp10==2:6.2.1+dfsg-3ubuntu1 + - libgnutls30==3.7.3-4ubuntu1.2 + - libgomp1==12.3.0-1ubuntu1~22.04 + - libgpg-error0==1.43-3 + - libgssapi-krb5-2==1.19.2-2ubuntu0.2 + - libgstreamer1.0-0==1.20.3-0ubuntu1 + - libhogweed6==3.7.3-1build2 + - libicu70==70.1-2 + - libidn2-0==2.3.2-2build1 + - libip4tc2==1.8.7-1ubuntu5.2 + - libisl23==0.24-2build1 + - libitm1==12.3.0-1ubuntu1~22.04 + - libjson-c5==0.15-3~ubuntu1.22.04.2 + - libk5crypto3==1.19.2-2ubuntu0.2 + - libkeyutils1==1.6.1-2ubuntu3 + - libkmod2==29-1ubuntu1 + - libkrb5-3==1.19.2-2ubuntu0.2 + - libkrb5support0==1.19.2-2ubuntu0.2 + - libksba8==1.6.0-2ubuntu0.2 + - libldap-2.5-0==2.5.16+dfsg-0ubuntu0.22.04.1 + - liblsan0==12.3.0-1ubuntu1~22.04 + - liblz4-1==1.9.3-2build2 + - liblzma5==5.2.5-2ubuntu1 + - libmd0==1.0.4-1build1 + - libmount1==2.37.2-4ubuntu3 + - libmpc3==1.2.1-2build1 + - libmpdec3==2.5.1-2build2 + - libmpfr6==4.1.0-3build3 + - libnccl-dev==2.16.5-1+cuda12.0 + - libnccl2==2.16.5-1+cuda12.0 + - libncurses6==6.3-2ubuntu0.1 + - libncursesw6==6.3-2ubuntu0.1 + - libnettle8==3.7.3-1build2 + - libnghttp2-14==1.43.0-1ubuntu0.2 + - libnpp-12-0==12.0.1.104-1 + - libnpp-dev-12-0==12.0.1.104-1 + - libnpth0==1.6-3build2 + - libnsl-dev==1.3.0-2build2 + - libnsl2==1.3.0-2build2 + - libnvjitlink-12-0==12.0.140-1 + - libnvjitlink-dev-12-0==12.0.140-1 + - libnvjpeg-12-0==12.0.1.102-1 + - libnvjpeg-dev-12-0==12.0.1.102-1 + - libp11-kit0==0.24.0-6build1 + - libpackagekit-glib2-18==1.2.5-2ubuntu2 + - libpam-modules==1.4.0-11ubuntu2.3 + - libpam-modules-bin==1.4.0-11ubuntu2.3 + - libpam-runtime==1.4.0-11ubuntu2.3 + - libpam-systemd==249.11-0ubuntu3.12 + - libpam0g==1.4.0-11ubuntu2.3 + - libpcre2-8-0==10.39-3ubuntu0.1 + - libpcre3==2:8.39-13ubuntu0.22.04.1 + - libperl5.34==5.34.0-3ubuntu1.2 + - libpolkit-agent-1-0==0.105-33 + - libpolkit-gobject-1-0==0.105-33 + - libpopt0==1.18-3build1 + - libprocps8==2:3.3.17-6ubuntu2 + - libpsl5==0.21.0-1.2build2 + - libpython3-stdlib==3.10.6-1~22.04 + - libpython3.10-minimal==3.10.12-1~22.04.5 + - libpython3.10-stdlib==3.10.12-1~22.04.5 + - libquadmath0==12.3.0-1ubuntu1~22.04 + - libreadline8==8.1.2-1 + - librtmp1==2.4+20151223.gitfa8646d.1-2build4 + - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2 + - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2 + - libseccomp2==2.5.3-2ubuntu2 + - libselinux1==3.3-1build2 + - libsemanage-common==3.3-1build2 + - libsemanage2==3.3-1build2 + - libsepol2==3.3-1build1 + - libsmartcols1==2.37.2-4ubuntu3 + - libsqlite3-0==3.37.2-2ubuntu0.1 + - libss2==1.46.5-2ubuntu1.1 + - libssh-4==0.9.6-2ubuntu0.22.04.3 + - libssl3==3.0.2-0ubuntu1.10 + - libstdc++-11-dev==11.4.0-1ubuntu1~22.04 + - libstdc++6==12.3.0-1ubuntu1~22.04 + - libstemmer0d==2.2.0-1build1 + - libsystemd0==249.11-0ubuntu3.12 + - libtasn1-6==4.18.0-4build1 + - libtinfo6==6.3-2ubuntu0.1 + - libtirpc-common==1.3.2-2ubuntu0.1 + - libtirpc-dev==1.3.2-2ubuntu0.1 + - libtirpc3==1.3.2-2ubuntu0.1 + - libtsan0==11.4.0-1ubuntu1~22.04 + - libubsan1==12.3.0-1ubuntu1~22.04 + - libudev1==249.11-0ubuntu3.10 + - libunistring2==1.0-1 + - libunwind8==1.3.2-2build2.1 + - libutempter0==1.2.1-2build2 + - libuuid1==2.37.2-4ubuntu3 + - libwrap0==7.6.q-31build2 + - libxml2==2.9.13+dfsg-1ubuntu0.4 + - libxmlb2==0.3.6-2build1 + - libxxhash0==0.8.1-1 + - libyaml-0-2==0.2.2-1build2 + - libzstd1==1.4.8+dfsg-3build1 + - linux-libc-dev==5.15.0-88.98 + - locales==2.35-0ubuntu3.8 + - login==1:4.8.1-2ubuntu2.1 + - logsave==1.46.5-2ubuntu1.1 + - lsb-base==11.1.0ubuntu4 + - lsb-release==11.1.0ubuntu4 + - lto-disabled-list==24 + - make==4.3-4.1build1 + - mawk==1.3.4.20200120-3 + - media-types==7.0.0 + - mount==2.37.2-4ubuntu3 + - ncurses-base==6.3-2ubuntu0.1 + - ncurses-bin==6.3-2ubuntu0.1 + - nsight-compute-2022.4.1==2022.4.1.6-1 + - openssh-client==1:8.9p1-3ubuntu0.10 + - openssh-server==1:8.9p1-3ubuntu0.10 + - openssh-sftp-server==1:8.9p1-3ubuntu0.10 + - openssl==3.0.2-0ubuntu1.12 + - packagekit==1.2.5-2ubuntu2 + - passwd==1:4.8.1-2ubuntu2.1 + - patch==2.7.6-7build2 + - perl==5.34.0-3ubuntu1.2 + - perl-base==5.34.0-3ubuntu1.2 + - perl-modules-5.34==5.34.0-3ubuntu1.2 + - pinentry-curses==1.1.1-1build2 + - pkexec==0.105-33 + - policykit-1==0.105-33 + - polkitd==0.105-33 + - procps==2:3.3.17-6ubuntu2 + - python-apt-common==2.4.0ubuntu3 + - python3==3.10.6-1~22.04 + - python3-apt==2.4.0ubuntu3 + - python3-blinker==1.4+dfsg1-0.4 + - python3-cffi-backend==1.15.0-1build2 + - python3-cryptography==3.4.8-1ubuntu2.2 + - python3-dbus==1.2.18-3build1 + - python3-distro==1.7.0-1 + - python3-gi==3.42.1-0ubuntu1 + - python3-httplib2==0.20.2-2 + - python3-importlib-metadata==4.6.4-1 + - python3-jeepney==0.7.1-3 + - python3-jwt==2.3.0-1ubuntu0.2 + - python3-keyring==23.5.0-1 + - python3-launchpadlib==1.10.16-1 + - python3-lazr.restfulclient==0.14.4-1 + - python3-lazr.uri==1.0.6-2 + - python3-minimal==3.10.6-1~22.04 + - python3-more-itertools==8.10.0-2 + - python3-oauthlib==3.2.0-1ubuntu0.1 + - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1 + - python3-pyparsing==2.4.7-1 + - python3-secretstorage==3.3.1-1 + - python3-six==1.16.0-3ubuntu1 + - python3-software-properties==0.99.22.9 + - python3-wadllib==1.3.6-1 + - python3-zipp==1.0.0-3ubuntu0.1 + - python3.10==3.10.12-1~22.04.5 + - python3.10-minimal==3.10.12-1~22.04.5 + - readline-common==8.1.2-1 + - rpcsvc-proto==1.4.2-0ubuntu6 + - rsync==3.2.7-0ubuntu0.22.04.2 + - sed==4.8-1ubuntu2 + - sensible-utils==0.0.17 + - software-properties-common==0.99.22.9 + - sudo==1.9.9-1ubuntu2.4 + - systemd==249.11-0ubuntu3.12 + - systemd-sysv==249.11-0ubuntu3.12 + - sysvinit-utils==3.01-1ubuntu1 + - tar==1.34+dfsg-1ubuntu0.1.22.04.1 + - tmux==3.2a-4ubuntu0.2 + - ubuntu-keyring==2021.03.26 + - ucf==3.0043 + - usrmerge==25ubuntu2 + - util-linux==2.37.2-4ubuntu3 + - wget==1.21.2-2ubuntu1.1 + - xz-utils==5.2.5-2ubuntu1 + - zlib1g==1:1.2.11.dfsg-2ubuntu9.2 + machine: x86_64 + os: Linux + os_version: '#40~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov 16 10:53:04 UTC 2' + processor: x86_64 + release: 6.2.0-39-generic diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/experiment.yaml new file mode 100644 index 0000000..c08efc1 --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/experiment.yaml @@ -0,0 +1,6 @@ +comment: 1x H100 PCIE +experiment: vllm_llama_3_70b_instruct_awq +experiment_hash: exp_hash_v1:7aa490 +run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie +slug: 1x_h100_pcie +timestamp: 2024-08-22_13-32-37 diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output.yaml new file mode 100644 index 0000000..b9dd089 --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output.yaml @@ -0,0 +1,8 @@ +Count to 1000, skip unpopular numbers: 5edaedfc17612b5878d0766192b3e126326bbc0da437ecc80bb531dd370a1f370226a881ff8c7be1efa474827f8973efca04a88efacc929d5611871805291aea +Describe justice system in UK vs USA in 2000-5000 words: 64f46d659436b9676439620ba99f104744c9ff612eacc973ac585fd935141a8777f02563a8a38de4c3991b28316bf7867b7c0af783a52a2d0b51c0d86460c362 +Describe schooling system in UK vs USA in 2000-5000 words: eac275677ca4a6ddd67713c401c804675b591863e82e439ac1bbb482b19f22c8633fbc8c588a868bc3772b4ee9b753f6dab998cdba7853dd7a017846172a1117 +Explain me some random problem for me in 2000-5000 words: 9420afbcb5a4566cbc7eac4361f8915a94b4d3ec46ca9e07d60d9661c0ffa013316ec88df24c9c261e0362693aaaf17df2f3024561f811fcd3725674f5987b51 +Tell me entire history of USA: 3cc0629da42ffd940b4649526c09fa8e7488ccb7b65cd9db9ebcf5c3b7def820acf9e9b2ace342237bf45444d79a36c5c2b8a938b5e6a22a176bf9d7ca018a5f +Write a ballad. Pick a random theme.: 886c987fbb7e204c648671cff7c51f7476b8050e5b3d4cd9664abdb395b396f438b65ba31767124912e97fbacf14c3879f9d68169e524c47bedb8688edcde1c2 +Write an epic story about a dragon and a knight: 5015e1d7b4d307014843de4ff7e75223b64ef38de1fa625e6ccfb25e0d5e2a9f8c1c3a2b7dfe0d45afdb6a502f6b644be00de7653f53a490e1267f71829f6352 +Write an essay about being a Senior developer.: 303665cb313f789f0d033db3e61b21b2436255b77ba5e221e54fc8f5180a1f822fca22c0ab221912d04c3a1f8d90e324eae2ccde1ec18375de12b36b3da04581 diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/run.local.log new file mode 100644 index 0000000..da5c3d3 --- /dev/null +++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/run.local.log @@ -0,0 +1,15 @@ +2024-08-22 13:32:37,442 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x H100 PCIE +2024-08-22 13:32:37,445 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/run.local.log +2024-08-22 13:32:37,750 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1) +2024-08-22 13:32:38,358 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n' +2024-08-22 13:32:38,366 - paramiko.transport - INFO - Authentication (publickey) successful! +2024-08-22 13:32:38,368 - __main__ - INFO - Syncing files to remote +2024-08-22 13:32:38,853 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output' stdout: '' stderr: '' status_code: 0 +2024-08-22 13:32:45,617 - __main__ - INFO - Setting up remote environment +2024-08-22 13:33:18,125 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n curl -LsSf https://astral.sh/uv/install.sh | sh\n export PATH=$HOME/.cargo/bin:$PATH\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n uv venv -p python3.11 --python-preference managed\n source .venv/bin/activate \n uv pip install ./deterministic_ml*.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\n ' stdout: "installing to /root/.cargo/bin\n uv\n uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n source $HOME/.cargo/env (sh, bash, zsh)\n source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-32-37_1x_h100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-32-37_1x_h100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-32-37_1x_h100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev3+g69e9ae9-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 534ms\nPrepared 108 packages in 28.49s\nInstalled 108 packages in 386ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev3+g69e9ae9 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/deterministic_ml-0.1.dev3+g69e9ae9-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0 +2024-08-22 13:33:18,146 - __main__ - INFO - Gathering system info +2024-08-22 13:33:21,064 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-32-37_1x_h100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-32-37_1x_h100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-32-37_1x_h100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 13:33:21,077 - __main__ - INFO - Running experiment code on remote +2024-08-22 13:37:31,207 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 11:33:26 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 11:33:26 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 11:33:28 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 11:33:28 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 11:34:33 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 11:34:40 gpu_executor.py:102] # GPU blocks: 6043, # CPU blocks: 819\nmodel loading took 76.33 seconds\nStarting 8 responses generation\n8 responses generation took 166.18 seconds\n{'Count to 1000, skip unpopular numbers': '5edaedfc17612b5878d0766192b3e126326bbc0da437ecc80bb531dd370a1f370226a881ff8c7be1efa474827f8973efca04a88efacc929d5611871805291aea',\n 'Describe justice system in UK vs USA in 2000-5000 words': '64f46d659436b9676439620ba99f104744c9ff612eacc973ac585fd935141a8777f02563a8a38de4c3991b28316bf7867b7c0af783a52a2d0b51c0d86460c362',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'eac275677ca4a6ddd67713c401c804675b591863e82e439ac1bbb482b19f22c8633fbc8c588a868bc3772b4ee9b753f6dab998cdba7853dd7a017846172a1117',\n 'Explain me some random problem for me in 2000-5000 words': '9420afbcb5a4566cbc7eac4361f8915a94b4d3ec46ca9e07d60d9661c0ffa013316ec88df24c9c261e0362693aaaf17df2f3024561f811fcd3725674f5987b51',\n 'Tell me entire history of USA': '3cc0629da42ffd940b4649526c09fa8e7488ccb7b65cd9db9ebcf5c3b7def820acf9e9b2ace342237bf45444d79a36c5c2b8a938b5e6a22a176bf9d7ca018a5f',\n 'Write a ballad. Pick a random theme.': '886c987fbb7e204c648671cff7c51f7476b8050e5b3d4cd9664abdb395b396f438b65ba31767124912e97fbacf14c3879f9d68169e524c47bedb8688edcde1c2',\n 'Write an epic story about a dragon and a knight': '5015e1d7b4d307014843de4ff7e75223b64ef38de1fa625e6ccfb25e0d5e2a9f8c1c3a2b7dfe0d45afdb6a502f6b644be00de7653f53a490e1267f71829f6352',\n 'Write an essay about being a Senior developer.': '303665cb313f789f0d033db3e61b21b2436255b77ba5e221e54fc8f5180a1f822fca22c0ab221912d04c3a1f8d90e324eae2ccde1ec18375de12b36b3da04581'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-32-37_1x_h100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-32-37_1x_h100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-32-37_1x_h100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output/stdout.txt\n\rLoading safetensors checkpoint shards: 0% Completed | 0/9 [00:00 ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-42-57_1x_h100_sxm '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-42-57_1x_h100_sxm\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-42-57_1x_h100_sxm) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 13:43:07,891 - __main__ - INFO - Running experiment code on remote +2024-08-22 13:45:49,699 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 11:43:12 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 11:43:12 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 11:43:14 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 11:43:15 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 11:43:34 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 11:43:38 gpu_executor.py:102] # GPU blocks: 6059, # CPU blocks: 819\nmodel loading took 27.66 seconds\nStarting 8 responses generation\n8 responses generation took 127.92 seconds\n{'Count to 1000, skip unpopular numbers': '40b945bf30ccd2a3a9996c3579a6f503195da6c271b76082bc514c827d86d87eeea8677a001e8c0cf9b4d53bc24cd575f4be62d1f8d754077a272da0df073762',\n 'Describe justice system in UK vs USA in 2000-5000 words': '2b6f5ba49c027271b4ef651e7bf25a03f2389e708f9fc833d1b8fe0018f2c1c150fbd3dd81e2dc14fe3ffe0ef53abf7be18236f30338feff2353e8b9713ca973',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '3a7b4d5edc2a14a525a1d0677e36570fdb22d31521947c85d626788a0688e66494a7e9414799602236a6bb9455a525029e84202446871b54613a19b2fb3aca66',\n 'Explain me some random problem for me in 2000-5000 words': '323525095bf264a962795c48d2111bc18e790bcf9ac6ac229817d6d2ceaed9c5d71147039284037d64cc4792fe1af452540729701c6ec1ec84e4a835edfd8827',\n 'Tell me entire history of USA': 'd2a958bb90a040a9eab4c9fe0f08524c4bd4047b33fb838ea05612d98d2f7a2df7c6c2c86b70065b7e0f3b4f3a96bc7aeacd00de1cbad3bcc3385587b90be77e',\n 'Write a ballad. Pick a random theme.': '8ff349a1b32d048df8dc8520552b5321dc65386c0812a2d0a2100d6a23ea493559872c861583b3581aca858f3f71c427c5ef031d860370809098bd9a2d87b98b',\n 'Write an epic story about a dragon and a knight': 'a009390cba1b724f1d842064dd4400751745157bfa4e2498189719fe705df094ec4dc393f01f4a21f3c66eea9104ef4c162af115f23d4f9a8cf08b4b3a844f77',\n 'Write an essay about being a Senior developer.': '426f3bc4a0c96f12a23df460712ab3a0b29e5594d98fe72f474d233472612599a4c47e6dab3e13cf9a2d83a46a9a43b689ccb3f607d0af9b6d8a80d478f17e43'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-42-57_1x_h100_sxm '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-42-57_1x_h100_sxm\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-42-57_1x_h100_sxm) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output/stdout.txt\n\rLoading safetensors checkpoint shards: 0% Completed | 0/9 [00:00 ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-45-58_1x_a100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-45-58_1x_a100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-45-58_1x_a100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 13:46:49,511 - __main__ - INFO - Running experiment code on remote +2024-08-22 13:52:42,106 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 11:46:57 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 11:46:57 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 11:46:58 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 11:46:59 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 11:48:11 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 11:48:19 gpu_executor.py:102] # GPU blocks: 6049, # CPU blocks: 819\nmodel loading took 84.08 seconds\nStarting 8 responses generation\n8 responses generation took 258.53 seconds\n{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e',\n 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1',\n 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c',\n 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518',\n 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3',\n 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94',\n 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-45-58_1x_a100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-45-58_1x_a100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-45-58_1x_a100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output/stdout.txt\n\rLoading safetensors checkpoint shards: 0% Completed | 0/9 [00:00 ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-22 14:11:28,264 - __main__ - INFO - Running experiment code on remote +2024-08-22 14:14:47,273 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 12:11:34 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 12:11:34 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 12:11:35 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 12:11:36 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 12:11:49 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 12:11:56 gpu_executor.py:102] # GPU blocks: 6042, # CPU blocks: 819\nmodel loading took 23.78 seconds\nStarting 8 responses generation\n8 responses generation took 167.54 seconds\n{'Count to 1000, skip unpopular numbers': '5edaedfc17612b5878d0766192b3e126326bbc0da437ecc80bb531dd370a1f370226a881ff8c7be1efa474827f8973efca04a88efacc929d5611871805291aea',\n 'Describe justice system in UK vs USA in 2000-5000 words': '64f46d659436b9676439620ba99f104744c9ff612eacc973ac585fd935141a8777f02563a8a38de4c3991b28316bf7867b7c0af783a52a2d0b51c0d86460c362',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'eac275677ca4a6ddd67713c401c804675b591863e82e439ac1bbb482b19f22c8633fbc8c588a868bc3772b4ee9b753f6dab998cdba7853dd7a017846172a1117',\n 'Explain me some random problem for me in 2000-5000 words': '9420afbcb5a4566cbc7eac4361f8915a94b4d3ec46ca9e07d60d9661c0ffa013316ec88df24c9c261e0362693aaaf17df2f3024561f811fcd3725674f5987b51',\n 'Tell me entire history of USA': '3cc0629da42ffd940b4649526c09fa8e7488ccb7b65cd9db9ebcf5c3b7def820acf9e9b2ace342237bf45444d79a36c5c2b8a938b5e6a22a176bf9d7ca018a5f',\n 'Write a ballad. Pick a random theme.': '886c987fbb7e204c648671cff7c51f7476b8050e5b3d4cd9664abdb395b396f438b65ba31767124912e97fbacf14c3879f9d68169e524c47bedb8688edcde1c2',\n 'Write an epic story about a dragon and a knight': '5015e1d7b4d307014843de4ff7e75223b64ef38de1fa625e6ccfb25e0d5e2a9f8c1c3a2b7dfe0d45afdb6a502f6b644be00de7653f53a490e1267f71829f6352',\n 'Write an essay about being a Senior developer.': '303665cb313f789f0d033db3e61b21b2436255b77ba5e221e54fc8f5180a1f822fca22c0ab221912d04c3a1f8d90e324eae2ccde1ec18375de12b36b3da04581'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output/stdout.txt\n\rLoading safetensors checkpoint shards: 0% Completed | 0/9 [00:00