From 832e55b48ebf10f9b35a3833ec571de5c9d5631f Mon Sep 17 00:00:00 2001
From: Maciej Urbanski <maciej.rooter.urbanski@reef.pl>
Date: Fri, 23 Aug 2024 14:25:20 +0200
Subject: [PATCH] reference experiment results

---
 .../experiment.yaml                           |   6 +
 .../output.yaml                               |   8 +
 .../run.local.log                             |  15 +
 .../stdout.txt                                |  19 +
 .../sysinfo.yaml                              | 563 ++++++++++++++++
 .../experiment.yaml                           |   6 +
 .../output.yaml                               |   8 +
 .../run.local.log                             |  15 +
 .../stdout.txt                                |  34 +
 .../sysinfo.yaml                              | 560 ++++++++++++++++
 .../experiment.yaml                           |   6 +
 .../output.yaml                               |   8 +
 .../run.local.log                             |  15 +
 .../stdout.txt                                |  49 ++
 .../sysinfo.yaml                              | 558 ++++++++++++++++
 .../experiment.yaml                           |   6 +
 .../output.yaml                               |   8 +
 .../run.local.log                             |  15 +
 .../stdout.txt                                |  36 +
 .../sysinfo.yaml                              | 544 +++++++++++++++
 .../experiment.yaml                           |   6 +
 .../output.yaml                               |   8 +
 .../run.local.log                             |  15 +
 .../stdout.txt                                |  19 +
 .../sysinfo.yaml                              | 541 +++++++++++++++
 .../experiment.yaml                           |   6 +
 .../2024-08-22_12-54-54_1x_a100x/output.yaml  |   8 +
 .../run.local.log                             |  15 +
 .../2024-08-22_12-54-54_1x_a100x/stdout.txt   |  19 +
 .../2024-08-22_12-54-54_1x_a100x/sysinfo.yaml | 542 +++++++++++++++
 .../experiment.yaml                           |   6 +
 .../output.yaml                               |   8 +
 .../run.local.log                             |  12 +
 .../stdout.txt                                |  19 +
 .../sysinfo.yaml                              | 510 +++++++++++++++
 .../experiment.yaml                           |   6 +
 .../output.yaml                               |   8 +
 .../run.local.log                             |  15 +
 .../stdout.txt                                |  19 +
 .../sysinfo.yaml                              | 606 +++++++++++++++++
 .../experiment.yaml                           |   6 +
 .../output.yaml                               |   8 +
 .../run.local.log                             |  15 +
 .../stdout.txt                                |  19 +
 .../sysinfo.yaml                              | 617 ++++++++++++++++++
 .../experiment.yaml                           |   6 +
 .../output.yaml                               |   8 +
 .../run.local.log                             |  15 +
 .../stdout.txt                                |  19 +
 .../sysinfo.yaml                              | 606 +++++++++++++++++
 .../experiment.yaml                           |   6 +
 .../output.yaml                               |   8 +
 .../run.local.log                             |  15 +
 .../stdout.txt                                |  19 +
 .../sysinfo.yaml                              | 510 +++++++++++++++
 55 files changed, 6744 insertions(+)
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/sysinfo.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/sysinfo.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/sysinfo.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/sysinfo.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/sysinfo.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/sysinfo.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/sysinfo.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/sysinfo.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/sysinfo.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/sysinfo.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/experiment.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output.yaml
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/run.local.log
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/stdout.txt
 create mode 100644 tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/sysinfo.yaml

diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/experiment.yaml
new file mode 100644
index 0000000..94bc969
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 1x A100 SXM4 80GB
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb
+slug: 1x_a100_sxm4_80gb
+timestamp: 2024-08-22_12-16-19
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output.yaml
new file mode 100644
index 0000000..88b431d
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: 5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e
+Describe justice system in UK vs USA in 2000-5000 words: 83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede
+Describe schooling system in UK vs USA in 2000-5000 words: f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1
+Explain me some random problem for me in 2000-5000 words: 143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c
+Tell me entire history of USA: 210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518
+Write a ballad. Pick a random theme.: 21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3
+Write an epic story about a dragon and a knight: 81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94
+Write an essay about being a Senior developer.: 0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/run.local.log
new file mode 100644
index 0000000..331afd3
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/run.local.log
@@ -0,0 +1,15 @@
+2024-08-22 12:16:19,452 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x A100 SXM4 80GB
+2024-08-22 12:16:19,455 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/run.local.log
+2024-08-22 12:16:19,564 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 12:16:19,769 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-22 12:16:19,772 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 12:16:19,774 - __main__ - INFO - Syncing files to remote
+2024-08-22 12:16:19,961 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 12:16:22,432 - __main__ - INFO - Setting up remote environment
+2024-08-22 12:16:25,588 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-16-19_1x_a100_sxm4_80gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-16-19_1x_a100_sxm4_80gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-16-19_1x_a100_sxm4_80gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 57ms\nPrepared 1 package in 2ms\nInstalled 108 packages in 473ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 12:16:25,608 - __main__ - INFO - Gathering system info
+2024-08-22 12:16:28,471 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-16-19_1x_a100_sxm4_80gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-16-19_1x_a100_sxm4_80gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-16-19_1x_a100_sxm4_80gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 12:16:28,485 - __main__ - INFO - Running experiment code on remote
+2024-08-22 12:20:56,768 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 10:16:34 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:16:34 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 10:16:35 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 10:16:36 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 10:17:10 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 10:17:16 gpu_executor.py:102] # GPU blocks: 6068, # CPU blocks: 819\nmodel loading took 46.38 seconds\nStarting 8 responses generation\n8 responses generation took 213.59 seconds\n{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e',\n 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1',\n 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c',\n 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518',\n 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3',\n 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94',\n 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-16-19_1x_a100_sxm4_80gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-16-19_1x_a100_sxm4_80gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-16-19_1x_a100_sxm4_80gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/output/stdout.txt\n\rLoading safetensors checkpoint shards:   0% Completed | 0/9 [00:00<?, ?it/s]\n\rLoading safetensors checkpoint shards:  11% Completed | 1/9 [00:01<00:12,  1.55s/it]\n\rLoading safetensors checkpoint shards:  22% Completed | 2/9 [00:03<00:14,  2.06s/it]\n\rLoading safetensors checkpoint shards:  33% Completed | 3/9 [00:06<00:13,  2.31s/it]\n\rLoading safetensors checkpoint shards:  44% Completed | 4/9 [00:09<00:12,  2.42s/it]\n\rLoading safetensors checkpoint shards:  56% Completed | 5/9 [00:11<00:09,  2.44s/it]\n\rLoading safetensors checkpoint shards:  67% Completed | 6/9 [00:15<00:08,  2.97s/it]\n\rLoading safetensors checkpoint shards:  78% Completed | 7/9 [00:17<00:05,  2.72s/it]\n\rLoading safetensors checkpoint shards:  89% Completed | 8/9 [00:21<00:03,  3.06s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:22<00:00,  2.40s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:22<00:00,  2.51s/it]\n\n/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/.venv/lib/python3.11/site-packages/vllm/model_executor/layers/sampler.py:287: UserWarning: cumsum_cuda_kernel does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'. You can file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation. (Triggered internally at ../aten/src/ATen/Context.cpp:83.)\n  probs_sum = probs_sort.cumsum(dim=-1)\n\rProcessed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\rProcessed prompts:  12%|█▎        | 1/8 [03:33<24:55, 213.59s/it, est. speed input: 0.15 toks/s, output: 19.18 toks/s]\rProcessed prompts: 100%|██████████| 8/8 [03:33<00:00, 26.70s/it, est. speed input: 1.32 toks/s, output: 153.42 toks/s]\n" status_code: 0
+2024-08-22 12:20:56,801 - __main__ - INFO - Syncing output back to local
+2024-08-22 12:20:57,304 - __main__ - INFO - Done
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/stdout.txt
new file mode 100644
index 0000000..295a710
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/stdout.txt
@@ -0,0 +1,19 @@
+gpu_count=1
+Starting model loading
+INFO 08-22 10:16:34 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 10:16:34 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+INFO 08-22 10:16:35 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+INFO 08-22 10:16:36 weight_utils.py:225] Using model weights format ['*.safetensors']
+INFO 08-22 10:17:10 model_runner.py:732] Loading model weights took 37.0561 GB
+INFO 08-22 10:17:16 gpu_executor.py:102] # GPU blocks: 6068, # CPU blocks: 819
+model loading took 46.38 seconds
+Starting 8 responses generation
+8 responses generation took 213.59 seconds
+{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e',
+ 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1',
+ 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c',
+ 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518',
+ 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3',
+ 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94',
+ 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'}
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/sysinfo.yaml
new file mode 100644
index 0000000..9ed900c
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/sysinfo.yaml
@@ -0,0 +1,563 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 1965.398
+    - 2195.458
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 1941.383
+    - 3663.006
+    - 3079.662
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 1805.502
+    - 2595.195
+    - 1796.418
+    - 2004.992
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2504.399
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 1796.351
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 1796.201
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    - 2595.195
+    count: 64
+    model: AMD EPYC 7513 32-Core Processor
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 1
+    details:
+    - capacity: '81920'
+      cuda: '8.0'
+      driver: 555.58.02
+      graphics_speed: '210'
+      memory_speed: '1593'
+      name: NVIDIA A100-SXM4-80GB
+      power_limit: '500.00'
+  hard_disk:
+    free: 40000072
+    total: 95420416
+    used: 55420344
+  os: Ubuntu 22.04.4 LTS
+  ram:
+    available: 519784888
+    free: 335122660
+    total: 527941444
+    used: 192818784
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-16-19_1x_a100_sxm4_80gb/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.12
+  - base-files==12ubuntu4.6
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1.1
+  - binutils==2.38-4ubuntu2.6
+  - binutils-common==2.38-4ubuntu2.6
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.6
+  - bsdutils==1:2.37.2-4ubuntu3.4
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1.2
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-5==12.5.39-1
+  - cuda-command-line-tools-12-5==12.5.1-1
+  - cuda-compat-12-5==555.42.06-1
+  - cuda-compiler-12-5==12.5.1-1
+  - cuda-crt-12-5==12.5.82-1
+  - cuda-cudart-12-5==12.5.82-1
+  - cuda-cudart-dev-12-5==12.5.82-1
+  - cuda-cuobjdump-12-5==12.5.39-1
+  - cuda-cupti-12-5==12.5.82-1
+  - cuda-cupti-dev-12-5==12.5.82-1
+  - cuda-cuxxfilt-12-5==12.5.82-1
+  - cuda-driver-dev-12-5==12.5.82-1
+  - cuda-gdb-12-5==12.5.82-1
+  - cuda-keyring==1.1-1
+  - cuda-libraries-12-5==12.5.1-1
+  - cuda-libraries-dev-12-5==12.5.1-1
+  - cuda-minimal-build-12-5==12.5.1-1
+  - cuda-nsight-compute-12-5==12.5.1-1
+  - cuda-nvcc-12-5==12.5.82-1
+  - cuda-nvdisasm-12-5==12.5.39-1
+  - cuda-nvml-dev-12-5==12.5.82-1
+  - cuda-nvprof-12-5==12.5.82-1
+  - cuda-nvprune-12-5==12.5.82-1
+  - cuda-nvrtc-12-5==12.5.82-1
+  - cuda-nvrtc-dev-12-5==12.5.82-1
+  - cuda-nvtx-12-5==12.5.82-1
+  - cuda-nvvm-12-5==12.5.82-1
+  - cuda-opencl-12-5==12.5.39-1
+  - cuda-opencl-dev-12-5==12.5.39-1
+  - cuda-profiler-api-12-5==12.5.39-1
+  - cuda-sanitizer-12-5==12.5.81-1
+  - cuda-toolkit-12-5-config-common==12.5.82-1
+  - cuda-toolkit-12-config-common==12.5.82-1
+  - cuda-toolkit-config-common==12.5.82-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.7
+  - dpkg==1.21.1ubuntu2.3
+  - dpkg-dev==1.21.1ubuntu2.3
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.11
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.12
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.6
+  - libblkid1==2.37.2-4ubuntu3.4
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.8
+  - libc-dev-bin==2.35-0ubuntu3.8
+  - libc6==2.35-0ubuntu3.8
+  - libc6-dev==2.35-0ubuntu3.8
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.2
+  - libctf-nobfd0==2.38-4ubuntu2.6
+  - libctf0==2.38-4ubuntu2.6
+  - libcublas-12-5==12.5.3.2-1
+  - libcublas-dev-12-5==12.5.3.2-1
+  - libcudnn9-cuda-12==9.2.1.18-1
+  - libcudnn9-dev-cuda-12==9.2.1.18-1
+  - libcufft-12-5==11.2.3.61-1
+  - libcufft-dev-12-5==11.2.3.61-1
+  - libcufile-12-5==1.10.1.7-1
+  - libcufile-dev-12-5==1.10.1.7-1
+  - libcurand-12-5==10.3.6.82-1
+  - libcurand-dev-12-5==10.3.6.82-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.17
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-5==11.6.3.83-1
+  - libcusolver-dev-12-5==11.6.3.83-1
+  - libcusparse-12-5==12.5.1.3-1
+  - libcusparse-dev-12-5==12.5.1.3-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.3
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.3
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.3
+  - libglib2.0-bin==2.72.4-0ubuntu2.3
+  - libglib2.0-data==2.72.4-0ubuntu2.3
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.5
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgpm2==1.20.7-10build1
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.3
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.2
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.3
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.3
+  - libkrb5support0==1.19.2-2ubuntu0.3
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.17+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3.4
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.22.3-1+cuda12.5
+  - libnccl2==2.22.3-1+cuda12.5
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.2
+  - libnpp-12-5==12.3.0.159-1
+  - libnpp-dev-12-5==12.3.0.159-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvfatbin-12-5==12.5.82-1
+  - libnvfatbin-dev-12-5==12.5.82-1
+  - libnvjitlink-12-5==12.5.82-1
+  - libnvjitlink-dev-12-5==12.5.82-1
+  - libnvjpeg-12-5==12.3.2.81-1
+  - libnvjpeg-dev-12-5==12.3.2.81-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.4
+  - libpam-modules-bin==1.4.0-11ubuntu2.4
+  - libpam-runtime==1.4.0-11ubuntu2.4
+  - libpam-systemd==249.11-0ubuntu3.12
+  - libpam0g==1.4.0-11ubuntu2.4
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.3
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2.1
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10==3.10.12-1~22.04.5
+  - libpython3.10-minimal==3.10.12-1~22.04.5
+  - libpython3.10-stdlib==3.10.12-1~22.04.5
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3.4
+  - libsodium23==1.0.18-1build2
+  - libsqlite3-0==3.37.2-2ubuntu0.3
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.3
+  - libssl3==3.0.2-0ubuntu1.16
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.12
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.12
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3.4
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.4
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-113.123
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.2
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3.4
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2024.2.1==2024.2.1.2-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.16
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.2
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.3
+  - perl-base==5.34.0-3ubuntu1.3
+  - perl-modules-5.34==5.34.0-3ubuntu1.3
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2.1
+  - python-apt-common==2.4.0ubuntu3
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu3
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2.2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-distutils==3.10.8-1~22.04
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-lib2to3==3.10.8-1~22.04
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pip==22.0.2+dfsg-1ubuntu0.4
+  - python3-pip-whl==22.0.2+dfsg-1ubuntu0.4
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-setuptools==59.6.0-1.2ubuntu0.22.04.1
+  - python3-setuptools-whl==59.6.0-1.2ubuntu0.22.04.1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-wheel==0.37.1-2ubuntu0.22.04.1
+  - python3-zipp==1.0.0-3ubuntu0.1
+  - python3.10==3.10.12-1~22.04.5
+  - python3.10-minimal==3.10.12-1~22.04.5
+  - python3.10-venv==3.10.12-1~22.04.5
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.12
+  - systemd-sysv==249.11-0ubuntu3.12
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.2
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3.4
+  - vim==2:8.2.3995-1ubuntu2.17
+  - vim-common==2:8.2.3995-1ubuntu2.17
+  - vim-runtime==2:8.2.3995-1ubuntu2.17
+  - wget==1.21.2-2ubuntu1.1
+  - xxd==2:8.2.3995-1ubuntu2.17
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#45~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Mon Jul 15 16:40:02 UTC 2'
+  processor: x86_64
+  release: 6.5.0-45-generic
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/experiment.yaml
new file mode 100644
index 0000000..ebdf42b
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 2x A100 SXM4 40GB
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb
+slug: 2x_a100_sxm4_40gb
+timestamp: 2024-08-22_12-23-42
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output.yaml
new file mode 100644
index 0000000..19c54b4
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: ceff25d8303b1a21729c9c6685541976d5236852cac0ef5626e5cdf54f76e7be4504d7e21d8c6805258794ffb0ec1a6635486797c1ac098666be3f1605650b70
+Describe justice system in UK vs USA in 2000-5000 words: 5efab605ab72f01ce1f06b22898d59584fe9395fe478029d3120c282332331dbdad870ce6a77771eddbfacc28bc09e58de88a53ae623610bf7dfda7f56d91b6e
+Describe schooling system in UK vs USA in 2000-5000 words: b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd
+Explain me some random problem for me in 2000-5000 words: c504642a6f6c90f2ad6d46b9e43ebfd8d055f76613df17609c360734069a9acd7e0c9cbc2f49b636d80c510adb84b04d962cb5d5d23a179fbf60873ed66fefa7
+Tell me entire history of USA: 24bc6463cf8635c4ed91234966fe4c1013318f86de7e1071d44c3d84ca1dda2a891998d5ad2454ef6701e9173fa66f3d4a2655d541d9b08ff057b0bfa15e56fa
+Write a ballad. Pick a random theme.: cbe4bcec1e75a20c03396126efc338bb10b49fa75e86494095b71d985f2f071ea611b3ca76781abe52ca9b14c3720f15459d00ec755e02ac1113adeb91cd73e5
+Write an epic story about a dragon and a knight: 2fc7b6d29f3c45a25c2aa80295bb038239e6641b65eec7e108e560d399378b69556dfbfeb728a2e7fd7efd0173e1b0fd62ae7240f69f5c810c2a6671f26e197e
+Write an essay about being a Senior developer.: 36cd178f3af54c90392779118a679a8905e3aeea8e4f471469886b61ab047b1c05beaef7e04d6f22eefe5dc6b0ca5ea1b1bbbe31a1961f7a3138b087734f5b09
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/run.local.log
new file mode 100644
index 0000000..8cd16f6
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/run.local.log
@@ -0,0 +1,15 @@
+2024-08-22 12:23:42,366 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 2x A100 SXM4 40GB
+2024-08-22 12:23:42,369 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/run.local.log
+2024-08-22 12:23:42,728 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 12:23:43,548 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-22 12:23:43,557 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 12:23:43,560 - __main__ - INFO - Syncing files to remote
+2024-08-22 12:23:44,162 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 12:23:51,973 - __main__ - INFO - Setting up remote environment
+2024-08-22 12:24:22,170 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n    source $HOME/.cargo/env (sh, bash, zsh)\n    source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-23-42_2x_a100_sxm4_40gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-23-42_2x_a100_sxm4_40gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-23-42_2x_a100_sxm4_40gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 534ms\nPrepared 108 packages in 26.08s\nInstalled 108 packages in 405ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 12:24:22,176 - __main__ - INFO - Gathering system info
+2024-08-22 12:24:25,124 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-23-42_2x_a100_sxm4_40gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-23-42_2x_a100_sxm4_40gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-23-42_2x_a100_sxm4_40gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 12:24:25,130 - __main__ - INFO - Running experiment code on remote
+2024-08-22 12:30:14,815 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output/stdout.txt' stdout: "gpu_count=2\nStarting model loading\nINFO 08-22 10:24:31 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:24:31 config.py:729] Defaulting to use mp for distributed inference\nINFO 08-22 10:24:31 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nWARNING 08-22 10:24:32 multiproc_gpu_executor.py:59] Reducing Torch parallelism from 32 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.\nINFO 08-22 10:24:32 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:32 multiproc_worker_utils.py:215] Worker ready; awaiting tasks\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:33 utils.py:841] Found nccl from library libnccl.so.2\nINFO 08-22 10:24:33 utils.py:841] Found nccl from library libnccl.so.2\nINFO 08-22 10:24:33 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:33 pynccl.py:63] vLLM is using nccl==2.20.5\nINFO 08-22 10:24:33 custom_all_reduce_utils.py:203] generating GPU P2P access cache in /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\nINFO 08-22 10:24:40 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:40 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\nINFO 08-22 10:24:40 shm_broadcast.py:235] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x780bbf51f0d0>, local_subscribe_port=48445, remote_subscribe_port=None)\nINFO 08-22 10:24:40 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:40 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 10:24:40 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:24:40 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 10:27:21 model_runner.py:732] Loading model weights took 18.5516 GB\n\x1b[1;36m(VllmWorkerProcess pid=842)\x1b[0;0m INFO 08-22 10:27:22 model_runner.py:732] Loading model weights took 18.5516 GB\nINFO 08-22 10:27:27 distributed_gpu_executor.py:56] # GPU blocks: 5022, # CPU blocks: 1638\nmodel loading took 178.84 seconds\nStarting 8 responses generation\n8 responses generation took 160.77 seconds\n{'Count to 1000, skip unpopular numbers': 'ceff25d8303b1a21729c9c6685541976d5236852cac0ef5626e5cdf54f76e7be4504d7e21d8c6805258794ffb0ec1a6635486797c1ac098666be3f1605650b70',\n 'Describe justice system in UK vs USA in 2000-5000 words': '5efab605ab72f01ce1f06b22898d59584fe9395fe478029d3120c282332331dbdad870ce6a77771eddbfacc28bc09e58de88a53ae623610bf7dfda7f56d91b6e',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd',\n 'Explain me some random problem for me in 2000-5000 words': 'c504642a6f6c90f2ad6d46b9e43ebfd8d055f76613df17609c360734069a9acd7e0c9cbc2f49b636d80c510adb84b04d962cb5d5d23a179fbf60873ed66fefa7',\n 'Tell me entire history of USA': '24bc6463cf8635c4ed91234966fe4c1013318f86de7e1071d44c3d84ca1dda2a891998d5ad2454ef6701e9173fa66f3d4a2655d541d9b08ff057b0bfa15e56fa',\n 'Write a ballad. Pick a random theme.': 'cbe4bcec1e75a20c03396126efc338bb10b49fa75e86494095b71d985f2f071ea611b3ca76781abe52ca9b14c3720f15459d00ec755e02ac1113adeb91cd73e5',\n 'Write an epic story about a dragon and a knight': '2fc7b6d29f3c45a25c2aa80295bb038239e6641b65eec7e108e560d399378b69556dfbfeb728a2e7fd7efd0173e1b0fd62ae7240f69f5c810c2a6671f26e197e',\n 'Write an essay about being a Senior developer.': '36cd178f3af54c90392779118a679a8905e3aeea8e4f471469886b61ab047b1c05beaef7e04d6f22eefe5dc6b0ca5ea1b1bbbe31a1961f7a3138b087734f5b09'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-23-42_2x_a100_sxm4_40gb '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-23-42_2x_a100_sxm4_40gb\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-23-42_2x_a100_sxm4_40gb) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/output/stdout.txt\n\rLoading safetensors checkpoint shards:   0% Completed | 0/9 [00:00<?, ?it/s]\n\rLoading safetensors checkpoint shards:  11% Completed | 1/9 [00:02<00:16,  2.06s/it]\n\rLoading safetensors checkpoint shards:  22% Completed | 2/9 [00:04<00:17,  2.48s/it]\n\rLoading safetensors checkpoint shards:  33% Completed | 3/9 [00:07<00:16,  2.74s/it]\n\rLoading safetensors checkpoint shards:  44% Completed | 4/9 [00:10<00:14,  2.85s/it]\n\rLoading safetensors checkpoint shards:  56% Completed | 5/9 [00:13<00:11,  2.91s/it]\n\rLoading safetensors checkpoint shards:  67% Completed | 6/9 [00:16<00:08,  2.96s/it]\n\rLoading safetensors checkpoint shards:  78% Completed | 7/9 [00:18<00:04,  2.36s/it]\n\rLoading safetensors checkpoint shards:  89% Completed | 8/9 [00:21<00:02,  2.55s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:24<00:00,  2.70s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:24<00:00,  2.67s/it]\n\n/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/.venv/lib/python3.11/site-packages/vllm/model_executor/layers/sampler.py:287: UserWarning: cumsum_cuda_kernel does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'. You can file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation. (Triggered internally at ../aten/src/ATen/Context.cpp:83.)\n  probs_sum = probs_sort.cumsum(dim=-1)\n\rProcessed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\rProcessed prompts:  12%|█▎        | 1/8 [02:33<17:52, 153.26s/it, est. speed input: 0.25 toks/s, output: 25.46 toks/s]\rProcessed prompts:  62%|██████▎   | 5/8 [02:40<01:13, 24.42s/it, est. speed input: 1.11 toks/s, output: 122.56 toks/s]\rProcessed prompts: 100%|██████████| 8/8 [02:40<00:00, 20.10s/it, est. speed input: 1.75 toks/s, output: 199.00 toks/s]\n[rank0]:[W822 10:30:12.633687892 CudaIPCTypes.cpp:16] Producer process has been terminated before all shared CUDA tensors released. See Note [Sharing CUDA tensors]\n/root/.local/share/uv/python/cpython-3.11.9-linux-x86_64-gnu/lib/python3.11/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked shared_memory objects to clean up at shutdown\n  warnings.warn('resource_tracker: There appear to be %d '\n" status_code: 0
+2024-08-22 12:30:14,853 - __main__ - INFO - Syncing output back to local
+2024-08-22 12:30:16,785 - __main__ - INFO - Done
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/stdout.txt
new file mode 100644
index 0000000..83e8219
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/stdout.txt
@@ -0,0 +1,34 @@
+gpu_count=2
+Starting model loading
+INFO 08-22 10:24:31 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 10:24:31 config.py:729] Defaulting to use mp for distributed inference
+INFO 08-22 10:24:31 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+WARNING 08-22 10:24:32 multiproc_gpu_executor.py:59] Reducing Torch parallelism from 32 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.
+INFO 08-22 10:24:32 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager
+[1;36m(VllmWorkerProcess pid=842)[0;0m INFO 08-22 10:24:32 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
+[1;36m(VllmWorkerProcess pid=842)[0;0m INFO 08-22 10:24:33 utils.py:841] Found nccl from library libnccl.so.2
+INFO 08-22 10:24:33 utils.py:841] Found nccl from library libnccl.so.2
+INFO 08-22 10:24:33 pynccl.py:63] vLLM is using nccl==2.20.5
+[1;36m(VllmWorkerProcess pid=842)[0;0m INFO 08-22 10:24:33 pynccl.py:63] vLLM is using nccl==2.20.5
+INFO 08-22 10:24:33 custom_all_reduce_utils.py:203] generating GPU P2P access cache in /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
+INFO 08-22 10:24:40 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
+[1;36m(VllmWorkerProcess pid=842)[0;0m INFO 08-22 10:24:40 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
+INFO 08-22 10:24:40 shm_broadcast.py:235] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x780bbf51f0d0>, local_subscribe_port=48445, remote_subscribe_port=None)
+INFO 08-22 10:24:40 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+[1;36m(VllmWorkerProcess pid=842)[0;0m INFO 08-22 10:24:40 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+INFO 08-22 10:24:40 weight_utils.py:225] Using model weights format ['*.safetensors']
+[1;36m(VllmWorkerProcess pid=842)[0;0m INFO 08-22 10:24:40 weight_utils.py:225] Using model weights format ['*.safetensors']
+INFO 08-22 10:27:21 model_runner.py:732] Loading model weights took 18.5516 GB
+[1;36m(VllmWorkerProcess pid=842)[0;0m INFO 08-22 10:27:22 model_runner.py:732] Loading model weights took 18.5516 GB
+INFO 08-22 10:27:27 distributed_gpu_executor.py:56] # GPU blocks: 5022, # CPU blocks: 1638
+model loading took 178.84 seconds
+Starting 8 responses generation
+8 responses generation took 160.77 seconds
+{'Count to 1000, skip unpopular numbers': 'ceff25d8303b1a21729c9c6685541976d5236852cac0ef5626e5cdf54f76e7be4504d7e21d8c6805258794ffb0ec1a6635486797c1ac098666be3f1605650b70',
+ 'Describe justice system in UK vs USA in 2000-5000 words': '5efab605ab72f01ce1f06b22898d59584fe9395fe478029d3120c282332331dbdad870ce6a77771eddbfacc28bc09e58de88a53ae623610bf7dfda7f56d91b6e',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': 'b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd',
+ 'Explain me some random problem for me in 2000-5000 words': 'c504642a6f6c90f2ad6d46b9e43ebfd8d055f76613df17609c360734069a9acd7e0c9cbc2f49b636d80c510adb84b04d962cb5d5d23a179fbf60873ed66fefa7',
+ 'Tell me entire history of USA': '24bc6463cf8635c4ed91234966fe4c1013318f86de7e1071d44c3d84ca1dda2a891998d5ad2454ef6701e9173fa66f3d4a2655d541d9b08ff057b0bfa15e56fa',
+ 'Write a ballad. Pick a random theme.': 'cbe4bcec1e75a20c03396126efc338bb10b49fa75e86494095b71d985f2f071ea611b3ca76781abe52ca9b14c3720f15459d00ec755e02ac1113adeb91cd73e5',
+ 'Write an epic story about a dragon and a knight': '2fc7b6d29f3c45a25c2aa80295bb038239e6641b65eec7e108e560d399378b69556dfbfeb728a2e7fd7efd0173e1b0fd62ae7240f69f5c810c2a6671f26e197e',
+ 'Write an essay about being a Senior developer.': '36cd178f3af54c90392779118a679a8905e3aeea8e4f471469886b61ab047b1c05beaef7e04d6f22eefe5dc6b0ca5ea1b1bbbe31a1961f7a3138b087734f5b09'}
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/sysinfo.yaml
new file mode 100644
index 0000000..07f1b3d
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/sysinfo.yaml
@@ -0,0 +1,560 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 1500.0
+    - 1500.0
+    - 2122.33
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3719.05
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1487.609
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1486.393
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2116.689
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3100.0
+    - 1500.0
+    - 1500.0
+    - 1497.615
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3100.0
+    - 1500.0
+    - 1500.0
+    - 3100.0
+    - 1500.0
+    - 1500.0
+    - 3100.0
+    - 3709.581
+    - 2121.854
+    - 2200.0
+    - 1500.0
+    - 3100.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3100.0
+    - 2170.196
+    - 1500.0
+    - 1486.641
+    count: 64
+    model: AMD EPYC 9384X 32-Core Processor
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 2
+    details:
+    - capacity: '40960'
+      cuda: '8.0'
+      driver: 560.28.03
+      graphics_speed: '210'
+      memory_speed: '1215'
+      name: NVIDIA A100-SXM4-40GB
+      power_limit: '400.00'
+    - capacity: '40960'
+      cuda: '8.0'
+      driver: 560.28.03
+      graphics_speed: '210'
+      memory_speed: '1215'
+      name: NVIDIA A100-SXM4-40GB
+      power_limit: '400.00'
+  hard_disk:
+    free: 77360568
+    total: 83886080
+    used: 6525512
+  os: Ubuntu 22.04.4 LTS
+  ram:
+    available: 387095904
+    free: 129260144
+    total: 395784328
+    used: 266524184
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-23-42_2x_a100_sxm4_40gb/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.12
+  - base-files==12ubuntu4.6
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1.1
+  - binutils==2.38-4ubuntu2.6
+  - binutils-common==2.38-4ubuntu2.6
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.6
+  - bsdutils==1:2.37.2-4ubuntu3.4
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1.2
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-5==12.5.39-1
+  - cuda-command-line-tools-12-5==12.5.1-1
+  - cuda-compat-12-5==555.42.06-1
+  - cuda-compiler-12-5==12.5.1-1
+  - cuda-crt-12-5==12.5.82-1
+  - cuda-cudart-12-5==12.5.82-1
+  - cuda-cudart-dev-12-5==12.5.82-1
+  - cuda-cuobjdump-12-5==12.5.39-1
+  - cuda-cupti-12-5==12.5.82-1
+  - cuda-cupti-dev-12-5==12.5.82-1
+  - cuda-cuxxfilt-12-5==12.5.82-1
+  - cuda-driver-dev-12-5==12.5.82-1
+  - cuda-gdb-12-5==12.5.82-1
+  - cuda-keyring==1.1-1
+  - cuda-libraries-12-5==12.5.1-1
+  - cuda-libraries-dev-12-5==12.5.1-1
+  - cuda-minimal-build-12-5==12.5.1-1
+  - cuda-nsight-compute-12-5==12.5.1-1
+  - cuda-nvcc-12-5==12.5.82-1
+  - cuda-nvdisasm-12-5==12.5.39-1
+  - cuda-nvml-dev-12-5==12.5.82-1
+  - cuda-nvprof-12-5==12.5.82-1
+  - cuda-nvprune-12-5==12.5.82-1
+  - cuda-nvrtc-12-5==12.5.82-1
+  - cuda-nvrtc-dev-12-5==12.5.82-1
+  - cuda-nvtx-12-5==12.5.82-1
+  - cuda-nvvm-12-5==12.5.82-1
+  - cuda-opencl-12-5==12.5.39-1
+  - cuda-opencl-dev-12-5==12.5.39-1
+  - cuda-profiler-api-12-5==12.5.39-1
+  - cuda-sanitizer-12-5==12.5.81-1
+  - cuda-toolkit-12-5-config-common==12.5.82-1
+  - cuda-toolkit-12-config-common==12.5.82-1
+  - cuda-toolkit-config-common==12.5.82-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.7
+  - dpkg==1.21.1ubuntu2.3
+  - dpkg-dev==1.21.1ubuntu2.3
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.11
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.12
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.6
+  - libblkid1==2.37.2-4ubuntu3.4
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.8
+  - libc-dev-bin==2.35-0ubuntu3.8
+  - libc6==2.35-0ubuntu3.8
+  - libc6-dev==2.35-0ubuntu3.8
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.2
+  - libctf-nobfd0==2.38-4ubuntu2.6
+  - libctf0==2.38-4ubuntu2.6
+  - libcublas-12-5==12.5.3.2-1
+  - libcublas-dev-12-5==12.5.3.2-1
+  - libcudnn9-cuda-12==9.2.1.18-1
+  - libcudnn9-dev-cuda-12==9.2.1.18-1
+  - libcufft-12-5==11.2.3.61-1
+  - libcufft-dev-12-5==11.2.3.61-1
+  - libcufile-12-5==1.10.1.7-1
+  - libcufile-dev-12-5==1.10.1.7-1
+  - libcurand-12-5==10.3.6.82-1
+  - libcurand-dev-12-5==10.3.6.82-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.17
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-5==11.6.3.83-1
+  - libcusolver-dev-12-5==11.6.3.83-1
+  - libcusparse-12-5==12.5.1.3-1
+  - libcusparse-dev-12-5==12.5.1.3-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.3
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.3
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.3
+  - libglib2.0-bin==2.72.4-0ubuntu2.3
+  - libglib2.0-data==2.72.4-0ubuntu2.3
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.5
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.3
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.2
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.3
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.3
+  - libkrb5support0==1.19.2-2ubuntu0.3
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.17+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3.4
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.22.3-1+cuda12.5
+  - libnccl2==2.22.3-1+cuda12.5
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.2
+  - libnpp-12-5==12.3.0.159-1
+  - libnpp-dev-12-5==12.3.0.159-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvfatbin-12-5==12.5.82-1
+  - libnvfatbin-dev-12-5==12.5.82-1
+  - libnvjitlink-12-5==12.5.82-1
+  - libnvjitlink-dev-12-5==12.5.82-1
+  - libnvjpeg-12-5==12.3.2.81-1
+  - libnvjpeg-dev-12-5==12.3.2.81-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.4
+  - libpam-modules-bin==1.4.0-11ubuntu2.4
+  - libpam-runtime==1.4.0-11ubuntu2.4
+  - libpam-systemd==249.11-0ubuntu3.12
+  - libpam0g==1.4.0-11ubuntu2.4
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.3
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2.1
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10-minimal==3.10.12-1~22.04.5
+  - libpython3.10-stdlib==3.10.12-1~22.04.5
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3.4
+  - libsqlite3-0==3.37.2-2ubuntu0.3
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.3
+  - libssl3==3.0.2-0ubuntu1.16
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.12
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.12
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3.4
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.4
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-113.123
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.2
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3.4
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2024.2.1==2024.2.1.2-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.16
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.2
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.3
+  - perl-base==5.34.0-3ubuntu1.3
+  - perl-modules-5.34==5.34.0-3ubuntu1.3
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2.1
+  - python-apt-common==2.4.0ubuntu3
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu3
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2.2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-distutils==3.10.8-1~22.04
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-lib2to3==3.10.8-1~22.04
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pip==22.0.2+dfsg-1ubuntu0.4
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-setuptools==59.6.0-1.2ubuntu0.22.04.1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-wheel==0.37.1-2ubuntu0.22.04.1
+  - python3-zipp==1.0.0-3ubuntu0.1
+  - python3.10==3.10.12-1~22.04.5
+  - python3.10-minimal==3.10.12-1~22.04.5
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.12
+  - systemd-sysv==249.11-0ubuntu3.12
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.2
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3.4
+  - wget==1.21.2-2ubuntu1.1
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#40~22.04.3-Ubuntu SMP PREEMPT_DYNAMIC Tue Jul 30 17:30:19 UTC 2'
+  processor: x86_64
+  release: 6.8.0-40-generic
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/experiment.yaml
new file mode 100644
index 0000000..78aa6aa
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 4x RTX 4000Ada
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada
+slug: 4x_rtx_4000ada
+timestamp: 2024-08-22_12-26-03
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output.yaml
new file mode 100644
index 0000000..443d83a
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: e01a486cc144586ae8b3b56ac3ea584290fbe07834a67a8dbc9ef98c66015d87d9abd0bcee16e90850ca183cdc948abcf208fc1d38a3ee2f8e4851cac05c10d9
+Describe justice system in UK vs USA in 2000-5000 words: f12666eaf529cb993f9b5a24a9f3f9a336e0492c6fb45030acc46117776656ff5fff12fe03ba63ba431ffd32dfe68bc75a146059756f3925f0fbd1b39e01f1f8
+Describe schooling system in UK vs USA in 2000-5000 words: 291023c3134a2fc4dc6f00507a428d9c7a6e166e1a0a7f73d74b0b4b60e460d6a50d143ea21b9cb9c26c10dd96002f208b0f8750dfc1b07cb5c742ff3c398fd2
+Explain me some random problem for me in 2000-5000 words: 6c10b7cfd03339881798d66c02e1be1f99e5536746d82517435c3ab26bb5f6b377540fb2d374af62bacc1557de85f0d70b7f753ec074bde161d150c94382a833
+Tell me entire history of USA: 1d193ab043b6dd23922e8258d6e134f390cebae90131340d47bf46510a2f34a4f93a5112b1e9160fe51219d2169576cda7948d605b4cb0d603d24388ee862687
+Write a ballad. Pick a random theme.: 53aa9308f203c0f71abf485420b4a87411b63ea75535d2c708226963ddf29b926db30b7f21c690af5bb914ab6b4f659685d1bda1d14899813dffd2de5fcdef7f
+Write an epic story about a dragon and a knight: e36cfba48cfa0862ad305c3f54543b0d7e9c44f89bdb6fb7d74168a8f1d3a5140b20644c62eda22795099daf3d5db93b8bd39fbb6394c5d6d5c41761cc253ce6
+Write an essay about being a Senior developer.: 33deb94b55d7c18d7b3a2b564c0413a25a2eeacd152519a5884a3fa7c8f078a01edea1d7536f05e582647dfdbb218630579071030e5ea016e8a957ac5e4057d0
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/run.local.log
new file mode 100644
index 0000000..1b3a57a
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/run.local.log
@@ -0,0 +1,15 @@
+2024-08-22 12:26:03,711 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 4x RTX 4000Ada
+2024-08-22 12:26:03,714 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/run.local.log
+2024-08-22 12:26:03,882 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 12:26:04,053 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-22 12:26:04,057 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 12:26:04,062 - __main__ - INFO - Syncing files to remote
+2024-08-22 12:26:04,290 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 12:26:06,997 - __main__ - INFO - Setting up remote environment
+2024-08-22 12:26:45,244 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n    source $HOME/.cargo/env (sh, bash, zsh)\n    source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-03_4x_rtx_4000ada '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-03_4x_rtx_4000ada\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-03_4x_rtx_4000ada) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 885ms\nPrepared 108 packages in 31.88s\nInstalled 108 packages in 489ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 12:26:45,264 - __main__ - INFO - Gathering system info
+2024-08-22 12:26:49,096 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-03_4x_rtx_4000ada '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-03_4x_rtx_4000ada\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-03_4x_rtx_4000ada) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 12:26:49,110 - __main__ - INFO - Running experiment code on remote
+2024-08-22 12:35:57,340 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output/stdout.txt' stdout: "gpu_count=4\nStarting model loading\nINFO 08-22 10:26:56 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:26:56 config.py:729] Defaulting to use mp for distributed inference\nINFO 08-22 10:26:56 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=4, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nWARNING 08-22 10:26:58 multiproc_gpu_executor.py:59] Reducing Torch parallelism from 24 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.\nINFO 08-22 10:26:58 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:26:58 multiproc_worker_utils.py:215] Worker ready; awaiting tasks\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:26:58 multiproc_worker_utils.py:215] Worker ready; awaiting tasks\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:26:58 multiproc_worker_utils.py:215] Worker ready; awaiting tasks\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2\nINFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2\nINFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m WARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m WARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.\nWARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m WARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.\nINFO 08-22 10:27:00 shm_broadcast.py:235] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1, 2, 3], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x7e9dd0cc7590>, local_subscribe_port=40373, remote_subscribe_port=None)\nINFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 10:27:01 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB\n\x1b[1;36m(VllmWorkerProcess pid=789)\x1b[0;0m INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB\n\x1b[1;36m(VllmWorkerProcess pid=787)\x1b[0;0m INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB\nINFO 08-22 10:30:36 model_runner.py:732] Loading model weights took 9.2867 GB\nINFO 08-22 10:30:44 distributed_gpu_executor.py:56] # GPU blocks: 3207, # CPU blocks: 3276\nmodel loading took 233.71 seconds\nStarting 8 responses generation\n8 responses generation took 298.52 seconds\n{'Count to 1000, skip unpopular numbers': 'e01a486cc144586ae8b3b56ac3ea584290fbe07834a67a8dbc9ef98c66015d87d9abd0bcee16e90850ca183cdc948abcf208fc1d38a3ee2f8e4851cac05c10d9',\n 'Describe justice system in UK vs USA in 2000-5000 words': 'f12666eaf529cb993f9b5a24a9f3f9a336e0492c6fb45030acc46117776656ff5fff12fe03ba63ba431ffd32dfe68bc75a146059756f3925f0fbd1b39e01f1f8',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '291023c3134a2fc4dc6f00507a428d9c7a6e166e1a0a7f73d74b0b4b60e460d6a50d143ea21b9cb9c26c10dd96002f208b0f8750dfc1b07cb5c742ff3c398fd2',\n 'Explain me some random problem for me in 2000-5000 words': '6c10b7cfd03339881798d66c02e1be1f99e5536746d82517435c3ab26bb5f6b377540fb2d374af62bacc1557de85f0d70b7f753ec074bde161d150c94382a833',\n 'Tell me entire history of USA': '1d193ab043b6dd23922e8258d6e134f390cebae90131340d47bf46510a2f34a4f93a5112b1e9160fe51219d2169576cda7948d605b4cb0d603d24388ee862687',\n 'Write a ballad. Pick a random theme.': '53aa9308f203c0f71abf485420b4a87411b63ea75535d2c708226963ddf29b926db30b7f21c690af5bb914ab6b4f659685d1bda1d14899813dffd2de5fcdef7f',\n 'Write an epic story about a dragon and a knight': 'e36cfba48cfa0862ad305c3f54543b0d7e9c44f89bdb6fb7d74168a8f1d3a5140b20644c62eda22795099daf3d5db93b8bd39fbb6394c5d6d5c41761cc253ce6',\n 'Write an essay about being a Senior developer.': '33deb94b55d7c18d7b3a2b564c0413a25a2eeacd152519a5884a3fa7c8f078a01edea1d7536f05e582647dfdbb218630579071030e5ea016e8a957ac5e4057d0'}\nERROR 08-22 10:35:51 multiproc_worker_utils.py:120] Worker VllmWorkerProcess pid 788 died, exit code: -15\nINFO 08-22 10:35:51 multiproc_worker_utils.py:123] Killing local vLLM worker processes\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-03_4x_rtx_4000ada '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-03_4x_rtx_4000ada\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-03_4x_rtx_4000ada) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/output/stdout.txt\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m Error while downloading from https://cdn-lfs-us-1.huggingface.co/repos/5d/51/5d5111f0b358d39407f5182b8ea3ee71a6b1ed7942bd42d40a40c060adb2c2fb/dd7daa0a6f7e9a11ac7c28bd6dbbd6974b99efbe329afb15cc506fb705e6e407?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00002-of-00009.safetensors%3B+filename%3D%22model-00002-of-00009.safetensors%22%3B&Expires=1724581621&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNDU4MTYyMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzVkLzUxLzVkNTExMWYwYjM1OGQzOTQwN2Y1MTgyYjhlYTNlZTcxYTZiMWVkNzk0MmJkNDJkNDBhNDBjMDYwYWRiMmMyZmIvZGQ3ZGFhMGE2ZjdlOWExMWFjN2MyOGJkNmRiYmQ2OTc0Yjk5ZWZiZTMyOWFmYjE1Y2M1MDZmYjcwNWU2ZTQwNz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=nlfFzTyogK4EsH6wbheWxniQwOIRKC6WNSOaFQQSQyXV0lvX2CpbwIv6JZwmqC8grxmPJHeKpXFUxg-5nW3sghqJGu756q2niYpV%7EC8HgwkUWFhI0uSV%7EdAYY4kQ%7E7c4b7dmY8hxqx3RsWZqyXNfr1R0l39Q39G0sTn6yiqLDq7f%7Ezpexl2xH1pzJ3EJFDRd1QBiLJVMd7Rh4yzHiBtmsNYuBgV5VCX%7Ezl-bc33zNonQfV%7E7k1VhTJ2sTsLGVIbW0CaboZZyqo8opAL1MMfYvI9hTMM10FQI-gMWRN9xcmWe7EWK%7EB2M-x6KkKK-5IimWoKd5fsYvF8jZmie6epALA__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m Trying to resume download...\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m Error while downloading from https://cdn-lfs-us-1.huggingface.co/repos/5d/51/5d5111f0b358d39407f5182b8ea3ee71a6b1ed7942bd42d40a40c060adb2c2fb/bf8de6ef3f4e527721c9f03c5bec9dd6219b58623d11feef6c213a6fee79d759?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00003-of-00009.safetensors%3B+filename%3D%22model-00003-of-00009.safetensors%22%3B&Expires=1724581621&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNDU4MTYyMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzVkLzUxLzVkNTExMWYwYjM1OGQzOTQwN2Y1MTgyYjhlYTNlZTcxYTZiMWVkNzk0MmJkNDJkNDBhNDBjMDYwYWRiMmMyZmIvYmY4ZGU2ZWYzZjRlNTI3NzIxYzlmMDNjNWJlYzlkZDYyMTliNTg2MjNkMTFmZWVmNmMyMTNhNmZlZTc5ZDc1OT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=McjODK0UDiZHwiqwC4q6oncp3zJEfm4K9rLznT7rZW2IYk2Y1Rv7vACZbm9xpH5kOK%7Er1Qh3bTVmMDB29rKPSmswjBNQYVW-IDpCSS3hWNvzHBE7HvdQVB9%7Ej5vm%7EkWKgYJUUDbeSePmJ-vt%7EfHxBmHpm5UfsfZJZtNNl62s1ss1XL5kNPmwIaeZHWpmuVK7rXaeJQZMlGYSvnnFMs0eDviVbr0-6pGjHZgiC4HoiF7290GO-TgPISRUsVPK-iVhjOGFafUDzZAeJAqdS%7ENJiX47VR5012YvTGdTX0mWcZUSMEt-vaNsImbSro642d%7EWVxTCsyX%7E2wRLWlUtPOOkQA__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n\x1b[1;36m(VllmWorkerProcess pid=788)\x1b[0;0m Trying to resume download...\n\rLoading safetensors checkpoint shards:   0% Completed | 0/9 [00:00<?, ?it/s]\n\rLoading safetensors checkpoint shards:  11% Completed | 1/9 [00:02<00:18,  2.25s/it]\n\rLoading safetensors checkpoint shards:  22% Completed | 2/9 [00:05<00:19,  2.82s/it]\n\rLoading safetensors checkpoint shards:  33% Completed | 3/9 [00:08<00:18,  3.02s/it]\n\rLoading safetensors checkpoint shards:  44% Completed | 4/9 [00:11<00:15,  3.11s/it]\n\rLoading safetensors checkpoint shards:  56% Completed | 5/9 [00:15<00:12,  3.23s/it]\n\rLoading safetensors checkpoint shards:  67% Completed | 6/9 [00:18<00:09,  3.22s/it]\n\rLoading safetensors checkpoint shards:  78% Completed | 7/9 [00:20<00:05,  2.70s/it]\n\rLoading safetensors checkpoint shards:  89% Completed | 8/9 [00:23<00:02,  2.89s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:26<00:00,  3.01s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:26<00:00,  2.98s/it]\n\n/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/.venv/lib/python3.11/site-packages/vllm/model_executor/layers/sampler.py:287: UserWarning: cumsum_cuda_kernel does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'. You can file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation. (Triggered internally at ../aten/src/ATen/Context.cpp:83.)\n  probs_sum = probs_sort.cumsum(dim=-1)\n\rProcessed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\rProcessed prompts:  12%|█▎        | 1/8 [03:33<24:53, 213.34s/it, est. speed input: 0.18 toks/s, output: 13.73 toks/s]\rProcessed prompts:  38%|███▊      | 3/8 [04:58<07:14, 86.86s/it, est. speed input: 0.35 toks/s, output: 33.35 toks/s] \rProcessed prompts: 100%|██████████| 8/8 [04:58<00:00, 37.31s/it, est. speed input: 0.94 toks/s, output: 101.95 toks/s]\n/root/.local/share/uv/python/cpython-3.11.9-linux-x86_64-gnu/lib/python3.11/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked shared_memory objects to clean up at shutdown\n  warnings.warn('resource_tracker: There appear to be %d '\n" status_code: 0
+2024-08-22 12:35:57,409 - __main__ - INFO - Syncing output back to local
+2024-08-22 12:35:57,834 - __main__ - INFO - Done
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/stdout.txt
new file mode 100644
index 0000000..a261b37
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/stdout.txt
@@ -0,0 +1,49 @@
+gpu_count=4
+Starting model loading
+INFO 08-22 10:26:56 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 10:26:56 config.py:729] Defaulting to use mp for distributed inference
+INFO 08-22 10:26:56 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=4, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+WARNING 08-22 10:26:58 multiproc_gpu_executor.py:59] Reducing Torch parallelism from 24 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.
+INFO 08-22 10:26:58 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager
+[1;36m(VllmWorkerProcess pid=789)[0;0m INFO 08-22 10:26:58 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
+[1;36m(VllmWorkerProcess pid=787)[0;0m INFO 08-22 10:26:58 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
+[1;36m(VllmWorkerProcess pid=788)[0;0m INFO 08-22 10:26:58 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
+[1;36m(VllmWorkerProcess pid=788)[0;0m INFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2
+INFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2
+[1;36m(VllmWorkerProcess pid=787)[0;0m INFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2
+[1;36m(VllmWorkerProcess pid=788)[0;0m INFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5
+[1;36m(VllmWorkerProcess pid=789)[0;0m INFO 08-22 10:27:00 utils.py:841] Found nccl from library libnccl.so.2
+INFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5
+[1;36m(VllmWorkerProcess pid=789)[0;0m INFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5
+[1;36m(VllmWorkerProcess pid=787)[0;0m INFO 08-22 10:27:00 pynccl.py:63] vLLM is using nccl==2.20.5
+[1;36m(VllmWorkerProcess pid=787)[0;0m WARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.
+[1;36m(VllmWorkerProcess pid=788)[0;0m WARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.
+WARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.
+[1;36m(VllmWorkerProcess pid=789)[0;0m WARNING 08-22 10:27:00 custom_all_reduce.py:118] Custom allreduce is disabled because it's not supported on more than two PCIe-only GPUs. To silence this warning, specify disable_custom_all_reduce=True explicitly.
+INFO 08-22 10:27:00 shm_broadcast.py:235] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1, 2, 3], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x7e9dd0cc7590>, local_subscribe_port=40373, remote_subscribe_port=None)
+INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+[1;36m(VllmWorkerProcess pid=788)[0;0m INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+[1;36m(VllmWorkerProcess pid=787)[0;0m INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+[1;36m(VllmWorkerProcess pid=789)[0;0m INFO 08-22 10:27:00 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+[1;36m(VllmWorkerProcess pid=788)[0;0m INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors']
+[1;36m(VllmWorkerProcess pid=789)[0;0m INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors']
+[1;36m(VllmWorkerProcess pid=787)[0;0m INFO 08-22 10:27:00 weight_utils.py:225] Using model weights format ['*.safetensors']
+INFO 08-22 10:27:01 weight_utils.py:225] Using model weights format ['*.safetensors']
+[1;36m(VllmWorkerProcess pid=788)[0;0m INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB
+[1;36m(VllmWorkerProcess pid=789)[0;0m INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB
+[1;36m(VllmWorkerProcess pid=787)[0;0m INFO 08-22 10:30:35 model_runner.py:732] Loading model weights took 9.2867 GB
+INFO 08-22 10:30:36 model_runner.py:732] Loading model weights took 9.2867 GB
+INFO 08-22 10:30:44 distributed_gpu_executor.py:56] # GPU blocks: 3207, # CPU blocks: 3276
+model loading took 233.71 seconds
+Starting 8 responses generation
+8 responses generation took 298.52 seconds
+{'Count to 1000, skip unpopular numbers': 'e01a486cc144586ae8b3b56ac3ea584290fbe07834a67a8dbc9ef98c66015d87d9abd0bcee16e90850ca183cdc948abcf208fc1d38a3ee2f8e4851cac05c10d9',
+ 'Describe justice system in UK vs USA in 2000-5000 words': 'f12666eaf529cb993f9b5a24a9f3f9a336e0492c6fb45030acc46117776656ff5fff12fe03ba63ba431ffd32dfe68bc75a146059756f3925f0fbd1b39e01f1f8',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': '291023c3134a2fc4dc6f00507a428d9c7a6e166e1a0a7f73d74b0b4b60e460d6a50d143ea21b9cb9c26c10dd96002f208b0f8750dfc1b07cb5c742ff3c398fd2',
+ 'Explain me some random problem for me in 2000-5000 words': '6c10b7cfd03339881798d66c02e1be1f99e5536746d82517435c3ab26bb5f6b377540fb2d374af62bacc1557de85f0d70b7f753ec074bde161d150c94382a833',
+ 'Tell me entire history of USA': '1d193ab043b6dd23922e8258d6e134f390cebae90131340d47bf46510a2f34a4f93a5112b1e9160fe51219d2169576cda7948d605b4cb0d603d24388ee862687',
+ 'Write a ballad. Pick a random theme.': '53aa9308f203c0f71abf485420b4a87411b63ea75535d2c708226963ddf29b926db30b7f21c690af5bb914ab6b4f659685d1bda1d14899813dffd2de5fcdef7f',
+ 'Write an epic story about a dragon and a knight': 'e36cfba48cfa0862ad305c3f54543b0d7e9c44f89bdb6fb7d74168a8f1d3a5140b20644c62eda22795099daf3d5db93b8bd39fbb6394c5d6d5c41761cc253ce6',
+ 'Write an essay about being a Senior developer.': '33deb94b55d7c18d7b3a2b564c0413a25a2eeacd152519a5884a3fa7c8f078a01edea1d7536f05e582647dfdbb218630579071030e5ea016e8a957ac5e4057d0'}
+ERROR 08-22 10:35:51 multiproc_worker_utils.py:120] Worker VllmWorkerProcess pid 788 died, exit code: -15
+INFO 08-22 10:35:51 multiproc_worker_utils.py:123] Killing local vLLM worker processes
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/sysinfo.yaml
new file mode 100644
index 0000000..d6af4cb
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/sysinfo.yaml
@@ -0,0 +1,558 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 1500.0
+    - 1500.0
+    - 2800.0
+    - 2800.0
+    - 2400.0
+    - 2800.0
+    - 2800.0
+    - 1500.0
+    - 3349.951
+    - 1500.0
+    - 1799.199
+    - 1500.0
+    - 2800.0
+    - 2800.0
+    - 1500.0
+    - 2193.01
+    - 2589.516
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2800.0
+    - 2800.0
+    - 1500.0
+    - 3333.545
+    - 2311.349
+    - 1496.058
+    - 2400.0
+    - 2800.0
+    - 1500.0
+    - 2784.201
+    - 1500.0
+    - 2800.0
+    - 1500.0
+    - 2800.0
+    - 2400.0
+    - 2800.0
+    - 1500.0
+    - 1788.284
+    - 1799.926
+    - 1500.0
+    - 1500.0
+    - 2800.0
+    - 1500.0
+    - 2800.0
+    - 2028.381
+    - 3165.351
+    - 2800.0
+    - 2800.0
+    count: 48
+    model: AMD EPYC 7402P 24-Core Processor
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 4
+    details:
+    - capacity: '20475'
+      cuda: '8.9'
+      driver: 555.58.02
+      graphics_speed: '210'
+      memory_speed: '405'
+      name: NVIDIA RTX 4000 Ada Generation
+      power_limit: '130.00'
+    - capacity: '20475'
+      cuda: '8.9'
+      driver: 555.58.02
+      graphics_speed: '210'
+      memory_speed: '405'
+      name: NVIDIA RTX 4000 Ada Generation
+      power_limit: '130.00'
+    - capacity: '20475'
+      cuda: '8.9'
+      driver: 555.58.02
+      graphics_speed: '210'
+      memory_speed: '405'
+      name: NVIDIA RTX 4000 Ada Generation
+      power_limit: '130.00'
+    - capacity: '20475'
+      cuda: '8.9'
+      driver: 555.58.02
+      graphics_speed: '210'
+      memory_speed: '405'
+      name: NVIDIA RTX 4000 Ada Generation
+      power_limit: '130.00'
+  hard_disk:
+    free: 77359212
+    total: 83886080
+    used: 6526868
+  os: Ubuntu 22.04.4 LTS
+  ram:
+    available: 249409076
+    free: 121386568
+    total: 263771272
+    used: 142384704
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-03_4x_rtx_4000ada/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.12
+  - base-files==12ubuntu4.6
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1.1
+  - binutils==2.38-4ubuntu2.6
+  - binutils-common==2.38-4ubuntu2.6
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.6
+  - bsdutils==1:2.37.2-4ubuntu3.4
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1.2
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-5==12.5.39-1
+  - cuda-command-line-tools-12-5==12.5.1-1
+  - cuda-compat-12-5==555.42.06-1
+  - cuda-compiler-12-5==12.5.1-1
+  - cuda-crt-12-5==12.5.82-1
+  - cuda-cudart-12-5==12.5.82-1
+  - cuda-cudart-dev-12-5==12.5.82-1
+  - cuda-cuobjdump-12-5==12.5.39-1
+  - cuda-cupti-12-5==12.5.82-1
+  - cuda-cupti-dev-12-5==12.5.82-1
+  - cuda-cuxxfilt-12-5==12.5.82-1
+  - cuda-driver-dev-12-5==12.5.82-1
+  - cuda-gdb-12-5==12.5.82-1
+  - cuda-keyring==1.1-1
+  - cuda-libraries-12-5==12.5.1-1
+  - cuda-libraries-dev-12-5==12.5.1-1
+  - cuda-minimal-build-12-5==12.5.1-1
+  - cuda-nsight-compute-12-5==12.5.1-1
+  - cuda-nvcc-12-5==12.5.82-1
+  - cuda-nvdisasm-12-5==12.5.39-1
+  - cuda-nvml-dev-12-5==12.5.82-1
+  - cuda-nvprof-12-5==12.5.82-1
+  - cuda-nvprune-12-5==12.5.82-1
+  - cuda-nvrtc-12-5==12.5.82-1
+  - cuda-nvrtc-dev-12-5==12.5.82-1
+  - cuda-nvtx-12-5==12.5.82-1
+  - cuda-nvvm-12-5==12.5.82-1
+  - cuda-opencl-12-5==12.5.39-1
+  - cuda-opencl-dev-12-5==12.5.39-1
+  - cuda-profiler-api-12-5==12.5.39-1
+  - cuda-sanitizer-12-5==12.5.81-1
+  - cuda-toolkit-12-5-config-common==12.5.82-1
+  - cuda-toolkit-12-config-common==12.5.82-1
+  - cuda-toolkit-config-common==12.5.82-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.7
+  - dpkg==1.21.1ubuntu2.3
+  - dpkg-dev==1.21.1ubuntu2.3
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.11
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.12
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.6
+  - libblkid1==2.37.2-4ubuntu3.4
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.8
+  - libc-dev-bin==2.35-0ubuntu3.8
+  - libc6==2.35-0ubuntu3.8
+  - libc6-dev==2.35-0ubuntu3.8
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.2
+  - libctf-nobfd0==2.38-4ubuntu2.6
+  - libctf0==2.38-4ubuntu2.6
+  - libcublas-12-5==12.5.3.2-1
+  - libcublas-dev-12-5==12.5.3.2-1
+  - libcudnn9-cuda-12==9.2.1.18-1
+  - libcudnn9-dev-cuda-12==9.2.1.18-1
+  - libcufft-12-5==11.2.3.61-1
+  - libcufft-dev-12-5==11.2.3.61-1
+  - libcufile-12-5==1.10.1.7-1
+  - libcufile-dev-12-5==1.10.1.7-1
+  - libcurand-12-5==10.3.6.82-1
+  - libcurand-dev-12-5==10.3.6.82-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.17
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-5==11.6.3.83-1
+  - libcusolver-dev-12-5==11.6.3.83-1
+  - libcusparse-12-5==12.5.1.3-1
+  - libcusparse-dev-12-5==12.5.1.3-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.3
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.3
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.3
+  - libglib2.0-bin==2.72.4-0ubuntu2.3
+  - libglib2.0-data==2.72.4-0ubuntu2.3
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.5
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.3
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.2
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.3
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.3
+  - libkrb5support0==1.19.2-2ubuntu0.3
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.17+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3.4
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.22.3-1+cuda12.5
+  - libnccl2==2.22.3-1+cuda12.5
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.2
+  - libnpp-12-5==12.3.0.159-1
+  - libnpp-dev-12-5==12.3.0.159-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvfatbin-12-5==12.5.82-1
+  - libnvfatbin-dev-12-5==12.5.82-1
+  - libnvjitlink-12-5==12.5.82-1
+  - libnvjitlink-dev-12-5==12.5.82-1
+  - libnvjpeg-12-5==12.3.2.81-1
+  - libnvjpeg-dev-12-5==12.3.2.81-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.4
+  - libpam-modules-bin==1.4.0-11ubuntu2.4
+  - libpam-runtime==1.4.0-11ubuntu2.4
+  - libpam-systemd==249.11-0ubuntu3.12
+  - libpam0g==1.4.0-11ubuntu2.4
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.3
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2.1
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10-minimal==3.10.12-1~22.04.5
+  - libpython3.10-stdlib==3.10.12-1~22.04.5
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3.4
+  - libsqlite3-0==3.37.2-2ubuntu0.3
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.3
+  - libssl3==3.0.2-0ubuntu1.16
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.12
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.12
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3.4
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.4
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-113.123
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.2
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3.4
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2024.2.1==2024.2.1.2-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.16
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.2
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.3
+  - perl-base==5.34.0-3ubuntu1.3
+  - perl-modules-5.34==5.34.0-3ubuntu1.3
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2.1
+  - python-apt-common==2.4.0ubuntu3
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu3
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2.2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-distutils==3.10.8-1~22.04
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-lib2to3==3.10.8-1~22.04
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pip==22.0.2+dfsg-1ubuntu0.4
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-setuptools==59.6.0-1.2ubuntu0.22.04.1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-wheel==0.37.1-2ubuntu0.22.04.1
+  - python3-zipp==1.0.0-3ubuntu0.1
+  - python3.10==3.10.12-1~22.04.5
+  - python3.10-minimal==3.10.12-1~22.04.5
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.12
+  - systemd-sysv==249.11-0ubuntu3.12
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.2
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3.4
+  - wget==1.21.2-2ubuntu1.1
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#45~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Mon Jul 15 16:40:02 UTC 2'
+  processor: x86_64
+  release: 6.5.0-45-generic
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/experiment.yaml
new file mode 100644
index 0000000..8766afb
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 2x RTX A6000
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000
+slug: 2x_rtx_a6000
+timestamp: 2024-08-22_12-26-40
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output.yaml
new file mode 100644
index 0000000..38f582a
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: 90e9f7cd91517e389cadb86c442cef8db2957198de38d8f8754ba022477e395d3a35b5f4704510ed3ee93fc368b11850f3e4e3751bb52b4fcb0258bc954ceeeb
+Describe justice system in UK vs USA in 2000-5000 words: aaeca816cbedbdedf9193ee7b57795823ba4564e64f0283da95738e2f27c0f02d92977ef21701be345b01d667305b77987fec9439c2600216cc4e1be9e856db8
+Describe schooling system in UK vs USA in 2000-5000 words: b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd
+Explain me some random problem for me in 2000-5000 words: 011261f17df06f112cc781eda2fb1f1ffcef58a9247c8e1d83fd8e61a16e18b1953018d9b0f9b6224b38a69d5f3f7ecc0cf305cf0e66c9bd26ec5224ed404fad
+Tell me entire history of USA: 9a83ac06c7986da8448587ab09727bd297a96c59055fac300a2541b23cc2b88b4cdf035babf1a85a6b9aed6c18ba5659947de2774308ee50c911c515359f8cae
+Write a ballad. Pick a random theme.: f1a4f47af63fbb1c6333a6afbf187a89505731d60a3dd97d352e1e5261aaeb9bb79e4d3a6cdb2c251b4d3866eea9654bbd35248b2824fefb8be0e97d90b68ffc
+Write an epic story about a dragon and a knight: 72f9f5a5419718e907814f68e849907d2c941aa21498cbcb95b0e94a33f8989a622a072550fc04165505b1ac96278ae2b82928adb59059e136b4f2dea945faa4
+Write an essay about being a Senior developer.: 4277b57bea5b502a7810fc42e1d3055d34d47b9d4d77e03312ed42cb0fbaf045a203087ab110088ba9c6c88ae9a7233cf39d9b7fc4040bc05708101f3e7d2eec
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/run.local.log
new file mode 100644
index 0000000..f15fc24
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/run.local.log
@@ -0,0 +1,15 @@
+2024-08-22 12:26:40,850 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 2x RTX A6000
+2024-08-22 12:26:40,853 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/run.local.log
+2024-08-22 12:26:41,096 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 12:26:41,588 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-22 12:26:41,596 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 12:26:41,598 - __main__ - INFO - Syncing files to remote
+2024-08-22 12:26:41,991 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 12:26:47,230 - __main__ - INFO - Setting up remote environment
+2024-08-22 12:27:21,898 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n    source $HOME/.cargo/env (sh, bash, zsh)\n    source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-40_2x_rtx_a6000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-40_2x_rtx_a6000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-40_2x_rtx_a6000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 928ms\nPrepared 108 packages in 29.93s\nInstalled 108 packages in 394ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 12:27:21,927 - __main__ - INFO - Gathering system info
+2024-08-22 12:27:25,735 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-40_2x_rtx_a6000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-40_2x_rtx_a6000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-40_2x_rtx_a6000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 12:27:25,743 - __main__ - INFO - Running experiment code on remote
+2024-08-22 12:36:14,863 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output/stdout.txt' stdout: "gpu_count=2\nStarting model loading\nINFO 08-22 10:27:33 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:27:33 config.py:729] Defaulting to use mp for distributed inference\nINFO 08-22 10:27:33 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nWARNING 08-22 10:27:33 multiproc_gpu_executor.py:59] Reducing Torch parallelism from 24 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.\nINFO 08-22 10:27:33 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:34 multiproc_worker_utils.py:215] Worker ready; awaiting tasks\nINFO 08-22 10:27:34 utils.py:841] Found nccl from library libnccl.so.2\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:34 utils.py:841] Found nccl from library libnccl.so.2\nINFO 08-22 10:27:34 pynccl.py:63] vLLM is using nccl==2.20.5\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:34 pynccl.py:63] vLLM is using nccl==2.20.5\nINFO 08-22 10:27:35 custom_all_reduce_utils.py:203] generating GPU P2P access cache in /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\nINFO 08-22 10:27:45 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\nWARNING 08-22 10:27:45 custom_all_reduce.py:127] Custom allreduce is disabled because your platform lacks GPU P2P capability or P2P test failed. To silence this warning, specify disable_custom_all_reduce=True explicitly.\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:45 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m WARNING 08-22 10:27:45 custom_all_reduce.py:127] Custom allreduce is disabled because your platform lacks GPU P2P capability or P2P test failed. To silence this warning, specify disable_custom_all_reduce=True explicitly.\nINFO 08-22 10:27:45 shm_broadcast.py:235] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x7886d2e7d590>, local_subscribe_port=45163, remote_subscribe_port=None)\nINFO 08-22 10:27:45 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:45 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 10:27:45 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:27:45 weight_utils.py:225] Using model weights format ['*.safetensors']\n\x1b[1;36m(VllmWorkerProcess pid=761)\x1b[0;0m INFO 08-22 10:29:21 model_runner.py:732] Loading model weights took 18.5518 GB\nINFO 08-22 10:29:23 model_runner.py:732] Loading model weights took 18.5518 GB\nINFO 08-22 10:29:35 distributed_gpu_executor.py:56] # GPU blocks: 6941, # CPU blocks: 1638\nmodel loading took 126.15 seconds\nStarting 8 responses generation\n8 responses generation took 392.59 seconds\n{'Count to 1000, skip unpopular numbers': '90e9f7cd91517e389cadb86c442cef8db2957198de38d8f8754ba022477e395d3a35b5f4704510ed3ee93fc368b11850f3e4e3751bb52b4fcb0258bc954ceeeb',\n 'Describe justice system in UK vs USA in 2000-5000 words': 'aaeca816cbedbdedf9193ee7b57795823ba4564e64f0283da95738e2f27c0f02d92977ef21701be345b01d667305b77987fec9439c2600216cc4e1be9e856db8',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd',\n 'Explain me some random problem for me in 2000-5000 words': '011261f17df06f112cc781eda2fb1f1ffcef58a9247c8e1d83fd8e61a16e18b1953018d9b0f9b6224b38a69d5f3f7ecc0cf305cf0e66c9bd26ec5224ed404fad',\n 'Tell me entire history of USA': '9a83ac06c7986da8448587ab09727bd297a96c59055fac300a2541b23cc2b88b4cdf035babf1a85a6b9aed6c18ba5659947de2774308ee50c911c515359f8cae',\n 'Write a ballad. Pick a random theme.': 'f1a4f47af63fbb1c6333a6afbf187a89505731d60a3dd97d352e1e5261aaeb9bb79e4d3a6cdb2c251b4d3866eea9654bbd35248b2824fefb8be0e97d90b68ffc',\n 'Write an epic story about a dragon and a knight': '72f9f5a5419718e907814f68e849907d2c941aa21498cbcb95b0e94a33f8989a622a072550fc04165505b1ac96278ae2b82928adb59059e136b4f2dea945faa4',\n 'Write an essay about being a Senior developer.': '4277b57bea5b502a7810fc42e1d3055d34d47b9d4d77e03312ed42cb0fbaf045a203087ab110088ba9c6c88ae9a7233cf39d9b7fc4040bc05708101f3e7d2eec'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-26-40_2x_rtx_a6000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-26-40_2x_rtx_a6000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-26-40_2x_rtx_a6000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/output/stdout.txt\n\rLoading safetensors checkpoint shards:   0% Completed | 0/9 [00:00<?, ?it/s]\n\rLoading safetensors checkpoint shards:  11% Completed | 1/9 [00:02<00:21,  2.66s/it]\n\rLoading safetensors checkpoint shards:  22% Completed | 2/9 [00:06<00:23,  3.29s/it]\n\rLoading safetensors checkpoint shards:  33% Completed | 3/9 [00:10<00:21,  3.51s/it]\n\rLoading safetensors checkpoint shards:  44% Completed | 4/9 [00:13<00:18,  3.61s/it]\n\rLoading safetensors checkpoint shards:  56% Completed | 5/9 [00:17<00:14,  3.65s/it]\n\rLoading safetensors checkpoint shards:  67% Completed | 6/9 [00:21<00:11,  3.69s/it]\n\rLoading safetensors checkpoint shards:  78% Completed | 7/9 [00:25<00:07,  3.72s/it]\n\rLoading safetensors checkpoint shards:  89% Completed | 8/9 [00:28<00:03,  3.62s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:29<00:00,  2.89s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:29<00:00,  3.32s/it]\n\n/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/.venv/lib/python3.11/site-packages/vllm/model_executor/layers/sampler.py:287: UserWarning: cumsum_cuda_kernel does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'. You can file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation. (Triggered internally at ../aten/src/ATen/Context.cpp:83.)\n  probs_sum = probs_sort.cumsum(dim=-1)\n\rProcessed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\rProcessed prompts:  12%|█▎        | 1/8 [06:15<43:46, 375.22s/it, est. speed input: 0.10 toks/s, output: 10.40 toks/s]\rProcessed prompts:  38%|███▊      | 3/8 [06:32<08:38, 103.71s/it, est. speed input: 0.27 toks/s, output: 30.31 toks/s]\rProcessed prompts: 100%|██████████| 8/8 [06:32<00:00, 49.07s/it, est. speed input: 0.72 toks/s, output: 82.48 toks/s] \n/root/.local/share/uv/python/cpython-3.11.9-linux-x86_64-gnu/lib/python3.11/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked shared_memory objects to clean up at shutdown\n  warnings.warn('resource_tracker: There appear to be %d '\n" status_code: 0
+2024-08-22 12:36:14,913 - __main__ - INFO - Syncing output back to local
+2024-08-22 12:36:16,088 - __main__ - INFO - Done
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/stdout.txt
new file mode 100644
index 0000000..0ff9e40
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/stdout.txt
@@ -0,0 +1,36 @@
+gpu_count=2
+Starting model loading
+INFO 08-22 10:27:33 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 10:27:33 config.py:729] Defaulting to use mp for distributed inference
+INFO 08-22 10:27:33 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+WARNING 08-22 10:27:33 multiproc_gpu_executor.py:59] Reducing Torch parallelism from 24 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.
+INFO 08-22 10:27:33 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager
+[1;36m(VllmWorkerProcess pid=761)[0;0m INFO 08-22 10:27:34 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
+INFO 08-22 10:27:34 utils.py:841] Found nccl from library libnccl.so.2
+[1;36m(VllmWorkerProcess pid=761)[0;0m INFO 08-22 10:27:34 utils.py:841] Found nccl from library libnccl.so.2
+INFO 08-22 10:27:34 pynccl.py:63] vLLM is using nccl==2.20.5
+[1;36m(VllmWorkerProcess pid=761)[0;0m INFO 08-22 10:27:34 pynccl.py:63] vLLM is using nccl==2.20.5
+INFO 08-22 10:27:35 custom_all_reduce_utils.py:203] generating GPU P2P access cache in /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
+INFO 08-22 10:27:45 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
+WARNING 08-22 10:27:45 custom_all_reduce.py:127] Custom allreduce is disabled because your platform lacks GPU P2P capability or P2P test failed. To silence this warning, specify disable_custom_all_reduce=True explicitly.
+[1;36m(VllmWorkerProcess pid=761)[0;0m INFO 08-22 10:27:45 custom_all_reduce_utils.py:234] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
+[1;36m(VllmWorkerProcess pid=761)[0;0m WARNING 08-22 10:27:45 custom_all_reduce.py:127] Custom allreduce is disabled because your platform lacks GPU P2P capability or P2P test failed. To silence this warning, specify disable_custom_all_reduce=True explicitly.
+INFO 08-22 10:27:45 shm_broadcast.py:235] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x7886d2e7d590>, local_subscribe_port=45163, remote_subscribe_port=None)
+INFO 08-22 10:27:45 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+[1;36m(VllmWorkerProcess pid=761)[0;0m INFO 08-22 10:27:45 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+INFO 08-22 10:27:45 weight_utils.py:225] Using model weights format ['*.safetensors']
+[1;36m(VllmWorkerProcess pid=761)[0;0m INFO 08-22 10:27:45 weight_utils.py:225] Using model weights format ['*.safetensors']
+[1;36m(VllmWorkerProcess pid=761)[0;0m INFO 08-22 10:29:21 model_runner.py:732] Loading model weights took 18.5518 GB
+INFO 08-22 10:29:23 model_runner.py:732] Loading model weights took 18.5518 GB
+INFO 08-22 10:29:35 distributed_gpu_executor.py:56] # GPU blocks: 6941, # CPU blocks: 1638
+model loading took 126.15 seconds
+Starting 8 responses generation
+8 responses generation took 392.59 seconds
+{'Count to 1000, skip unpopular numbers': '90e9f7cd91517e389cadb86c442cef8db2957198de38d8f8754ba022477e395d3a35b5f4704510ed3ee93fc368b11850f3e4e3751bb52b4fcb0258bc954ceeeb',
+ 'Describe justice system in UK vs USA in 2000-5000 words': 'aaeca816cbedbdedf9193ee7b57795823ba4564e64f0283da95738e2f27c0f02d92977ef21701be345b01d667305b77987fec9439c2600216cc4e1be9e856db8',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': 'b6eb47984ecfd87923e1a6e929381a3398208165e87bb0a1d1acc69677aea74d4156a3502f5aabc7340bc96d879bd10f7f53f1b728ed9833ca99d601f0886afd',
+ 'Explain me some random problem for me in 2000-5000 words': '011261f17df06f112cc781eda2fb1f1ffcef58a9247c8e1d83fd8e61a16e18b1953018d9b0f9b6224b38a69d5f3f7ecc0cf305cf0e66c9bd26ec5224ed404fad',
+ 'Tell me entire history of USA': '9a83ac06c7986da8448587ab09727bd297a96c59055fac300a2541b23cc2b88b4cdf035babf1a85a6b9aed6c18ba5659947de2774308ee50c911c515359f8cae',
+ 'Write a ballad. Pick a random theme.': 'f1a4f47af63fbb1c6333a6afbf187a89505731d60a3dd97d352e1e5261aaeb9bb79e4d3a6cdb2c251b4d3866eea9654bbd35248b2824fefb8be0e97d90b68ffc',
+ 'Write an epic story about a dragon and a knight': '72f9f5a5419718e907814f68e849907d2c941aa21498cbcb95b0e94a33f8989a622a072550fc04165505b1ac96278ae2b82928adb59059e136b4f2dea945faa4',
+ 'Write an essay about being a Senior developer.': '4277b57bea5b502a7810fc42e1d3055d34d47b9d4d77e03312ed42cb0fbaf045a203087ab110088ba9c6c88ae9a7233cf39d9b7fc4040bc05708101f3e7d2eec'}
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/sysinfo.yaml
new file mode 100644
index 0000000..fb349ee
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/sysinfo.yaml
@@ -0,0 +1,544 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 2700.0
+    - 1200.011
+    - 1200.0
+    - 1300.0
+    - 1200.0
+    - 1200.0
+    - 2500.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 2698.678
+    - 2299.646
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 2700.627
+    - 2401.573
+    - 2057.748
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1499.94
+    - 2400.157
+    - 1500.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1922.86
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    - 1200.0
+    count: 48
+    model: Intel(R) Xeon(R) Gold 6226 CPU @ 2.70GHz
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 2
+    details:
+    - capacity: '46068'
+      cuda: '8.6'
+      driver: 555.58.02
+      graphics_speed: '0'
+      memory_speed: '405'
+      name: NVIDIA RTX A6000
+      power_limit: '300.00'
+    - capacity: '46068'
+      cuda: '8.6'
+      driver: 555.58.02
+      graphics_speed: '0'
+      memory_speed: '405'
+      name: NVIDIA RTX A6000
+      power_limit: '300.00'
+  hard_disk:
+    free: 77359368
+    total: 83886080
+    used: 6526712
+  os: Ubuntu 22.04.4 LTS
+  ram:
+    available: 189181908
+    free: 125037648
+    total: 197637528
+    used: 72599880
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-26-40_2x_rtx_a6000/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.12
+  - base-files==12ubuntu4.6
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1.1
+  - binutils==2.38-4ubuntu2.6
+  - binutils-common==2.38-4ubuntu2.6
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.6
+  - bsdutils==1:2.37.2-4ubuntu3.4
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1.2
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-5==12.5.39-1
+  - cuda-command-line-tools-12-5==12.5.1-1
+  - cuda-compat-12-5==555.42.06-1
+  - cuda-compiler-12-5==12.5.1-1
+  - cuda-crt-12-5==12.5.82-1
+  - cuda-cudart-12-5==12.5.82-1
+  - cuda-cudart-dev-12-5==12.5.82-1
+  - cuda-cuobjdump-12-5==12.5.39-1
+  - cuda-cupti-12-5==12.5.82-1
+  - cuda-cupti-dev-12-5==12.5.82-1
+  - cuda-cuxxfilt-12-5==12.5.82-1
+  - cuda-driver-dev-12-5==12.5.82-1
+  - cuda-gdb-12-5==12.5.82-1
+  - cuda-keyring==1.1-1
+  - cuda-libraries-12-5==12.5.1-1
+  - cuda-libraries-dev-12-5==12.5.1-1
+  - cuda-minimal-build-12-5==12.5.1-1
+  - cuda-nsight-compute-12-5==12.5.1-1
+  - cuda-nvcc-12-5==12.5.82-1
+  - cuda-nvdisasm-12-5==12.5.39-1
+  - cuda-nvml-dev-12-5==12.5.82-1
+  - cuda-nvprof-12-5==12.5.82-1
+  - cuda-nvprune-12-5==12.5.82-1
+  - cuda-nvrtc-12-5==12.5.82-1
+  - cuda-nvrtc-dev-12-5==12.5.82-1
+  - cuda-nvtx-12-5==12.5.82-1
+  - cuda-nvvm-12-5==12.5.82-1
+  - cuda-opencl-12-5==12.5.39-1
+  - cuda-opencl-dev-12-5==12.5.39-1
+  - cuda-profiler-api-12-5==12.5.39-1
+  - cuda-sanitizer-12-5==12.5.81-1
+  - cuda-toolkit-12-5-config-common==12.5.82-1
+  - cuda-toolkit-12-config-common==12.5.82-1
+  - cuda-toolkit-config-common==12.5.82-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.7
+  - dpkg==1.21.1ubuntu2.3
+  - dpkg-dev==1.21.1ubuntu2.3
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.11
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.12
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.6
+  - libblkid1==2.37.2-4ubuntu3.4
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.8
+  - libc-dev-bin==2.35-0ubuntu3.8
+  - libc6==2.35-0ubuntu3.8
+  - libc6-dev==2.35-0ubuntu3.8
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.2
+  - libctf-nobfd0==2.38-4ubuntu2.6
+  - libctf0==2.38-4ubuntu2.6
+  - libcublas-12-5==12.5.3.2-1
+  - libcublas-dev-12-5==12.5.3.2-1
+  - libcudnn9-cuda-12==9.2.1.18-1
+  - libcudnn9-dev-cuda-12==9.2.1.18-1
+  - libcufft-12-5==11.2.3.61-1
+  - libcufft-dev-12-5==11.2.3.61-1
+  - libcufile-12-5==1.10.1.7-1
+  - libcufile-dev-12-5==1.10.1.7-1
+  - libcurand-12-5==10.3.6.82-1
+  - libcurand-dev-12-5==10.3.6.82-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.17
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-5==11.6.3.83-1
+  - libcusolver-dev-12-5==11.6.3.83-1
+  - libcusparse-12-5==12.5.1.3-1
+  - libcusparse-dev-12-5==12.5.1.3-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.3
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.3
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.3
+  - libglib2.0-bin==2.72.4-0ubuntu2.3
+  - libglib2.0-data==2.72.4-0ubuntu2.3
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.5
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.3
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.2
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.3
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.3
+  - libkrb5support0==1.19.2-2ubuntu0.3
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.17+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3.4
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.22.3-1+cuda12.5
+  - libnccl2==2.22.3-1+cuda12.5
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.2
+  - libnpp-12-5==12.3.0.159-1
+  - libnpp-dev-12-5==12.3.0.159-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvfatbin-12-5==12.5.82-1
+  - libnvfatbin-dev-12-5==12.5.82-1
+  - libnvjitlink-12-5==12.5.82-1
+  - libnvjitlink-dev-12-5==12.5.82-1
+  - libnvjpeg-12-5==12.3.2.81-1
+  - libnvjpeg-dev-12-5==12.3.2.81-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.4
+  - libpam-modules-bin==1.4.0-11ubuntu2.4
+  - libpam-runtime==1.4.0-11ubuntu2.4
+  - libpam-systemd==249.11-0ubuntu3.12
+  - libpam0g==1.4.0-11ubuntu2.4
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.3
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2.1
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10-minimal==3.10.12-1~22.04.5
+  - libpython3.10-stdlib==3.10.12-1~22.04.5
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3.4
+  - libsqlite3-0==3.37.2-2ubuntu0.3
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.3
+  - libssl3==3.0.2-0ubuntu1.16
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.12
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.12
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3.4
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.4
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-113.123
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.2
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3.4
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2024.2.1==2024.2.1.2-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.16
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.2
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.3
+  - perl-base==5.34.0-3ubuntu1.3
+  - perl-modules-5.34==5.34.0-3ubuntu1.3
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2.1
+  - python-apt-common==2.4.0ubuntu3
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu3
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2.2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-distutils==3.10.8-1~22.04
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-lib2to3==3.10.8-1~22.04
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pip==22.0.2+dfsg-1ubuntu0.4
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-setuptools==59.6.0-1.2ubuntu0.22.04.1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-wheel==0.37.1-2ubuntu0.22.04.1
+  - python3-zipp==1.0.0-3ubuntu0.1
+  - python3.10==3.10.12-1~22.04.5
+  - python3.10-minimal==3.10.12-1~22.04.5
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.12
+  - systemd-sysv==249.11-0ubuntu3.12
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.2
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3.4
+  - wget==1.21.2-2ubuntu1.1
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#40~22.04.3-Ubuntu SMP PREEMPT_DYNAMIC Tue Jul 30 17:30:19 UTC 2'
+  processor: x86_64
+  release: 6.8.0-40-generic
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/experiment.yaml
new file mode 100644
index 0000000..5435cd7
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 1x H100 NVL
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl
+slug: 1x_h100_nvl
+timestamp: 2024-08-22_12-49-14
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output.yaml
new file mode 100644
index 0000000..de10221
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: 40b945bf30ccd2a3a9996c3579a6f503195da6c271b76082bc514c827d86d87eeea8677a001e8c0cf9b4d53bc24cd575f4be62d1f8d754077a272da0df073762
+Describe justice system in UK vs USA in 2000-5000 words: 2b6f5ba49c027271b4ef651e7bf25a03f2389e708f9fc833d1b8fe0018f2c1c150fbd3dd81e2dc14fe3ffe0ef53abf7be18236f30338feff2353e8b9713ca973
+Describe schooling system in UK vs USA in 2000-5000 words: 3a7b4d5edc2a14a525a1d0677e36570fdb22d31521947c85d626788a0688e66494a7e9414799602236a6bb9455a525029e84202446871b54613a19b2fb3aca66
+Explain me some random problem for me in 2000-5000 words: 323525095bf264a962795c48d2111bc18e790bcf9ac6ac229817d6d2ceaed9c5d71147039284037d64cc4792fe1af452540729701c6ec1ec84e4a835edfd8827
+Tell me entire history of USA: d2a958bb90a040a9eab4c9fe0f08524c4bd4047b33fb838ea05612d98d2f7a2df7c6c2c86b70065b7e0f3b4f3a96bc7aeacd00de1cbad3bcc3385587b90be77e
+Write a ballad. Pick a random theme.: 8ff349a1b32d048df8dc8520552b5321dc65386c0812a2d0a2100d6a23ea493559872c861583b3581aca858f3f71c427c5ef031d860370809098bd9a2d87b98b
+Write an epic story about a dragon and a knight: a009390cba1b724f1d842064dd4400751745157bfa4e2498189719fe705df094ec4dc393f01f4a21f3c66eea9104ef4c162af115f23d4f9a8cf08b4b3a844f77
+Write an essay about being a Senior developer.: 426f3bc4a0c96f12a23df460712ab3a0b29e5594d98fe72f474d233472612599a4c47e6dab3e13cf9a2d83a46a9a43b689ccb3f607d0af9b6d8a80d478f17e43
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/run.local.log
new file mode 100644
index 0000000..275824a
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/run.local.log
@@ -0,0 +1,15 @@
+2024-08-22 12:49:14,487 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x H100 NVL
+2024-08-22 12:49:14,490 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/run.local.log
+2024-08-22 12:49:14,851 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 12:49:15,574 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-22 12:49:15,582 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 12:49:15,584 - __main__ - INFO - Syncing files to remote
+2024-08-22 12:49:16,157 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 12:49:23,557 - __main__ - INFO - Setting up remote environment
+2024-08-22 12:50:03,266 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n    source $HOME/.cargo/env (sh, bash, zsh)\n    source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-49-14_1x_h100_nvl '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-49-14_1x_h100_nvl\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-49-14_1x_h100_nvl) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 958ms\nPrepared 108 packages in 33.96s\nInstalled 108 packages in 559ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 12:50:03,285 - __main__ - INFO - Gathering system info
+2024-08-22 12:50:07,008 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-49-14_1x_h100_nvl '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-49-14_1x_h100_nvl\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-49-14_1x_h100_nvl) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 12:50:07,015 - __main__ - INFO - Running experiment code on remote
+2024-08-22 13:02:02,793 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 10:50:14 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:50:14 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 10:50:16 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 10:50:17 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 10:58:40 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 10:58:46 gpu_executor.py:102] # GPU blocks: 8601, # CPU blocks: 819\nmodel loading took 529.89 seconds\nStarting 8 responses generation\n8 responses generation took 175.68 seconds\n{'Count to 1000, skip unpopular numbers': '40b945bf30ccd2a3a9996c3579a6f503195da6c271b76082bc514c827d86d87eeea8677a001e8c0cf9b4d53bc24cd575f4be62d1f8d754077a272da0df073762',\n 'Describe justice system in UK vs USA in 2000-5000 words': '2b6f5ba49c027271b4ef651e7bf25a03f2389e708f9fc833d1b8fe0018f2c1c150fbd3dd81e2dc14fe3ffe0ef53abf7be18236f30338feff2353e8b9713ca973',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '3a7b4d5edc2a14a525a1d0677e36570fdb22d31521947c85d626788a0688e66494a7e9414799602236a6bb9455a525029e84202446871b54613a19b2fb3aca66',\n 'Explain me some random problem for me in 2000-5000 words': '323525095bf264a962795c48d2111bc18e790bcf9ac6ac229817d6d2ceaed9c5d71147039284037d64cc4792fe1af452540729701c6ec1ec84e4a835edfd8827',\n 'Tell me entire history of USA': 'd2a958bb90a040a9eab4c9fe0f08524c4bd4047b33fb838ea05612d98d2f7a2df7c6c2c86b70065b7e0f3b4f3a96bc7aeacd00de1cbad3bcc3385587b90be77e',\n 'Write a ballad. Pick a random theme.': '8ff349a1b32d048df8dc8520552b5321dc65386c0812a2d0a2100d6a23ea493559872c861583b3581aca858f3f71c427c5ef031d860370809098bd9a2d87b98b',\n 'Write an epic story about a dragon and a knight': 'a009390cba1b724f1d842064dd4400751745157bfa4e2498189719fe705df094ec4dc393f01f4a21f3c66eea9104ef4c162af115f23d4f9a8cf08b4b3a844f77',\n 'Write an essay about being a Senior developer.': '426f3bc4a0c96f12a23df460712ab3a0b29e5594d98fe72f474d233472612599a4c47e6dab3e13cf9a2d83a46a9a43b689ccb3f607d0af9b6d8a80d478f17e43'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-49-14_1x_h100_nvl '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-49-14_1x_h100_nvl\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-49-14_1x_h100_nvl) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/output/stdout.txt\n\rLoading safetensors checkpoint shards:   0% Completed | 0/9 [00:00<?, ?it/s]\n\rLoading safetensors checkpoint shards:  11% Completed | 1/9 [00:18<02:31, 18.94s/it]\n\rLoading safetensors checkpoint shards:  22% Completed | 2/9 [00:32<01:48, 15.56s/it]\n\rLoading safetensors checkpoint shards:  33% Completed | 3/9 [00:51<01:44, 17.45s/it]\n\rLoading safetensors checkpoint shards:  44% Completed | 4/9 [01:04<01:18, 15.74s/it]\n\rLoading safetensors checkpoint shards:  56% Completed | 5/9 [01:06<00:41, 10.48s/it]\n\rLoading safetensors checkpoint shards:  67% Completed | 6/9 [01:25<00:40, 13.46s/it]\n\rLoading safetensors checkpoint shards:  78% Completed | 7/9 [01:44<00:30, 15.23s/it]\n\rLoading safetensors checkpoint shards:  89% Completed | 8/9 [02:04<00:16, 16.72s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [02:23<00:00, 17.68s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [02:23<00:00, 15.99s/it]\n\n/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/.venv/lib/python3.11/site-packages/vllm/model_executor/layers/sampler.py:287: UserWarning: cumsum_cuda_kernel does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'. You can file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation. (Triggered internally at ../aten/src/ATen/Context.cpp:83.)\n  probs_sum = probs_sort.cumsum(dim=-1)\n\rProcessed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\rProcessed prompts:  12%|█▎        | 1/8 [02:47<19:30, 167.20s/it, est. speed input: 0.23 toks/s, output: 23.34 toks/s]\rProcessed prompts:  25%|██▌       | 2/8 [02:55<07:23, 73.83s/it, est. speed input: 0.41 toks/s, output: 45.53 toks/s] \rProcessed prompts: 100%|██████████| 8/8 [02:55<00:00, 21.96s/it, est. speed input: 1.60 toks/s, output: 185.42 toks/s]\n" status_code: 0
+2024-08-22 13:02:02,833 - __main__ - INFO - Syncing output back to local
+2024-08-22 13:02:06,308 - __main__ - INFO - Done
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/stdout.txt
new file mode 100644
index 0000000..35ccfc6
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/stdout.txt
@@ -0,0 +1,19 @@
+gpu_count=1
+Starting model loading
+INFO 08-22 10:50:14 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 10:50:14 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+INFO 08-22 10:50:16 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+INFO 08-22 10:50:17 weight_utils.py:225] Using model weights format ['*.safetensors']
+INFO 08-22 10:58:40 model_runner.py:732] Loading model weights took 37.0561 GB
+INFO 08-22 10:58:46 gpu_executor.py:102] # GPU blocks: 8601, # CPU blocks: 819
+model loading took 529.89 seconds
+Starting 8 responses generation
+8 responses generation took 175.68 seconds
+{'Count to 1000, skip unpopular numbers': '40b945bf30ccd2a3a9996c3579a6f503195da6c271b76082bc514c827d86d87eeea8677a001e8c0cf9b4d53bc24cd575f4be62d1f8d754077a272da0df073762',
+ 'Describe justice system in UK vs USA in 2000-5000 words': '2b6f5ba49c027271b4ef651e7bf25a03f2389e708f9fc833d1b8fe0018f2c1c150fbd3dd81e2dc14fe3ffe0ef53abf7be18236f30338feff2353e8b9713ca973',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': '3a7b4d5edc2a14a525a1d0677e36570fdb22d31521947c85d626788a0688e66494a7e9414799602236a6bb9455a525029e84202446871b54613a19b2fb3aca66',
+ 'Explain me some random problem for me in 2000-5000 words': '323525095bf264a962795c48d2111bc18e790bcf9ac6ac229817d6d2ceaed9c5d71147039284037d64cc4792fe1af452540729701c6ec1ec84e4a835edfd8827',
+ 'Tell me entire history of USA': 'd2a958bb90a040a9eab4c9fe0f08524c4bd4047b33fb838ea05612d98d2f7a2df7c6c2c86b70065b7e0f3b4f3a96bc7aeacd00de1cbad3bcc3385587b90be77e',
+ 'Write a ballad. Pick a random theme.': '8ff349a1b32d048df8dc8520552b5321dc65386c0812a2d0a2100d6a23ea493559872c861583b3581aca858f3f71c427c5ef031d860370809098bd9a2d87b98b',
+ 'Write an epic story about a dragon and a knight': 'a009390cba1b724f1d842064dd4400751745157bfa4e2498189719fe705df094ec4dc393f01f4a21f3c66eea9104ef4c162af115f23d4f9a8cf08b4b3a844f77',
+ 'Write an essay about being a Senior developer.': '426f3bc4a0c96f12a23df460712ab3a0b29e5594d98fe72f474d233472612599a4c47e6dab3e13cf9a2d83a46a9a43b689ccb3f607d0af9b6d8a80d478f17e43'}
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/sysinfo.yaml
new file mode 100644
index 0000000..26d7553
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/sysinfo.yaml
@@ -0,0 +1,541 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 1500.0
+    - 1499.211
+    - 1500.0
+    - 1500.005
+    - 1498.988
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3000.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3000.108
+    - 1500.0
+    - 1500.0
+    - 2999.972
+    - 2994.989
+    - 1500.0
+    - 1499.375
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3000.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3000.0
+    - 1500.0
+    - 3000.114
+    - 3000.0
+    - 1500.0
+    - 1500.0
+    - 3000.222
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1854.95
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2998.068
+    - 1500.0
+    - 3000.0
+    - 1500.0
+    count: 64
+    model: AMD EPYC 9124 16-Core Processor
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 1
+    details:
+    - capacity: '95830'
+      cuda: '9.0'
+      driver: 550.90.07
+      graphics_speed: '345'
+      memory_speed: '2619'
+      name: NVIDIA H100 NVL
+      power_limit: '400.00'
+  hard_disk:
+    free: 56436916
+    total: 62914560
+    used: 6477644
+  os: Ubuntu 22.04.3 LTS
+  ram:
+    available: 1553799468
+    free: 597237120
+    total: 1585062952
+    used: 987825832
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-49-14_1x_h100_nvl/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.10
+  - base-files==12ubuntu4.4
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1
+  - binutils==2.38-4ubuntu2.3
+  - binutils-common==2.38-4ubuntu2.3
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.3
+  - bsdutils==1:2.37.2-4ubuntu3
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-0==12.0.140-1
+  - cuda-command-line-tools-12-0==12.0.1-1
+  - cuda-compat-12-0==525.147.05-1
+  - cuda-compiler-12-0==12.0.1-1
+  - cuda-cudart-12-0==12.0.146-1
+  - cuda-cudart-dev-12-0==12.0.146-1
+  - cuda-cuobjdump-12-0==12.0.140-1
+  - cuda-cupti-12-0==12.0.146-1
+  - cuda-cupti-dev-12-0==12.0.146-1
+  - cuda-cuxxfilt-12-0==12.0.140-1
+  - cuda-driver-dev-12-0==12.0.146-1
+  - cuda-gdb-12-0==12.0.140-1
+  - cuda-keyring==1.0-1
+  - cuda-libraries-12-0==12.0.1-1
+  - cuda-libraries-dev-12-0==12.0.1-1
+  - cuda-minimal-build-12-0==12.0.1-1
+  - cuda-nsight-compute-12-0==12.0.1-1
+  - cuda-nvcc-12-0==12.0.140-1
+  - cuda-nvdisasm-12-0==12.0.140-1
+  - cuda-nvml-dev-12-0==12.0.140-1
+  - cuda-nvprof-12-0==12.0.146-1
+  - cuda-nvprune-12-0==12.0.140-1
+  - cuda-nvrtc-12-0==12.0.140-1
+  - cuda-nvrtc-dev-12-0==12.0.140-1
+  - cuda-nvtx-12-0==12.0.140-1
+  - cuda-opencl-12-0==12.0.140-1
+  - cuda-opencl-dev-12-0==12.0.140-1
+  - cuda-profiler-api-12-0==12.0.140-1
+  - cuda-sanitizer-12-0==12.0.140-1
+  - cuda-toolkit-12-0-config-common==12.0.146-1
+  - cuda-toolkit-12-config-common==12.3.52-1
+  - cuda-toolkit-config-common==12.3.52-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.7
+  - dpkg==1.21.1ubuntu2.2
+  - dpkg-dev==1.21.1ubuntu2.2
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.11
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.10
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.3
+  - libblkid1==2.37.2-4ubuntu3
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.4
+  - libc-dev-bin==2.35-0ubuntu3.4
+  - libc6==2.35-0ubuntu3.4
+  - libc6-dev==2.35-0ubuntu3.4
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.2
+  - libctf-nobfd0==2.38-4ubuntu2.3
+  - libctf0==2.38-4ubuntu2.3
+  - libcublas-12-0==12.0.2.224-1
+  - libcublas-dev-12-0==12.0.2.224-1
+  - libcufft-12-0==11.0.1.95-1
+  - libcufft-dev-12-0==11.0.1.95-1
+  - libcufile-12-0==1.5.1.14-1
+  - libcufile-dev-12-0==1.5.1.14-1
+  - libcurand-12-0==10.3.1.124-1
+  - libcurand-dev-12-0==10.3.1.124-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.16
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-0==11.4.3.1-1
+  - libcusolver-dev-12-0==11.4.3.1-1
+  - libcusparse-12-0==12.0.1.140-1
+  - libcusparse-dev-12-0==12.0.1.140-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.2
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.3
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.3
+  - libglib2.0-bin==2.72.4-0ubuntu2.3
+  - libglib2.0-data==2.72.4-0ubuntu2.3
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.2
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.2
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.2
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.2
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.2
+  - libkrb5support0==1.19.2-2ubuntu0.2
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.16+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.16.5-1+cuda12.0
+  - libnccl2==2.16.5-1+cuda12.0
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.2
+  - libnpp-12-0==12.0.1.104-1
+  - libnpp-dev-12-0==12.0.1.104-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvjitlink-12-0==12.0.140-1
+  - libnvjitlink-dev-12-0==12.0.140-1
+  - libnvjpeg-12-0==12.0.1.102-1
+  - libnvjpeg-dev-12-0==12.0.1.102-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.3
+  - libpam-modules-bin==1.4.0-11ubuntu2.3
+  - libpam-runtime==1.4.0-11ubuntu2.3
+  - libpam-systemd==249.11-0ubuntu3.12
+  - libpam0g==1.4.0-11ubuntu2.3
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.2
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10-minimal==3.10.12-1~22.04.5
+  - libpython3.10-stdlib==3.10.12-1~22.04.5
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3
+  - libsqlite3-0==3.37.2-2ubuntu0.1
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.3
+  - libssl3==3.0.2-0ubuntu1.10
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.12
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.10
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.4
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-88.98
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.1
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2022.4.1==2022.4.1.6-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.12
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.1
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.2
+  - perl-base==5.34.0-3ubuntu1.2
+  - perl-modules-5.34==5.34.0-3ubuntu1.2
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2
+  - python-apt-common==2.4.0ubuntu3
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu3
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2.2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-zipp==1.0.0-3ubuntu0.1
+  - python3.10==3.10.12-1~22.04.5
+  - python3.10-minimal==3.10.12-1~22.04.5
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.12
+  - systemd-sysv==249.11-0ubuntu3.12
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.1
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3
+  - wget==1.21.2-2ubuntu1.1
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#41~22.04.2-Ubuntu SMP PREEMPT_DYNAMIC Mon Jun  3 11:32:55 UTC 2'
+  processor: x86_64
+  release: 6.5.0-41-generic
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/experiment.yaml
new file mode 100644
index 0000000..bdbd894
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 1x A100X
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x
+slug: 1x_a100x
+timestamp: 2024-08-22_12-54-54
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output.yaml
new file mode 100644
index 0000000..88b431d
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: 5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e
+Describe justice system in UK vs USA in 2000-5000 words: 83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede
+Describe schooling system in UK vs USA in 2000-5000 words: f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1
+Explain me some random problem for me in 2000-5000 words: 143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c
+Tell me entire history of USA: 210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518
+Write a ballad. Pick a random theme.: 21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3
+Write an epic story about a dragon and a knight: 81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94
+Write an essay about being a Senior developer.: 0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/run.local.log
new file mode 100644
index 0000000..7deedfd
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/run.local.log
@@ -0,0 +1,15 @@
+2024-08-22 12:54:54,626 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x A100X
+2024-08-22 12:54:54,629 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/run.local.log
+2024-08-22 12:54:54,916 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 12:54:55,492 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-22 12:54:55,510 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 12:54:55,513 - __main__ - INFO - Syncing files to remote
+2024-08-22 12:54:55,974 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 12:55:01,984 - __main__ - INFO - Setting up remote environment
+2024-08-22 12:55:41,430 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n    source $HOME/.cargo/env (sh, bash, zsh)\n    source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-54-54_1x_a100x '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-54-54_1x_a100x\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-54-54_1x_a100x) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 1.13s\nPrepared 108 packages in 33.86s\nInstalled 108 packages in 353ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 12:55:41,451 - __main__ - INFO - Gathering system info
+2024-08-22 12:55:45,656 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-54-54_1x_a100x '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-54-54_1x_a100x\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-54-54_1x_a100x) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 12:55:45,672 - __main__ - INFO - Running experiment code on remote
+2024-08-22 13:07:21,903 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 10:55:53 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 10:55:53 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 10:55:55 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 10:55:55 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 11:02:44 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 11:02:53 gpu_executor.py:102] # GPU blocks: 6049, # CPU blocks: 819\nmodel loading took 423.10 seconds\nStarting 8 responses generation\n8 responses generation took 263.25 seconds\n{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e',\n 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1',\n 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c',\n 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518',\n 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3',\n 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94',\n 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-54-54_1x_a100x '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-54-54_1x_a100x\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-54-54_1x_a100x) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/output/stdout.txt\n\rLoading safetensors checkpoint shards:   0% Completed | 0/9 [00:00<?, ?it/s]\n\rLoading safetensors checkpoint shards:  11% Completed | 1/9 [00:03<00:24,  3.12s/it]\n\rLoading safetensors checkpoint shards:  22% Completed | 2/9 [00:06<00:21,  3.11s/it]\n\rLoading safetensors checkpoint shards:  33% Completed | 3/9 [00:07<00:13,  2.21s/it]\n\rLoading safetensors checkpoint shards:  44% Completed | 4/9 [00:10<00:12,  2.52s/it]\n\rLoading safetensors checkpoint shards:  56% Completed | 5/9 [00:13<00:11,  2.75s/it]\n\rLoading safetensors checkpoint shards:  67% Completed | 6/9 [00:15<00:07,  2.54s/it]\n\rLoading safetensors checkpoint shards:  78% Completed | 7/9 [00:18<00:05,  2.72s/it]\n\rLoading safetensors checkpoint shards:  89% Completed | 8/9 [00:21<00:02,  2.81s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:24<00:00,  2.84s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:24<00:00,  2.74s/it]\n\n/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/.venv/lib/python3.11/site-packages/vllm/model_executor/layers/sampler.py:287: UserWarning: cumsum_cuda_kernel does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'. You can file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation. (Triggered internally at ../aten/src/ATen/Context.cpp:83.)\n  probs_sum = probs_sort.cumsum(dim=-1)\n\rProcessed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\rProcessed prompts:  12%|█▎        | 1/8 [04:23<30:42, 263.24s/it, est. speed input: 0.13 toks/s, output: 15.56 toks/s]\rProcessed prompts: 100%|██████████| 8/8 [04:23<00:00, 32.91s/it, est. speed input: 1.07 toks/s, output: 124.48 toks/s]\n" status_code: 0
+2024-08-22 13:07:21,944 - __main__ - INFO - Syncing output back to local
+2024-08-22 13:07:24,690 - __main__ - INFO - Done
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/stdout.txt
new file mode 100644
index 0000000..e4558a7
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/stdout.txt
@@ -0,0 +1,19 @@
+gpu_count=1
+Starting model loading
+INFO 08-22 10:55:53 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 10:55:53 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+INFO 08-22 10:55:55 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+INFO 08-22 10:55:55 weight_utils.py:225] Using model weights format ['*.safetensors']
+INFO 08-22 11:02:44 model_runner.py:732] Loading model weights took 37.0561 GB
+INFO 08-22 11:02:53 gpu_executor.py:102] # GPU blocks: 6049, # CPU blocks: 819
+model loading took 423.10 seconds
+Starting 8 responses generation
+8 responses generation took 263.25 seconds
+{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e',
+ 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1',
+ 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c',
+ 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518',
+ 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3',
+ 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94',
+ 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'}
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/sysinfo.yaml
new file mode 100644
index 0000000..0b93728
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/sysinfo.yaml
@@ -0,0 +1,542 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 1499.852
+    - 1500.0
+    - 3297.37
+    - 1500.0
+    - 3300.002
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3000.0
+    - 1500.0
+    - 3296.559
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3000.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2400.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3000.0
+    - 2400.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3297.616
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3000.0
+    - 3295.993
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1497.549
+    - 2400.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    count: 64
+    model: AMD EPYC 7302 16-Core Processor
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 1
+    details:
+    - capacity: '81920'
+      cuda: '8.0'
+      driver: 535.129.03
+      graphics_speed: '210'
+      memory_speed: '1593'
+      name: NVIDIA A100X
+      power_limit: '300.00'
+  hard_disk:
+    free: 56400520
+    total: 62914560
+    used: 6514040
+  os: Ubuntu 22.04.3 LTS
+  ram:
+    available: 496739204
+    free: 226021648
+    total: 528200620
+    used: 302178972
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-54-54_1x_a100x/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.10
+  - base-files==12ubuntu4.4
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1
+  - binutils==2.38-4ubuntu2.3
+  - binutils-common==2.38-4ubuntu2.3
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.3
+  - bsdutils==1:2.37.2-4ubuntu3
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-0==12.0.140-1
+  - cuda-command-line-tools-12-0==12.0.1-1
+  - cuda-compat-12-0==525.147.05-1
+  - cuda-compiler-12-0==12.0.1-1
+  - cuda-cudart-12-0==12.0.146-1
+  - cuda-cudart-dev-12-0==12.0.146-1
+  - cuda-cuobjdump-12-0==12.0.140-1
+  - cuda-cupti-12-0==12.0.146-1
+  - cuda-cupti-dev-12-0==12.0.146-1
+  - cuda-cuxxfilt-12-0==12.0.140-1
+  - cuda-driver-dev-12-0==12.0.146-1
+  - cuda-gdb-12-0==12.0.140-1
+  - cuda-keyring==1.0-1
+  - cuda-libraries-12-0==12.0.1-1
+  - cuda-libraries-dev-12-0==12.0.1-1
+  - cuda-minimal-build-12-0==12.0.1-1
+  - cuda-nsight-compute-12-0==12.0.1-1
+  - cuda-nvcc-12-0==12.0.140-1
+  - cuda-nvdisasm-12-0==12.0.140-1
+  - cuda-nvml-dev-12-0==12.0.140-1
+  - cuda-nvprof-12-0==12.0.146-1
+  - cuda-nvprune-12-0==12.0.140-1
+  - cuda-nvrtc-12-0==12.0.140-1
+  - cuda-nvrtc-dev-12-0==12.0.140-1
+  - cuda-nvtx-12-0==12.0.140-1
+  - cuda-opencl-12-0==12.0.140-1
+  - cuda-opencl-dev-12-0==12.0.140-1
+  - cuda-profiler-api-12-0==12.0.140-1
+  - cuda-sanitizer-12-0==12.0.140-1
+  - cuda-toolkit-12-0-config-common==12.0.146-1
+  - cuda-toolkit-12-config-common==12.3.52-1
+  - cuda-toolkit-config-common==12.3.52-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.6
+  - dpkg==1.21.1ubuntu2.2
+  - dpkg-dev==1.21.1ubuntu2.2
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.10
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.10
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.3
+  - libblkid1==2.37.2-4ubuntu3
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.4
+  - libc-dev-bin==2.35-0ubuntu3.4
+  - libc6==2.35-0ubuntu3.4
+  - libc6-dev==2.35-0ubuntu3.4
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.2
+  - libctf-nobfd0==2.38-4ubuntu2.3
+  - libctf0==2.38-4ubuntu2.3
+  - libcublas-12-0==12.0.2.224-1
+  - libcublas-dev-12-0==12.0.2.224-1
+  - libcufft-12-0==11.0.1.95-1
+  - libcufft-dev-12-0==11.0.1.95-1
+  - libcufile-12-0==1.5.1.14-1
+  - libcufile-dev-12-0==1.5.1.14-1
+  - libcurand-12-0==10.3.1.124-1
+  - libcurand-dev-12-0==10.3.1.124-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.15
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-0==11.4.3.1-1
+  - libcusolver-dev-12-0==11.4.3.1-1
+  - libcusparse-12-0==12.0.1.140-1
+  - libcusparse-dev-12-0==12.0.1.140-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.2
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.3
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.2
+  - libglib2.0-bin==2.72.4-0ubuntu2.2
+  - libglib2.0-data==2.72.4-0ubuntu2.2
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.2
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.2
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.2
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.2
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.2
+  - libkrb5support0==1.19.2-2ubuntu0.2
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.16+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.16.5-1+cuda12.0
+  - libnccl2==2.16.5-1+cuda12.0
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.1
+  - libnpp-12-0==12.0.1.104-1
+  - libnpp-dev-12-0==12.0.1.104-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvjitlink-12-0==12.0.140-1
+  - libnvjitlink-dev-12-0==12.0.140-1
+  - libnvjpeg-12-0==12.0.1.102-1
+  - libnvjpeg-dev-12-0==12.0.1.102-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.3
+  - libpam-modules-bin==1.4.0-11ubuntu2.3
+  - libpam-runtime==1.4.0-11ubuntu2.3
+  - libpam-systemd==249.11-0ubuntu3.12
+  - libpam0g==1.4.0-11ubuntu2.3
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.2
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10-minimal==3.10.12-1~22.04.3
+  - libpython3.10-stdlib==3.10.12-1~22.04.3
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3
+  - libsqlite3-0==3.37.2-2ubuntu0.1
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.3
+  - libssl3==3.0.2-0ubuntu1.10
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.12
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.10
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.4
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-88.98
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.1
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2022.4.1==2022.4.1.6-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.12
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.1
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.2
+  - perl-base==5.34.0-3ubuntu1.2
+  - perl-modules-5.34==5.34.0-3ubuntu1.2
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2
+  - python-apt-common==2.4.0ubuntu3
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu3
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2.2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-zipp==1.0.0-3
+  - python3.10==3.10.12-1~22.04.3
+  - python3.10-minimal==3.10.12-1~22.04.3
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.12
+  - systemd-sysv==249.11-0ubuntu3.12
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.1
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3
+  - wget==1.21.2-2ubuntu1.1
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#99~20.04.1-Ubuntu SMP Thu Nov 2 15:16:47 UTC 2023'
+  processor: x86_64
+  release: 5.15.0-89-generic
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/experiment.yaml
new file mode 100644
index 0000000..76c1ccf
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 1x A100 PCIE
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie
+slug: 1x_a100_pcie
+timestamp: 2024-08-22_12-58-24
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/output.yaml
new file mode 100644
index 0000000..88b431d
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: 5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e
+Describe justice system in UK vs USA in 2000-5000 words: 83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede
+Describe schooling system in UK vs USA in 2000-5000 words: f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1
+Explain me some random problem for me in 2000-5000 words: 143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c
+Tell me entire history of USA: 210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518
+Write a ballad. Pick a random theme.: 21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3
+Write an epic story about a dragon and a knight: 81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94
+Write an essay about being a Senior developer.: 0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/run.local.log
new file mode 100644
index 0000000..3816345
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/run.local.log
@@ -0,0 +1,12 @@
+2024-08-22 12:58:24,205 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x A100 PCIE
+2024-08-22 12:58:24,208 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/run.local.log
+2024-08-22 12:58:24,510 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 12:58:25,098 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 12:58:25,103 - __main__ - INFO - Syncing files to remote
+2024-08-22 12:58:25,584 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 12:58:30,628 - __main__ - INFO - Setting up remote environment
+2024-08-22 12:59:10,244 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n" stderr: "+ sh\n+ curl -LsSf https://astral.sh/uv/install.sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-58-24_1x_a100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-58-24_1x_a100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-58-24_1x_a100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 541ms\nPrepared 108 packages in 36.29s\nInstalled 108 packages in 498ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev2+g218f083.d20240822 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 12:59:10,265 - __main__ - INFO - Gathering system info
+2024-08-22 12:59:12,858 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_12-58-24_1x_a100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_12-58-24_1x_a100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_12-58-24_1x_a100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 12:59:12,874 - __main__ - INFO - Running experiment code on remote
+2024-08-22 13:45:08,985 - __main__ - INFO - Syncing output back to local
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/stdout.txt
new file mode 100644
index 0000000..2be7483
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/stdout.txt
@@ -0,0 +1,19 @@
+gpu_count=1
+Starting model loading
+INFO 08-22 10:59:18 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 10:59:18 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+INFO 08-22 10:59:20 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+INFO 08-22 10:59:21 weight_utils.py:225] Using model weights format ['*.safetensors']
+INFO 08-22 11:05:30 model_runner.py:732] Loading model weights took 37.0561 GB
+INFO 08-22 11:05:38 gpu_executor.py:102] # GPU blocks: 6049, # CPU blocks: 819
+model loading took 382.21 seconds
+Starting 8 responses generation
+8 responses generation took 184.56 seconds
+{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e',
+ 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1',
+ 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c',
+ 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518',
+ 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3',
+ 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94',
+ 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'}
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/sysinfo.yaml
new file mode 100644
index 0000000..0e330bc
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/sysinfo.yaml
@@ -0,0 +1,510 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 1796.352
+    - 1800.0
+    - 2900.0
+    - 4000.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 3632.855
+    - 4539.065
+    - 3634.414
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    - 1800.0
+    count: 32
+    model: AMD Ryzen Threadripper PRO 5955WX 16-Cores
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 1
+    details:
+    - capacity: '81920'
+      cuda: '8.0'
+      driver: 535.54.03
+      graphics_speed: '210'
+      memory_speed: '1512'
+      name: NVIDIA A100 80GB PCIe
+      power_limit: '300.00'
+  hard_disk:
+    free: 56277300
+    total: 62914560
+    used: 6637260
+  os: Ubuntu 22.04.3 LTS
+  ram:
+    available: 460197228
+    free: 162540176
+    total: 527997164
+    used: 365456988
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_12-58-24_1x_a100_pcie/deterministic_ml-0.1.dev2+g218f083.d20240822-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.10
+  - base-files==12ubuntu4.4
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1
+  - binutils==2.38-4ubuntu2.3
+  - binutils-common==2.38-4ubuntu2.3
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.3
+  - bsdutils==1:2.37.2-4ubuntu3
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-0==12.0.140-1
+  - cuda-command-line-tools-12-0==12.0.1-1
+  - cuda-compat-12-0==525.147.05-1
+  - cuda-compiler-12-0==12.0.1-1
+  - cuda-cudart-12-0==12.0.146-1
+  - cuda-cudart-dev-12-0==12.0.146-1
+  - cuda-cuobjdump-12-0==12.0.140-1
+  - cuda-cupti-12-0==12.0.146-1
+  - cuda-cupti-dev-12-0==12.0.146-1
+  - cuda-cuxxfilt-12-0==12.0.140-1
+  - cuda-driver-dev-12-0==12.0.146-1
+  - cuda-gdb-12-0==12.0.140-1
+  - cuda-keyring==1.0-1
+  - cuda-libraries-12-0==12.0.1-1
+  - cuda-libraries-dev-12-0==12.0.1-1
+  - cuda-minimal-build-12-0==12.0.1-1
+  - cuda-nsight-compute-12-0==12.0.1-1
+  - cuda-nvcc-12-0==12.0.140-1
+  - cuda-nvdisasm-12-0==12.0.140-1
+  - cuda-nvml-dev-12-0==12.0.140-1
+  - cuda-nvprof-12-0==12.0.146-1
+  - cuda-nvprune-12-0==12.0.140-1
+  - cuda-nvrtc-12-0==12.0.140-1
+  - cuda-nvrtc-dev-12-0==12.0.140-1
+  - cuda-nvtx-12-0==12.0.140-1
+  - cuda-opencl-12-0==12.0.140-1
+  - cuda-opencl-dev-12-0==12.0.140-1
+  - cuda-profiler-api-12-0==12.0.140-1
+  - cuda-sanitizer-12-0==12.0.140-1
+  - cuda-toolkit-12-0-config-common==12.0.146-1
+  - cuda-toolkit-12-config-common==12.3.52-1
+  - cuda-toolkit-config-common==12.3.52-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.7
+  - dpkg==1.21.1ubuntu2.2
+  - dpkg-dev==1.21.1ubuntu2.2
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.11
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.10
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.3
+  - libblkid1==2.37.2-4ubuntu3
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.4
+  - libc-dev-bin==2.35-0ubuntu3.4
+  - libc6==2.35-0ubuntu3.4
+  - libc6-dev==2.35-0ubuntu3.4
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.2
+  - libctf-nobfd0==2.38-4ubuntu2.3
+  - libctf0==2.38-4ubuntu2.3
+  - libcublas-12-0==12.0.2.224-1
+  - libcublas-dev-12-0==12.0.2.224-1
+  - libcufft-12-0==11.0.1.95-1
+  - libcufft-dev-12-0==11.0.1.95-1
+  - libcufile-12-0==1.5.1.14-1
+  - libcufile-dev-12-0==1.5.1.14-1
+  - libcurand-12-0==10.3.1.124-1
+  - libcurand-dev-12-0==10.3.1.124-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.17
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-0==11.4.3.1-1
+  - libcusolver-dev-12-0==11.4.3.1-1
+  - libcusparse-12-0==12.0.1.140-1
+  - libcusparse-dev-12-0==12.0.1.140-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.2
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.3
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.3
+  - libglib2.0-bin==2.72.4-0ubuntu2.3
+  - libglib2.0-data==2.72.4-0ubuntu2.3
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.2
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.2
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.2
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.2
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.2
+  - libkrb5support0==1.19.2-2ubuntu0.2
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.16+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.16.5-1+cuda12.0
+  - libnccl2==2.16.5-1+cuda12.0
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.2
+  - libnpp-12-0==12.0.1.104-1
+  - libnpp-dev-12-0==12.0.1.104-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvjitlink-12-0==12.0.140-1
+  - libnvjitlink-dev-12-0==12.0.140-1
+  - libnvjpeg-12-0==12.0.1.102-1
+  - libnvjpeg-dev-12-0==12.0.1.102-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.3
+  - libpam-modules-bin==1.4.0-11ubuntu2.3
+  - libpam-runtime==1.4.0-11ubuntu2.3
+  - libpam-systemd==249.11-0ubuntu3.12
+  - libpam0g==1.4.0-11ubuntu2.3
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.2
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10-minimal==3.10.12-1~22.04.5
+  - libpython3.10-stdlib==3.10.12-1~22.04.5
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3
+  - libsqlite3-0==3.37.2-2ubuntu0.1
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.3
+  - libssl3==3.0.2-0ubuntu1.10
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.12
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.10
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.4
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-88.98
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.1
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2022.4.1==2022.4.1.6-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.12
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.1
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.2
+  - perl-base==5.34.0-3ubuntu1.2
+  - perl-modules-5.34==5.34.0-3ubuntu1.2
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2
+  - python-apt-common==2.4.0ubuntu3
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu3
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2.2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-zipp==1.0.0-3ubuntu0.1
+  - python3.10==3.10.12-1~22.04.5
+  - python3.10-minimal==3.10.12-1~22.04.5
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.12
+  - systemd-sysv==249.11-0ubuntu3.12
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.1
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3
+  - wget==1.21.2-2ubuntu1.1
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#40~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov 16 10:53:04 UTC 2'
+  processor: x86_64
+  release: 6.2.0-39-generic
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/experiment.yaml
new file mode 100644
index 0000000..c08efc1
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 1x H100 PCIE
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie
+slug: 1x_h100_pcie
+timestamp: 2024-08-22_13-32-37
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output.yaml
new file mode 100644
index 0000000..b9dd089
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: 5edaedfc17612b5878d0766192b3e126326bbc0da437ecc80bb531dd370a1f370226a881ff8c7be1efa474827f8973efca04a88efacc929d5611871805291aea
+Describe justice system in UK vs USA in 2000-5000 words: 64f46d659436b9676439620ba99f104744c9ff612eacc973ac585fd935141a8777f02563a8a38de4c3991b28316bf7867b7c0af783a52a2d0b51c0d86460c362
+Describe schooling system in UK vs USA in 2000-5000 words: eac275677ca4a6ddd67713c401c804675b591863e82e439ac1bbb482b19f22c8633fbc8c588a868bc3772b4ee9b753f6dab998cdba7853dd7a017846172a1117
+Explain me some random problem for me in 2000-5000 words: 9420afbcb5a4566cbc7eac4361f8915a94b4d3ec46ca9e07d60d9661c0ffa013316ec88df24c9c261e0362693aaaf17df2f3024561f811fcd3725674f5987b51
+Tell me entire history of USA: 3cc0629da42ffd940b4649526c09fa8e7488ccb7b65cd9db9ebcf5c3b7def820acf9e9b2ace342237bf45444d79a36c5c2b8a938b5e6a22a176bf9d7ca018a5f
+Write a ballad. Pick a random theme.: 886c987fbb7e204c648671cff7c51f7476b8050e5b3d4cd9664abdb395b396f438b65ba31767124912e97fbacf14c3879f9d68169e524c47bedb8688edcde1c2
+Write an epic story about a dragon and a knight: 5015e1d7b4d307014843de4ff7e75223b64ef38de1fa625e6ccfb25e0d5e2a9f8c1c3a2b7dfe0d45afdb6a502f6b644be00de7653f53a490e1267f71829f6352
+Write an essay about being a Senior developer.: 303665cb313f789f0d033db3e61b21b2436255b77ba5e221e54fc8f5180a1f822fca22c0ab221912d04c3a1f8d90e324eae2ccde1ec18375de12b36b3da04581
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/run.local.log
new file mode 100644
index 0000000..da5c3d3
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/run.local.log
@@ -0,0 +1,15 @@
+2024-08-22 13:32:37,442 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x H100 PCIE
+2024-08-22 13:32:37,445 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/run.local.log
+2024-08-22 13:32:37,750 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 13:32:38,358 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-22 13:32:38,366 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 13:32:38,368 - __main__ - INFO - Syncing files to remote
+2024-08-22 13:32:38,853 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 13:32:45,617 - __main__ - INFO - Setting up remote environment
+2024-08-22 13:33:18,125 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n    source $HOME/.cargo/env (sh, bash, zsh)\n    source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-32-37_1x_h100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-32-37_1x_h100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-32-37_1x_h100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev3+g69e9ae9-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 534ms\nPrepared 108 packages in 28.49s\nInstalled 108 packages in 386ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev3+g69e9ae9 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/deterministic_ml-0.1.dev3+g69e9ae9-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 13:33:18,146 - __main__ - INFO - Gathering system info
+2024-08-22 13:33:21,064 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-32-37_1x_h100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-32-37_1x_h100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-32-37_1x_h100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 13:33:21,077 - __main__ - INFO - Running experiment code on remote
+2024-08-22 13:37:31,207 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 11:33:26 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 11:33:26 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 11:33:28 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 11:33:28 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 11:34:33 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 11:34:40 gpu_executor.py:102] # GPU blocks: 6043, # CPU blocks: 819\nmodel loading took 76.33 seconds\nStarting 8 responses generation\n8 responses generation took 166.18 seconds\n{'Count to 1000, skip unpopular numbers': '5edaedfc17612b5878d0766192b3e126326bbc0da437ecc80bb531dd370a1f370226a881ff8c7be1efa474827f8973efca04a88efacc929d5611871805291aea',\n 'Describe justice system in UK vs USA in 2000-5000 words': '64f46d659436b9676439620ba99f104744c9ff612eacc973ac585fd935141a8777f02563a8a38de4c3991b28316bf7867b7c0af783a52a2d0b51c0d86460c362',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'eac275677ca4a6ddd67713c401c804675b591863e82e439ac1bbb482b19f22c8633fbc8c588a868bc3772b4ee9b753f6dab998cdba7853dd7a017846172a1117',\n 'Explain me some random problem for me in 2000-5000 words': '9420afbcb5a4566cbc7eac4361f8915a94b4d3ec46ca9e07d60d9661c0ffa013316ec88df24c9c261e0362693aaaf17df2f3024561f811fcd3725674f5987b51',\n 'Tell me entire history of USA': '3cc0629da42ffd940b4649526c09fa8e7488ccb7b65cd9db9ebcf5c3b7def820acf9e9b2ace342237bf45444d79a36c5c2b8a938b5e6a22a176bf9d7ca018a5f',\n 'Write a ballad. Pick a random theme.': '886c987fbb7e204c648671cff7c51f7476b8050e5b3d4cd9664abdb395b396f438b65ba31767124912e97fbacf14c3879f9d68169e524c47bedb8688edcde1c2',\n 'Write an epic story about a dragon and a knight': '5015e1d7b4d307014843de4ff7e75223b64ef38de1fa625e6ccfb25e0d5e2a9f8c1c3a2b7dfe0d45afdb6a502f6b644be00de7653f53a490e1267f71829f6352',\n 'Write an essay about being a Senior developer.': '303665cb313f789f0d033db3e61b21b2436255b77ba5e221e54fc8f5180a1f822fca22c0ab221912d04c3a1f8d90e324eae2ccde1ec18375de12b36b3da04581'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-32-37_1x_h100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-32-37_1x_h100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-32-37_1x_h100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/output/stdout.txt\n\rLoading safetensors checkpoint shards:   0% Completed | 0/9 [00:00<?, ?it/s]\n\rLoading safetensors checkpoint shards:  11% Completed | 1/9 [00:00<00:06,  1.31it/s]\n\rLoading safetensors checkpoint shards:  22% Completed | 2/9 [00:01<00:07,  1.04s/it]\n\rLoading safetensors checkpoint shards:  33% Completed | 3/9 [00:03<00:06,  1.14s/it]\n\rLoading safetensors checkpoint shards:  44% Completed | 4/9 [00:04<00:05,  1.17s/it]\n\rLoading safetensors checkpoint shards:  56% Completed | 5/9 [00:05<00:04,  1.22s/it]\n\rLoading safetensors checkpoint shards:  67% Completed | 6/9 [00:07<00:03,  1.24s/it]\n\rLoading safetensors checkpoint shards:  78% Completed | 7/9 [00:07<00:02,  1.02s/it]\n\rLoading safetensors checkpoint shards:  89% Completed | 8/9 [00:08<00:01,  1.07s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:10<00:00,  1.14s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:10<00:00,  1.12s/it]\n\n/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/.venv/lib/python3.11/site-packages/vllm/model_executor/layers/sampler.py:287: UserWarning: cumsum_cuda_kernel does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'. You can file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation. (Triggered internally at ../aten/src/ATen/Context.cpp:83.)\n  probs_sum = probs_sort.cumsum(dim=-1)\n\rProcessed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\rProcessed prompts:  12%|█▎        | 1/8 [02:46<19:23, 166.18s/it, est. speed input: 0.20 toks/s, output: 24.65 toks/s]\rProcessed prompts: 100%|██████████| 8/8 [02:46<00:00, 20.77s/it, est. speed input: 1.69 toks/s, output: 197.19 toks/s]\n" status_code: 0
+2024-08-22 13:37:31,227 - __main__ - INFO - Syncing output back to local
+2024-08-22 13:37:32,717 - __main__ - INFO - Done
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/stdout.txt
new file mode 100644
index 0000000..d021f87
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/stdout.txt
@@ -0,0 +1,19 @@
+gpu_count=1
+Starting model loading
+INFO 08-22 11:33:26 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 11:33:26 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+INFO 08-22 11:33:28 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+INFO 08-22 11:33:28 weight_utils.py:225] Using model weights format ['*.safetensors']
+INFO 08-22 11:34:33 model_runner.py:732] Loading model weights took 37.0561 GB
+INFO 08-22 11:34:40 gpu_executor.py:102] # GPU blocks: 6043, # CPU blocks: 819
+model loading took 76.33 seconds
+Starting 8 responses generation
+8 responses generation took 166.18 seconds
+{'Count to 1000, skip unpopular numbers': '5edaedfc17612b5878d0766192b3e126326bbc0da437ecc80bb531dd370a1f370226a881ff8c7be1efa474827f8973efca04a88efacc929d5611871805291aea',
+ 'Describe justice system in UK vs USA in 2000-5000 words': '64f46d659436b9676439620ba99f104744c9ff612eacc973ac585fd935141a8777f02563a8a38de4c3991b28316bf7867b7c0af783a52a2d0b51c0d86460c362',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': 'eac275677ca4a6ddd67713c401c804675b591863e82e439ac1bbb482b19f22c8633fbc8c588a868bc3772b4ee9b753f6dab998cdba7853dd7a017846172a1117',
+ 'Explain me some random problem for me in 2000-5000 words': '9420afbcb5a4566cbc7eac4361f8915a94b4d3ec46ca9e07d60d9661c0ffa013316ec88df24c9c261e0362693aaaf17df2f3024561f811fcd3725674f5987b51',
+ 'Tell me entire history of USA': '3cc0629da42ffd940b4649526c09fa8e7488ccb7b65cd9db9ebcf5c3b7def820acf9e9b2ace342237bf45444d79a36c5c2b8a938b5e6a22a176bf9d7ca018a5f',
+ 'Write a ballad. Pick a random theme.': '886c987fbb7e204c648671cff7c51f7476b8050e5b3d4cd9664abdb395b396f438b65ba31767124912e97fbacf14c3879f9d68169e524c47bedb8688edcde1c2',
+ 'Write an epic story about a dragon and a knight': '5015e1d7b4d307014843de4ff7e75223b64ef38de1fa625e6ccfb25e0d5e2a9f8c1c3a2b7dfe0d45afdb6a502f6b644be00de7653f53a490e1267f71829f6352',
+ 'Write an essay about being a Senior developer.': '303665cb313f789f0d033db3e61b21b2436255b77ba5e221e54fc8f5180a1f822fca22c0ab221912d04c3a1f8d90e324eae2ccde1ec18375de12b36b3da04581'}
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/sysinfo.yaml
new file mode 100644
index 0000000..4db8b06
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/sysinfo.yaml
@@ -0,0 +1,606 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2300.0
+    - 3250.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3250.0
+    - 1500.0
+    - 1311.248
+    - 2300.0
+    - 1500.0
+    - 1500.0
+    - 3250.0
+    - 1532.486
+    - 1500.0
+    - 3250.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3794.886
+    - 3796.426
+    - 2169.057
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3798.682
+    - 3250.0
+    - 2300.0
+    - 3795.433
+    - 1518.182
+    - 3795.576
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3800.46
+    - 3793.12
+    - 1500.0
+    - 1500.0
+    - 3250.0
+    - 2300.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3799.21
+    - 3794.421
+    - 1498.203
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3250.0
+    - 3250.0
+    - 3250.0
+    - 1519.698
+    - 1500.0
+    - 3250.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1310.351
+    - 1500.0
+    - 1500.0
+    - 1314.12
+    - 1500.0
+    - 3250.0
+    - 1500.0
+    - 3250.0
+    - 1500.0
+    - 3250.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2300.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2300.0
+    - 1500.0
+    - 1500.0
+    - 2300.0
+    - 1500.0
+    - 3804.623
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 3250.0
+    - 3795.581
+    - 3800.207
+    - 1500.0
+    - 3797.746
+    - 3790.224
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    count: 128
+    model: AMD EPYC 9354 32-Core Processor
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 1
+    details:
+    - capacity: '81559'
+      cuda: '9.0'
+      driver: 535.183.01
+      graphics_speed: '345'
+      memory_speed: '1593'
+      name: NVIDIA H100 PCIe
+      power_limit: '350.00'
+  hard_disk:
+    free: 56463968
+    total: 62914560
+    used: 6450592
+  os: Ubuntu 22.04.3 LTS
+  ram:
+    available: 780382592
+    free: 258896520
+    total: 792304224
+    used: 533407704
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-32-37_1x_h100_pcie/deterministic_ml-0.1.dev3+g69e9ae9-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.10
+  - base-files==12ubuntu4.4
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1
+  - binutils==2.38-4ubuntu2.3
+  - binutils-common==2.38-4ubuntu2.3
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.3
+  - bsdutils==1:2.37.2-4ubuntu3
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-0==12.0.140-1
+  - cuda-command-line-tools-12-0==12.0.1-1
+  - cuda-compat-12-0==525.147.05-1
+  - cuda-compiler-12-0==12.0.1-1
+  - cuda-cudart-12-0==12.0.146-1
+  - cuda-cudart-dev-12-0==12.0.146-1
+  - cuda-cuobjdump-12-0==12.0.140-1
+  - cuda-cupti-12-0==12.0.146-1
+  - cuda-cupti-dev-12-0==12.0.146-1
+  - cuda-cuxxfilt-12-0==12.0.140-1
+  - cuda-driver-dev-12-0==12.0.146-1
+  - cuda-gdb-12-0==12.0.140-1
+  - cuda-keyring==1.0-1
+  - cuda-libraries-12-0==12.0.1-1
+  - cuda-libraries-dev-12-0==12.0.1-1
+  - cuda-minimal-build-12-0==12.0.1-1
+  - cuda-nsight-compute-12-0==12.0.1-1
+  - cuda-nvcc-12-0==12.0.140-1
+  - cuda-nvdisasm-12-0==12.0.140-1
+  - cuda-nvml-dev-12-0==12.0.140-1
+  - cuda-nvprof-12-0==12.0.146-1
+  - cuda-nvprune-12-0==12.0.140-1
+  - cuda-nvrtc-12-0==12.0.140-1
+  - cuda-nvrtc-dev-12-0==12.0.140-1
+  - cuda-nvtx-12-0==12.0.140-1
+  - cuda-opencl-12-0==12.0.140-1
+  - cuda-opencl-dev-12-0==12.0.140-1
+  - cuda-profiler-api-12-0==12.0.140-1
+  - cuda-sanitizer-12-0==12.0.140-1
+  - cuda-toolkit-12-0-config-common==12.0.146-1
+  - cuda-toolkit-12-config-common==12.3.52-1
+  - cuda-toolkit-config-common==12.3.52-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.7
+  - dpkg==1.21.1ubuntu2.2
+  - dpkg-dev==1.21.1ubuntu2.2
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.11
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.10
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.3
+  - libblkid1==2.37.2-4ubuntu3
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.4
+  - libc-dev-bin==2.35-0ubuntu3.4
+  - libc6==2.35-0ubuntu3.4
+  - libc6-dev==2.35-0ubuntu3.4
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.2
+  - libctf-nobfd0==2.38-4ubuntu2.3
+  - libctf0==2.38-4ubuntu2.3
+  - libcublas-12-0==12.0.2.224-1
+  - libcublas-dev-12-0==12.0.2.224-1
+  - libcufft-12-0==11.0.1.95-1
+  - libcufft-dev-12-0==11.0.1.95-1
+  - libcufile-12-0==1.5.1.14-1
+  - libcufile-dev-12-0==1.5.1.14-1
+  - libcurand-12-0==10.3.1.124-1
+  - libcurand-dev-12-0==10.3.1.124-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.17
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-0==11.4.3.1-1
+  - libcusolver-dev-12-0==11.4.3.1-1
+  - libcusparse-12-0==12.0.1.140-1
+  - libcusparse-dev-12-0==12.0.1.140-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.2
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.3
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.3
+  - libglib2.0-bin==2.72.4-0ubuntu2.3
+  - libglib2.0-data==2.72.4-0ubuntu2.3
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.2
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.2
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.2
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.2
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.2
+  - libkrb5support0==1.19.2-2ubuntu0.2
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.16+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.16.5-1+cuda12.0
+  - libnccl2==2.16.5-1+cuda12.0
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.2
+  - libnpp-12-0==12.0.1.104-1
+  - libnpp-dev-12-0==12.0.1.104-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvjitlink-12-0==12.0.140-1
+  - libnvjitlink-dev-12-0==12.0.140-1
+  - libnvjpeg-12-0==12.0.1.102-1
+  - libnvjpeg-dev-12-0==12.0.1.102-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.3
+  - libpam-modules-bin==1.4.0-11ubuntu2.3
+  - libpam-runtime==1.4.0-11ubuntu2.3
+  - libpam-systemd==249.11-0ubuntu3.12
+  - libpam0g==1.4.0-11ubuntu2.3
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.2
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10-minimal==3.10.12-1~22.04.5
+  - libpython3.10-stdlib==3.10.12-1~22.04.5
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3
+  - libsqlite3-0==3.37.2-2ubuntu0.1
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.3
+  - libssl3==3.0.2-0ubuntu1.10
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.12
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.10
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.4
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-88.98
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.1
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2022.4.1==2022.4.1.6-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.12
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.1
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.2
+  - perl-base==5.34.0-3ubuntu1.2
+  - perl-modules-5.34==5.34.0-3ubuntu1.2
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2
+  - python-apt-common==2.4.0ubuntu3
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu3
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2.2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-zipp==1.0.0-3ubuntu0.1
+  - python3.10==3.10.12-1~22.04.5
+  - python3.10-minimal==3.10.12-1~22.04.5
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.12
+  - systemd-sysv==249.11-0ubuntu3.12
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.1
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3
+  - wget==1.21.2-2ubuntu1.1
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#40~22.04.3-Ubuntu SMP PREEMPT_DYNAMIC Tue Jul 30 17:30:19 UTC 2'
+  processor: x86_64
+  release: 6.8.0-40-generic
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/experiment.yaml
new file mode 100644
index 0000000..5a26316
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 1x H100 SXM
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm
+slug: 1x_h100_sxm
+timestamp: 2024-08-22_13-42-57
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output.yaml
new file mode 100644
index 0000000..de10221
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: 40b945bf30ccd2a3a9996c3579a6f503195da6c271b76082bc514c827d86d87eeea8677a001e8c0cf9b4d53bc24cd575f4be62d1f8d754077a272da0df073762
+Describe justice system in UK vs USA in 2000-5000 words: 2b6f5ba49c027271b4ef651e7bf25a03f2389e708f9fc833d1b8fe0018f2c1c150fbd3dd81e2dc14fe3ffe0ef53abf7be18236f30338feff2353e8b9713ca973
+Describe schooling system in UK vs USA in 2000-5000 words: 3a7b4d5edc2a14a525a1d0677e36570fdb22d31521947c85d626788a0688e66494a7e9414799602236a6bb9455a525029e84202446871b54613a19b2fb3aca66
+Explain me some random problem for me in 2000-5000 words: 323525095bf264a962795c48d2111bc18e790bcf9ac6ac229817d6d2ceaed9c5d71147039284037d64cc4792fe1af452540729701c6ec1ec84e4a835edfd8827
+Tell me entire history of USA: d2a958bb90a040a9eab4c9fe0f08524c4bd4047b33fb838ea05612d98d2f7a2df7c6c2c86b70065b7e0f3b4f3a96bc7aeacd00de1cbad3bcc3385587b90be77e
+Write a ballad. Pick a random theme.: 8ff349a1b32d048df8dc8520552b5321dc65386c0812a2d0a2100d6a23ea493559872c861583b3581aca858f3f71c427c5ef031d860370809098bd9a2d87b98b
+Write an epic story about a dragon and a knight: a009390cba1b724f1d842064dd4400751745157bfa4e2498189719fe705df094ec4dc393f01f4a21f3c66eea9104ef4c162af115f23d4f9a8cf08b4b3a844f77
+Write an essay about being a Senior developer.: 426f3bc4a0c96f12a23df460712ab3a0b29e5594d98fe72f474d233472612599a4c47e6dab3e13cf9a2d83a46a9a43b689ccb3f607d0af9b6d8a80d478f17e43
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/run.local.log
new file mode 100644
index 0000000..593bb56
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/run.local.log
@@ -0,0 +1,15 @@
+2024-08-22 13:42:57,775 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x H100 SXM
+2024-08-22 13:42:57,777 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/run.local.log
+2024-08-22 13:42:57,885 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 13:42:58,102 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-22 13:42:58,107 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 13:42:58,110 - __main__ - INFO - Syncing files to remote
+2024-08-22 13:42:58,298 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 13:43:00,827 - __main__ - INFO - Setting up remote environment
+2024-08-22 13:43:04,665 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-42-57_1x_h100_sxm '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-42-57_1x_h100_sxm\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-42-57_1x_h100_sxm) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev3+gb88ba71-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 595ms\nPrepared 1 package in 3ms\nInstalled 108 packages in 268ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev3+gb88ba71 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/deterministic_ml-0.1.dev3+gb88ba71-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 13:43:04,676 - __main__ - INFO - Gathering system info
+2024-08-22 13:43:07,885 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-42-57_1x_h100_sxm '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-42-57_1x_h100_sxm\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-42-57_1x_h100_sxm) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 13:43:07,891 - __main__ - INFO - Running experiment code on remote
+2024-08-22 13:45:49,699 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 11:43:12 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 11:43:12 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 11:43:14 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 11:43:15 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 11:43:34 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 11:43:38 gpu_executor.py:102] # GPU blocks: 6059, # CPU blocks: 819\nmodel loading took 27.66 seconds\nStarting 8 responses generation\n8 responses generation took 127.92 seconds\n{'Count to 1000, skip unpopular numbers': '40b945bf30ccd2a3a9996c3579a6f503195da6c271b76082bc514c827d86d87eeea8677a001e8c0cf9b4d53bc24cd575f4be62d1f8d754077a272da0df073762',\n 'Describe justice system in UK vs USA in 2000-5000 words': '2b6f5ba49c027271b4ef651e7bf25a03f2389e708f9fc833d1b8fe0018f2c1c150fbd3dd81e2dc14fe3ffe0ef53abf7be18236f30338feff2353e8b9713ca973',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '3a7b4d5edc2a14a525a1d0677e36570fdb22d31521947c85d626788a0688e66494a7e9414799602236a6bb9455a525029e84202446871b54613a19b2fb3aca66',\n 'Explain me some random problem for me in 2000-5000 words': '323525095bf264a962795c48d2111bc18e790bcf9ac6ac229817d6d2ceaed9c5d71147039284037d64cc4792fe1af452540729701c6ec1ec84e4a835edfd8827',\n 'Tell me entire history of USA': 'd2a958bb90a040a9eab4c9fe0f08524c4bd4047b33fb838ea05612d98d2f7a2df7c6c2c86b70065b7e0f3b4f3a96bc7aeacd00de1cbad3bcc3385587b90be77e',\n 'Write a ballad. Pick a random theme.': '8ff349a1b32d048df8dc8520552b5321dc65386c0812a2d0a2100d6a23ea493559872c861583b3581aca858f3f71c427c5ef031d860370809098bd9a2d87b98b',\n 'Write an epic story about a dragon and a knight': 'a009390cba1b724f1d842064dd4400751745157bfa4e2498189719fe705df094ec4dc393f01f4a21f3c66eea9104ef4c162af115f23d4f9a8cf08b4b3a844f77',\n 'Write an essay about being a Senior developer.': '426f3bc4a0c96f12a23df460712ab3a0b29e5594d98fe72f474d233472612599a4c47e6dab3e13cf9a2d83a46a9a43b689ccb3f607d0af9b6d8a80d478f17e43'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-42-57_1x_h100_sxm '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-42-57_1x_h100_sxm\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-42-57_1x_h100_sxm) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/output/stdout.txt\n\rLoading safetensors checkpoint shards:   0% Completed | 0/9 [00:00<?, ?it/s]\n\rLoading safetensors checkpoint shards:  11% Completed | 1/9 [00:00<00:07,  1.01it/s]\n\rLoading safetensors checkpoint shards:  22% Completed | 2/9 [00:02<00:09,  1.30s/it]\n\rLoading safetensors checkpoint shards:  33% Completed | 3/9 [00:04<00:08,  1.45s/it]\n\rLoading safetensors checkpoint shards:  44% Completed | 4/9 [00:05<00:07,  1.51s/it]\n\rLoading safetensors checkpoint shards:  56% Completed | 5/9 [00:07<00:06,  1.57s/it]\n\rLoading safetensors checkpoint shards:  67% Completed | 6/9 [00:08<00:04,  1.54s/it]\n\rLoading safetensors checkpoint shards:  78% Completed | 7/9 [00:10<00:03,  1.54s/it]\n\rLoading safetensors checkpoint shards:  89% Completed | 8/9 [00:11<00:01,  1.53s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:12<00:00,  1.23s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:12<00:00,  1.39s/it]\n\n/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/.venv/lib/python3.11/site-packages/vllm/model_executor/layers/sampler.py:287: UserWarning: cumsum_cuda_kernel does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'. You can file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation. (Triggered internally at ../aten/src/ATen/Context.cpp:83.)\n  probs_sum = probs_sort.cumsum(dim=-1)\n\rProcessed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\rProcessed prompts:  12%|█▎        | 1/8 [02:01<14:10, 121.54s/it, est. speed input: 0.32 toks/s, output: 32.11 toks/s]\rProcessed prompts:  25%|██▌       | 2/8 [02:07<05:22, 53.80s/it, est. speed input: 0.56 toks/s, output: 62.53 toks/s] \rProcessed prompts: 100%|██████████| 8/8 [02:07<00:00, 15.99s/it, est. speed input: 2.20 toks/s, output: 254.66 toks/s]\n" status_code: 0
+2024-08-22 13:45:49,716 - __main__ - INFO - Syncing output back to local
+2024-08-22 13:45:50,231 - __main__ - INFO - Done
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/stdout.txt
new file mode 100644
index 0000000..8aff439
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/stdout.txt
@@ -0,0 +1,19 @@
+gpu_count=1
+Starting model loading
+INFO 08-22 11:43:12 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 11:43:12 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+INFO 08-22 11:43:14 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+INFO 08-22 11:43:15 weight_utils.py:225] Using model weights format ['*.safetensors']
+INFO 08-22 11:43:34 model_runner.py:732] Loading model weights took 37.0561 GB
+INFO 08-22 11:43:38 gpu_executor.py:102] # GPU blocks: 6059, # CPU blocks: 819
+model loading took 27.66 seconds
+Starting 8 responses generation
+8 responses generation took 127.92 seconds
+{'Count to 1000, skip unpopular numbers': '40b945bf30ccd2a3a9996c3579a6f503195da6c271b76082bc514c827d86d87eeea8677a001e8c0cf9b4d53bc24cd575f4be62d1f8d754077a272da0df073762',
+ 'Describe justice system in UK vs USA in 2000-5000 words': '2b6f5ba49c027271b4ef651e7bf25a03f2389e708f9fc833d1b8fe0018f2c1c150fbd3dd81e2dc14fe3ffe0ef53abf7be18236f30338feff2353e8b9713ca973',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': '3a7b4d5edc2a14a525a1d0677e36570fdb22d31521947c85d626788a0688e66494a7e9414799602236a6bb9455a525029e84202446871b54613a19b2fb3aca66',
+ 'Explain me some random problem for me in 2000-5000 words': '323525095bf264a962795c48d2111bc18e790bcf9ac6ac229817d6d2ceaed9c5d71147039284037d64cc4792fe1af452540729701c6ec1ec84e4a835edfd8827',
+ 'Tell me entire history of USA': 'd2a958bb90a040a9eab4c9fe0f08524c4bd4047b33fb838ea05612d98d2f7a2df7c6c2c86b70065b7e0f3b4f3a96bc7aeacd00de1cbad3bcc3385587b90be77e',
+ 'Write a ballad. Pick a random theme.': '8ff349a1b32d048df8dc8520552b5321dc65386c0812a2d0a2100d6a23ea493559872c861583b3581aca858f3f71c427c5ef031d860370809098bd9a2d87b98b',
+ 'Write an epic story about a dragon and a knight': 'a009390cba1b724f1d842064dd4400751745157bfa4e2498189719fe705df094ec4dc393f01f4a21f3c66eea9104ef4c162af115f23d4f9a8cf08b4b3a844f77',
+ 'Write an essay about being a Senior developer.': '426f3bc4a0c96f12a23df460712ab3a0b29e5594d98fe72f474d233472612599a4c47e6dab3e13cf9a2d83a46a9a43b689ccb3f607d0af9b6d8a80d478f17e43'}
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/sysinfo.yaml
new file mode 100644
index 0000000..45f190a
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/sysinfo.yaml
@@ -0,0 +1,617 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 800.0
+    - 4100.0
+    - 801.387
+    - 800.0
+    - 972.955
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.018
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 4100.0
+    - 800.0
+    - 800.204
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.329
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 799.037
+    - 800.0
+    - 800.092
+    - 800.0
+    - 2100.05
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 799.087
+    - 800.0
+    - 800.323
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    - 800.0
+    count: 128
+    model: Intel(R) Xeon(R) Gold 6448Y
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 1
+    details:
+    - capacity: '81559'
+      cuda: '9.0'
+      driver: 555.58.02
+      graphics_speed: '345'
+      memory_speed: '2619'
+      name: NVIDIA H100 80GB HBM3
+      power_limit: '700.00'
+  hard_disk:
+    free: 17475620
+    total: 62914560
+    used: 45438940
+  os: Ubuntu 22.04.4 LTS
+  ram:
+    available: 507554244
+    free: 89955092
+    total: 527826756
+    used: 437871664
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-42-57_1x_h100_sxm/deterministic_ml-0.1.dev3+gb88ba71-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.12
+  - base-files==12ubuntu4.6
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1.1
+  - binutils==2.38-4ubuntu2.6
+  - binutils-common==2.38-4ubuntu2.6
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.6
+  - bsdutils==1:2.37.2-4ubuntu3.4
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1.2
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-5==12.5.39-1
+  - cuda-command-line-tools-12-5==12.5.1-1
+  - cuda-compat-12-5==555.42.06-1
+  - cuda-compiler-12-5==12.5.1-1
+  - cuda-crt-12-5==12.5.82-1
+  - cuda-cudart-12-5==12.5.82-1
+  - cuda-cudart-dev-12-5==12.5.82-1
+  - cuda-cuobjdump-12-5==12.5.39-1
+  - cuda-cupti-12-5==12.5.82-1
+  - cuda-cupti-dev-12-5==12.5.82-1
+  - cuda-cuxxfilt-12-5==12.5.82-1
+  - cuda-driver-dev-12-5==12.5.82-1
+  - cuda-gdb-12-5==12.5.82-1
+  - cuda-keyring==1.1-1
+  - cuda-libraries-12-5==12.5.1-1
+  - cuda-libraries-dev-12-5==12.5.1-1
+  - cuda-minimal-build-12-5==12.5.1-1
+  - cuda-nsight-compute-12-5==12.5.1-1
+  - cuda-nvcc-12-5==12.5.82-1
+  - cuda-nvdisasm-12-5==12.5.39-1
+  - cuda-nvml-dev-12-5==12.5.82-1
+  - cuda-nvprof-12-5==12.5.82-1
+  - cuda-nvprune-12-5==12.5.82-1
+  - cuda-nvrtc-12-5==12.5.82-1
+  - cuda-nvrtc-dev-12-5==12.5.82-1
+  - cuda-nvtx-12-5==12.5.82-1
+  - cuda-nvvm-12-5==12.5.82-1
+  - cuda-opencl-12-5==12.5.39-1
+  - cuda-opencl-dev-12-5==12.5.39-1
+  - cuda-profiler-api-12-5==12.5.39-1
+  - cuda-sanitizer-12-5==12.5.81-1
+  - cuda-toolkit-12-5-config-common==12.5.82-1
+  - cuda-toolkit-12-config-common==12.5.82-1
+  - cuda-toolkit-config-common==12.5.82-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.7
+  - dpkg==1.21.1ubuntu2.3
+  - dpkg-dev==1.21.1ubuntu2.3
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.11
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.12
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.6
+  - libblkid1==2.37.2-4ubuntu3.4
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.8
+  - libc-dev-bin==2.35-0ubuntu3.8
+  - libc6==2.35-0ubuntu3.8
+  - libc6-dev==2.35-0ubuntu3.8
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.2
+  - libctf-nobfd0==2.38-4ubuntu2.6
+  - libctf0==2.38-4ubuntu2.6
+  - libcublas-12-5==12.5.3.2-1
+  - libcublas-dev-12-5==12.5.3.2-1
+  - libcudnn9-cuda-12==9.2.1.18-1
+  - libcudnn9-dev-cuda-12==9.2.1.18-1
+  - libcufft-12-5==11.2.3.61-1
+  - libcufft-dev-12-5==11.2.3.61-1
+  - libcufile-12-5==1.10.1.7-1
+  - libcufile-dev-12-5==1.10.1.7-1
+  - libcurand-12-5==10.3.6.82-1
+  - libcurand-dev-12-5==10.3.6.82-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.17
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-5==11.6.3.83-1
+  - libcusolver-dev-12-5==11.6.3.83-1
+  - libcusparse-12-5==12.5.1.3-1
+  - libcusparse-dev-12-5==12.5.1.3-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.3
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.3
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.3
+  - libglib2.0-bin==2.72.4-0ubuntu2.3
+  - libglib2.0-data==2.72.4-0ubuntu2.3
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.5
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.3
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.2
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.3
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.3
+  - libkrb5support0==1.19.2-2ubuntu0.3
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.17+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3.4
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.22.3-1+cuda12.5
+  - libnccl2==2.22.3-1+cuda12.5
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.2
+  - libnpp-12-5==12.3.0.159-1
+  - libnpp-dev-12-5==12.3.0.159-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvfatbin-12-5==12.5.82-1
+  - libnvfatbin-dev-12-5==12.5.82-1
+  - libnvjitlink-12-5==12.5.82-1
+  - libnvjitlink-dev-12-5==12.5.82-1
+  - libnvjpeg-12-5==12.3.2.81-1
+  - libnvjpeg-dev-12-5==12.3.2.81-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.4
+  - libpam-modules-bin==1.4.0-11ubuntu2.4
+  - libpam-runtime==1.4.0-11ubuntu2.4
+  - libpam-systemd==249.11-0ubuntu3.12
+  - libpam0g==1.4.0-11ubuntu2.4
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.3
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2.1
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10-minimal==3.10.12-1~22.04.5
+  - libpython3.10-stdlib==3.10.12-1~22.04.5
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3.4
+  - libsqlite3-0==3.37.2-2ubuntu0.3
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.3
+  - libssl3==3.0.2-0ubuntu1.16
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.12
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.12
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3.4
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.4
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-113.123
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.2
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3.4
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2024.2.1==2024.2.1.2-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.16
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.2
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.3
+  - perl-base==5.34.0-3ubuntu1.3
+  - perl-modules-5.34==5.34.0-3ubuntu1.3
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2.1
+  - python-apt-common==2.4.0ubuntu3
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu3
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2.2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-distutils==3.10.8-1~22.04
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-lib2to3==3.10.8-1~22.04
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pip==22.0.2+dfsg-1ubuntu0.4
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-setuptools==59.6.0-1.2ubuntu0.22.04.1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-wheel==0.37.1-2ubuntu0.22.04.1
+  - python3-zipp==1.0.0-3ubuntu0.1
+  - python3.10==3.10.12-1~22.04.5
+  - python3.10-minimal==3.10.12-1~22.04.5
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.12
+  - systemd-sysv==249.11-0ubuntu3.12
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.2
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3.4
+  - wget==1.21.2-2ubuntu1.1
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#45~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Mon Jul 15 16:40:02 UTC 2'
+  processor: x86_64
+  release: 6.5.0-45-generic
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/experiment.yaml
new file mode 100644
index 0000000..f1405d9
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 1x A100 PCIE
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie
+slug: 1x_a100_pcie
+timestamp: 2024-08-22_13-45-58
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output.yaml
new file mode 100644
index 0000000..88b431d
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: 5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e
+Describe justice system in UK vs USA in 2000-5000 words: 83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede
+Describe schooling system in UK vs USA in 2000-5000 words: f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1
+Explain me some random problem for me in 2000-5000 words: 143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c
+Tell me entire history of USA: 210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518
+Write a ballad. Pick a random theme.: 21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3
+Write an epic story about a dragon and a knight: 81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94
+Write an essay about being a Senior developer.: 0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/run.local.log
new file mode 100644
index 0000000..3ca49f4
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/run.local.log
@@ -0,0 +1,15 @@
+2024-08-22 13:45:58,657 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x A100 PCIE
+2024-08-22 13:45:58,660 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/run.local.log
+2024-08-22 13:45:58,995 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 13:45:59,667 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-22 13:45:59,672 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 13:45:59,674 - __main__ - INFO - Syncing files to remote
+2024-08-22 13:46:00,199 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 13:46:07,148 - __main__ - INFO - Setting up remote environment
+2024-08-22 13:46:45,365 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n    source $HOME/.cargo/env (sh, bash, zsh)\n    source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-45-58_1x_a100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-45-58_1x_a100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-45-58_1x_a100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev3+gb88ba71-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 1.33s\nPrepared 108 packages in 31.36s\nInstalled 108 packages in 336ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev3+gb88ba71 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/deterministic_ml-0.1.dev3+gb88ba71-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 13:46:45,383 - __main__ - INFO - Gathering system info
+2024-08-22 13:46:49,501 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-45-58_1x_a100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-45-58_1x_a100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-45-58_1x_a100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 13:46:49,511 - __main__ - INFO - Running experiment code on remote
+2024-08-22 13:52:42,106 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 11:46:57 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 11:46:57 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 11:46:58 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 11:46:59 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 11:48:11 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 11:48:19 gpu_executor.py:102] # GPU blocks: 6049, # CPU blocks: 819\nmodel loading took 84.08 seconds\nStarting 8 responses generation\n8 responses generation took 258.53 seconds\n{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e',\n 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1',\n 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c',\n 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518',\n 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3',\n 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94',\n 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_13-45-58_1x_a100_pcie '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_13-45-58_1x_a100_pcie\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_13-45-58_1x_a100_pcie) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/output/stdout.txt\n\rLoading safetensors checkpoint shards:   0% Completed | 0/9 [00:00<?, ?it/s]\n\rLoading safetensors checkpoint shards:  11% Completed | 1/9 [00:00<00:06,  1.26it/s]\n\rLoading safetensors checkpoint shards:  22% Completed | 2/9 [00:01<00:06,  1.00it/s]\n\rLoading safetensors checkpoint shards:  33% Completed | 3/9 [00:03<00:06,  1.08s/it]\n\rLoading safetensors checkpoint shards:  44% Completed | 4/9 [00:04<00:05,  1.12s/it]\n\rLoading safetensors checkpoint shards:  56% Completed | 5/9 [00:05<00:04,  1.14s/it]\n\rLoading safetensors checkpoint shards:  67% Completed | 6/9 [00:06<00:03,  1.15s/it]\n\rLoading safetensors checkpoint shards:  78% Completed | 7/9 [00:07<00:02,  1.16s/it]\n\rLoading safetensors checkpoint shards:  89% Completed | 8/9 [00:08<00:01,  1.15s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:09<00:00,  1.06it/s]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:09<00:00,  1.05s/it]\n\n/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/.venv/lib/python3.11/site-packages/vllm/model_executor/layers/sampler.py:287: UserWarning: cumsum_cuda_kernel does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'. You can file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation. (Triggered internally at ../aten/src/ATen/Context.cpp:83.)\n  probs_sum = probs_sort.cumsum(dim=-1)\n\rProcessed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\rProcessed prompts:  12%|█▎        | 1/8 [04:18<30:09, 258.52s/it, est. speed input: 0.13 toks/s, output: 15.84 toks/s]\rProcessed prompts: 100%|██████████| 8/8 [04:18<00:00, 32.32s/it, est. speed input: 1.09 toks/s, output: 126.75 toks/s]\n" status_code: 0
+2024-08-22 13:52:42,123 - __main__ - INFO - Syncing output back to local
+2024-08-22 13:52:43,750 - __main__ - INFO - Done
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/stdout.txt
new file mode 100644
index 0000000..40cf422
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/stdout.txt
@@ -0,0 +1,19 @@
+gpu_count=1
+Starting model loading
+INFO 08-22 11:46:57 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 11:46:57 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+INFO 08-22 11:46:58 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+INFO 08-22 11:46:59 weight_utils.py:225] Using model weights format ['*.safetensors']
+INFO 08-22 11:48:11 model_runner.py:732] Loading model weights took 37.0561 GB
+INFO 08-22 11:48:19 gpu_executor.py:102] # GPU blocks: 6049, # CPU blocks: 819
+model loading took 84.08 seconds
+Starting 8 responses generation
+8 responses generation took 258.53 seconds
+{'Count to 1000, skip unpopular numbers': '5fa4c4a18a1534b96c2eb2c5a30f63da0237b338aebf745d27d3d73dbc8dedfa2aed7070799440ac37e8610f9dd4926371f77a98e79c50a2c8b5b583cbf7c86e',
+ 'Describe justice system in UK vs USA in 2000-5000 words': '83c0ec6b7f37d53b798093724f72a40195572be308b65471e8d2aae18379ef79655233858eb842ebf73967b058c38685fbea9543a3d1b3b4f41684b5fd95eede',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': 'f5d13dd9ee6b6b0540bd3e4adf6baec37ff5d4dc3e1158344f5ab2c690880de0ac1263d3f2691d6b904271298ba0b023adf541ba2f7fb1add50ba27f7a67d3a1',
+ 'Explain me some random problem for me in 2000-5000 words': '143fc78fb373d10e8b27bdc3bcd5a5a9b5154c8a9dfeb72102d610a87cf47d5cfeb7a4be0136bf0ba275e3fa46e8b6cfcbeb63af6c45714abcd2875bb7bd577c',
+ 'Tell me entire history of USA': '210fa7578650d083ad35cae251f8ef272bdc61c35daa08eb27852b3ddc59262718300971b1ac9725c9ac08f63240a1a13845d6c853d2e08520567288d54b5518',
+ 'Write a ballad. Pick a random theme.': '21c8744c38338c8e8c4a9f0efc580b9040d51837573924ef731180e7cc2fb21cb96968c901803abad6df1b4f035096ec0fc75339144f133c754a8303a3f378e3',
+ 'Write an epic story about a dragon and a knight': '81ff9b82399502e2d3b0fd8f625d3c3f6141c4c179488a247c0c0cc3ccd77828f0920c3d8c03621dfe426e401f58820a6094db5f3786ab7f12bfb13d6224ef94',
+ 'Write an essay about being a Senior developer.': '0921d5c3b2e04616dbb655e6ba4648911b9461a4ecdb0d435ebf190d903a92c20cf1343d98de65b6e9690f5e6b1c8f3bfc58e720168fa54dc0e293f0f595505c'}
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/sysinfo.yaml
new file mode 100644
index 0000000..3d025a1
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/sysinfo.yaml
@@ -0,0 +1,606 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 1500.0
+    - 2250.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2000.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2000.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2250.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2250.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2250.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2250.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2000.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2000.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1476.719
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 2000.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    - 1500.0
+    count: 128
+    model: AMD EPYC 7742 64-Core Processor
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 1
+    details:
+    - capacity: '81920'
+      cuda: '8.0'
+      driver: 535.129.03
+      graphics_speed: '210'
+      memory_speed: '1512'
+      name: NVIDIA A100 80GB PCIe
+      power_limit: '300.00'
+  hard_disk:
+    free: 56400020
+    total: 62914560
+    used: 6514540
+  os: Ubuntu 22.04.3 LTS
+  ram:
+    available: 256568492
+    free: 151896736
+    total: 263751572
+    used: 111854836
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_13-45-58_1x_a100_pcie/deterministic_ml-0.1.dev3+gb88ba71-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.10
+  - base-files==12ubuntu4.4
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1
+  - binutils==2.38-4ubuntu2.3
+  - binutils-common==2.38-4ubuntu2.3
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.3
+  - bsdutils==1:2.37.2-4ubuntu3
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-0==12.0.140-1
+  - cuda-command-line-tools-12-0==12.0.1-1
+  - cuda-compat-12-0==525.147.05-1
+  - cuda-compiler-12-0==12.0.1-1
+  - cuda-cudart-12-0==12.0.146-1
+  - cuda-cudart-dev-12-0==12.0.146-1
+  - cuda-cuobjdump-12-0==12.0.140-1
+  - cuda-cupti-12-0==12.0.146-1
+  - cuda-cupti-dev-12-0==12.0.146-1
+  - cuda-cuxxfilt-12-0==12.0.140-1
+  - cuda-driver-dev-12-0==12.0.146-1
+  - cuda-gdb-12-0==12.0.140-1
+  - cuda-keyring==1.0-1
+  - cuda-libraries-12-0==12.0.1-1
+  - cuda-libraries-dev-12-0==12.0.1-1
+  - cuda-minimal-build-12-0==12.0.1-1
+  - cuda-nsight-compute-12-0==12.0.1-1
+  - cuda-nvcc-12-0==12.0.140-1
+  - cuda-nvdisasm-12-0==12.0.140-1
+  - cuda-nvml-dev-12-0==12.0.140-1
+  - cuda-nvprof-12-0==12.0.146-1
+  - cuda-nvprune-12-0==12.0.140-1
+  - cuda-nvrtc-12-0==12.0.140-1
+  - cuda-nvrtc-dev-12-0==12.0.140-1
+  - cuda-nvtx-12-0==12.0.140-1
+  - cuda-opencl-12-0==12.0.140-1
+  - cuda-opencl-dev-12-0==12.0.140-1
+  - cuda-profiler-api-12-0==12.0.140-1
+  - cuda-sanitizer-12-0==12.0.140-1
+  - cuda-toolkit-12-0-config-common==12.0.146-1
+  - cuda-toolkit-12-config-common==12.3.52-1
+  - cuda-toolkit-config-common==12.3.52-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.5
+  - dpkg==1.21.1ubuntu2.2
+  - dpkg-dev==1.21.1ubuntu2.2
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.10
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.10
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.3
+  - libblkid1==2.37.2-4ubuntu3
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.4
+  - libc-dev-bin==2.35-0ubuntu3.4
+  - libc6==2.35-0ubuntu3.4
+  - libc6-dev==2.35-0ubuntu3.4
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.1
+  - libctf-nobfd0==2.38-4ubuntu2.3
+  - libctf0==2.38-4ubuntu2.3
+  - libcublas-12-0==12.0.2.224-1
+  - libcublas-dev-12-0==12.0.2.224-1
+  - libcufft-12-0==11.0.1.95-1
+  - libcufft-dev-12-0==11.0.1.95-1
+  - libcufile-12-0==1.5.1.14-1
+  - libcufile-dev-12-0==1.5.1.14-1
+  - libcurand-12-0==10.3.1.124-1
+  - libcurand-dev-12-0==10.3.1.124-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.14
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-0==11.4.3.1-1
+  - libcusolver-dev-12-0==11.4.3.1-1
+  - libcusparse-12-0==12.0.1.140-1
+  - libcusparse-dev-12-0==12.0.1.140-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.2
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.2
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.2
+  - libglib2.0-bin==2.72.4-0ubuntu2.2
+  - libglib2.0-data==2.72.4-0ubuntu2.2
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.2
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.2
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.1
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.2
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.2
+  - libkrb5support0==1.19.2-2ubuntu0.2
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.16+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.16.5-1+cuda12.0
+  - libnccl2==2.16.5-1+cuda12.0
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.1
+  - libnpp-12-0==12.0.1.104-1
+  - libnpp-dev-12-0==12.0.1.104-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvjitlink-12-0==12.0.140-1
+  - libnvjitlink-dev-12-0==12.0.140-1
+  - libnvjpeg-12-0==12.0.1.102-1
+  - libnvjpeg-dev-12-0==12.0.1.102-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.3
+  - libpam-modules-bin==1.4.0-11ubuntu2.3
+  - libpam-runtime==1.4.0-11ubuntu2.3
+  - libpam-systemd==249.11-0ubuntu3.11
+  - libpam0g==1.4.0-11ubuntu2.3
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.2
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10-minimal==3.10.12-1~22.04.3
+  - libpython3.10-stdlib==3.10.12-1~22.04.3
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3
+  - libsqlite3-0==3.37.2-2ubuntu0.1
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.1
+  - libssl3==3.0.2-0ubuntu1.10
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.11
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.10
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.3
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-88.98
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.1
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2022.4.1==2022.4.1.6-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.12
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.1
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.2
+  - perl-base==5.34.0-3ubuntu1.2
+  - perl-modules-5.34==5.34.0-3ubuntu1.2
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2
+  - python-apt-common==2.4.0ubuntu2
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu2
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-zipp==1.0.0-3
+  - python3.10==3.10.12-1~22.04.3
+  - python3.10-minimal==3.10.12-1~22.04.3
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.11
+  - systemd-sysv==249.11-0ubuntu3.11
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.1
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3
+  - wget==1.21.2-2ubuntu1.1
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#98-Ubuntu SMP Mon Oct 2 15:18:56 UTC 2023'
+  processor: x86_64
+  release: 5.15.0-88-generic
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/experiment.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/experiment.yaml
new file mode 100644
index 0000000..4a9323f
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 1x H100 PCIE with Intel Xeon
+experiment: vllm_llama_3_70b_instruct_awq
+experiment_hash: exp_hash_v1:7aa490
+run_id: vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon
+slug: 1x_h100_pcie_with_intel_xeon
+timestamp: 2024-08-22_14-11-08
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output.yaml
new file mode 100644
index 0000000..b9dd089
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: 5edaedfc17612b5878d0766192b3e126326bbc0da437ecc80bb531dd370a1f370226a881ff8c7be1efa474827f8973efca04a88efacc929d5611871805291aea
+Describe justice system in UK vs USA in 2000-5000 words: 64f46d659436b9676439620ba99f104744c9ff612eacc973ac585fd935141a8777f02563a8a38de4c3991b28316bf7867b7c0af783a52a2d0b51c0d86460c362
+Describe schooling system in UK vs USA in 2000-5000 words: eac275677ca4a6ddd67713c401c804675b591863e82e439ac1bbb482b19f22c8633fbc8c588a868bc3772b4ee9b753f6dab998cdba7853dd7a017846172a1117
+Explain me some random problem for me in 2000-5000 words: 9420afbcb5a4566cbc7eac4361f8915a94b4d3ec46ca9e07d60d9661c0ffa013316ec88df24c9c261e0362693aaaf17df2f3024561f811fcd3725674f5987b51
+Tell me entire history of USA: 3cc0629da42ffd940b4649526c09fa8e7488ccb7b65cd9db9ebcf5c3b7def820acf9e9b2ace342237bf45444d79a36c5c2b8a938b5e6a22a176bf9d7ca018a5f
+Write a ballad. Pick a random theme.: 886c987fbb7e204c648671cff7c51f7476b8050e5b3d4cd9664abdb395b396f438b65ba31767124912e97fbacf14c3879f9d68169e524c47bedb8688edcde1c2
+Write an epic story about a dragon and a knight: 5015e1d7b4d307014843de4ff7e75223b64ef38de1fa625e6ccfb25e0d5e2a9f8c1c3a2b7dfe0d45afdb6a502f6b644be00de7653f53a490e1267f71829f6352
+Write an essay about being a Senior developer.: 303665cb313f789f0d033db3e61b21b2436255b77ba5e221e54fc8f5180a1f822fca22c0ab221912d04c3a1f8d90e324eae2ccde1ec18375de12b36b3da04581
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/run.local.log b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/run.local.log
new file mode 100644
index 0000000..3b15651
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/run.local.log
@@ -0,0 +1,15 @@
+2024-08-22 14:11:08,189 - __main__ - INFO - Starting experiment vllm_llama_3_70b_instruct_awq with comment: 1x H100 PCIE with Intel Xeon
+2024-08-22 14:11:08,191 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/run.local.log
+2024-08-22 14:11:08,816 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-22 14:11:10,052 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-22 14:11:10,059 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-22 14:11:10,061 - __main__ - INFO - Syncing files to remote
+2024-08-22 14:11:11,012 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output' stdout: '' stderr: '' status_code: 0
+2024-08-22 14:11:18,922 - __main__ - INFO - Setting up remote environment
+2024-08-22 14:11:24,833 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    curl -LsSf https://astral.sh/uv/install.sh | sh\n    export PATH=$HOME/.cargo/bin:$PATH\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n    uv venv -p python3.11 --python-preference managed\n    source .venv/bin/activate \n    uv pip install       ./deterministic_ml*.whl       pyyaml       -r vllm_llama_3_70b_instruct_awq/requirements.txt\n    ' stdout: "installing to /root/.cargo/bin\n  uv\n  uvx\neverything's installed!\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.1 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev4+g09eda75-py3-none-any.whl pyyaml -r vllm_llama_3_70b_instruct_awq/requirements.txt\nResolved 108 packages in 1.04s\nPrepared 1 package in 4ms\nInstalled 108 packages in 401ms\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + certifi==2024.7.4\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + cmake==3.30.2\n + datasets==2.21.0\n + deterministic-ml==0.1.dev4+g09eda75 (from file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/deterministic_ml-0.1.dev4+g09eda75-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.1\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.0\n + huggingface-hub==0.24.6\n + idna==3.7\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.3\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + ninja==1.11.1.1\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + starlette==0.38.2\n + sympy==1.13.2\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.1\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.4\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n" status_code: 0
+2024-08-22 14:11:24,848 - __main__ - INFO - Gathering system info
+2024-08-22 14:11:28,259 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-22 14:11:28,264 - __main__ - INFO - Running experiment code on remote
+2024-08-22 14:14:47,273 - tools.ssh - INFO - Command: '\n    set -exo pipefail\n    \n    cd ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n    export PATH=$HOME/.cargo/bin:$PATH\n    source .venv/bin/activate;\n     python -m vllm_llama_3_70b_instruct_awq ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output | tee ~/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-22 12:11:34 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.\nINFO 08-22 12:11:34 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-22 12:11:35 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...\nINFO 08-22 12:11:36 weight_utils.py:225] Using model weights format ['*.safetensors']\nINFO 08-22 12:11:49 model_runner.py:732] Loading model weights took 37.0561 GB\nINFO 08-22 12:11:56 gpu_executor.py:102] # GPU blocks: 6042, # CPU blocks: 819\nmodel loading took 23.78 seconds\nStarting 8 responses generation\n8 responses generation took 167.54 seconds\n{'Count to 1000, skip unpopular numbers': '5edaedfc17612b5878d0766192b3e126326bbc0da437ecc80bb531dd370a1f370226a881ff8c7be1efa474827f8973efca04a88efacc929d5611871805291aea',\n 'Describe justice system in UK vs USA in 2000-5000 words': '64f46d659436b9676439620ba99f104744c9ff612eacc973ac585fd935141a8777f02563a8a38de4c3991b28316bf7867b7c0af783a52a2d0b51c0d86460c362',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'eac275677ca4a6ddd67713c401c804675b591863e82e439ac1bbb482b19f22c8633fbc8c588a868bc3772b4ee9b753f6dab998cdba7853dd7a017846172a1117',\n 'Explain me some random problem for me in 2000-5000 words': '9420afbcb5a4566cbc7eac4361f8915a94b4d3ec46ca9e07d60d9661c0ffa013316ec88df24c9c261e0362693aaaf17df2f3024561f811fcd3725674f5987b51',\n 'Tell me entire history of USA': '3cc0629da42ffd940b4649526c09fa8e7488ccb7b65cd9db9ebcf5c3b7def820acf9e9b2ace342237bf45444d79a36c5c2b8a938b5e6a22a176bf9d7ca018a5f',\n 'Write a ballad. Pick a random theme.': '886c987fbb7e204c648671cff7c51f7476b8050e5b3d4cd9664abdb395b396f438b65ba31767124912e97fbacf14c3879f9d68169e524c47bedb8688edcde1c2',\n 'Write an epic story about a dragon and a knight': '5015e1d7b4d307014843de4ff7e75223b64ef38de1fa625e6ccfb25e0d5e2a9f8c1c3a2b7dfe0d45afdb6a502f6b644be00de7653f53a490e1267f71829f6352',\n 'Write an essay about being a Senior developer.': '303665cb313f789f0d033db3e61b21b2436255b77ba5e221e54fc8f5180a1f822fca22c0ab221912d04c3a1f8d90e324eae2ccde1ec18375de12b36b3da04581'}\n" stderr: "+ cd /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_llama_3_70b_instruct_awq /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output\n+ tee /root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/output/stdout.txt\n\rLoading safetensors checkpoint shards:   0% Completed | 0/9 [00:00<?, ?it/s]\n\rLoading safetensors checkpoint shards:  11% Completed | 1/9 [00:00<00:05,  1.35it/s]\n\rLoading safetensors checkpoint shards:  22% Completed | 2/9 [00:01<00:04,  1.62it/s]\n\rLoading safetensors checkpoint shards:  33% Completed | 3/9 [00:02<00:05,  1.19it/s]\n\rLoading safetensors checkpoint shards:  44% Completed | 4/9 [00:03<00:04,  1.01it/s]\n\rLoading safetensors checkpoint shards:  56% Completed | 5/9 [00:04<00:04,  1.07s/it]\n\rLoading safetensors checkpoint shards:  67% Completed | 6/9 [00:06<00:03,  1.14s/it]\n\rLoading safetensors checkpoint shards:  78% Completed | 7/9 [00:07<00:02,  1.16s/it]\n\rLoading safetensors checkpoint shards:  89% Completed | 8/9 [00:08<00:01,  1.19s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:09<00:00,  1.20s/it]\n\rLoading safetensors checkpoint shards: 100% Completed | 9/9 [00:09<00:00,  1.08s/it]\n\n/root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/.venv/lib/python3.11/site-packages/vllm/model_executor/layers/sampler.py:287: UserWarning: cumsum_cuda_kernel does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True, warn_only=True)'. You can file an issue at https://github.com/pytorch/pytorch/issues to help us prioritize adding deterministic support for this operation. (Triggered internally at ../aten/src/ATen/Context.cpp:83.)\n  probs_sum = probs_sort.cumsum(dim=-1)\n\rProcessed prompts:   0%|          | 0/8 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\rProcessed prompts:  12%|█▎        | 1/8 [02:47<19:32, 167.53s/it, est. speed input: 0.20 toks/s, output: 24.45 toks/s]\rProcessed prompts: 100%|██████████| 8/8 [02:47<00:00, 20.94s/it, est. speed input: 1.68 toks/s, output: 195.59 toks/s]\n" status_code: 0
+2024-08-22 14:14:47,296 - __main__ - INFO - Syncing output back to local
+2024-08-22 14:14:49,936 - __main__ - INFO - Done
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/stdout.txt b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/stdout.txt
new file mode 100644
index 0000000..a275f72
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/stdout.txt
@@ -0,0 +1,19 @@
+gpu_count=1
+Starting model loading
+INFO 08-22 12:11:34 awq_marlin.py:89] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
+INFO 08-22 12:11:34 llm_engine.py:174] Initializing an LLM engine (v0.5.4) with config: model='casperhansen/llama-3-70b-instruct-awq', speculative_config=None, tokenizer='casperhansen/llama-3-70b-instruct-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None), seed=0, served_model_name=casperhansen/llama-3-70b-instruct-awq, use_v2_block_manager=False, enable_prefix_caching=False)
+INFO 08-22 12:11:35 model_runner.py:720] Starting to load model casperhansen/llama-3-70b-instruct-awq...
+INFO 08-22 12:11:36 weight_utils.py:225] Using model weights format ['*.safetensors']
+INFO 08-22 12:11:49 model_runner.py:732] Loading model weights took 37.0561 GB
+INFO 08-22 12:11:56 gpu_executor.py:102] # GPU blocks: 6042, # CPU blocks: 819
+model loading took 23.78 seconds
+Starting 8 responses generation
+8 responses generation took 167.54 seconds
+{'Count to 1000, skip unpopular numbers': '5edaedfc17612b5878d0766192b3e126326bbc0da437ecc80bb531dd370a1f370226a881ff8c7be1efa474827f8973efca04a88efacc929d5611871805291aea',
+ 'Describe justice system in UK vs USA in 2000-5000 words': '64f46d659436b9676439620ba99f104744c9ff612eacc973ac585fd935141a8777f02563a8a38de4c3991b28316bf7867b7c0af783a52a2d0b51c0d86460c362',
+ 'Describe schooling system in UK vs USA in 2000-5000 words': 'eac275677ca4a6ddd67713c401c804675b591863e82e439ac1bbb482b19f22c8633fbc8c588a868bc3772b4ee9b753f6dab998cdba7853dd7a017846172a1117',
+ 'Explain me some random problem for me in 2000-5000 words': '9420afbcb5a4566cbc7eac4361f8915a94b4d3ec46ca9e07d60d9661c0ffa013316ec88df24c9c261e0362693aaaf17df2f3024561f811fcd3725674f5987b51',
+ 'Tell me entire history of USA': '3cc0629da42ffd940b4649526c09fa8e7488ccb7b65cd9db9ebcf5c3b7def820acf9e9b2ace342237bf45444d79a36c5c2b8a938b5e6a22a176bf9d7ca018a5f',
+ 'Write a ballad. Pick a random theme.': '886c987fbb7e204c648671cff7c51f7476b8050e5b3d4cd9664abdb395b396f438b65ba31767124912e97fbacf14c3879f9d68169e524c47bedb8688edcde1c2',
+ 'Write an epic story about a dragon and a knight': '5015e1d7b4d307014843de4ff7e75223b64ef38de1fa625e6ccfb25e0d5e2a9f8c1c3a2b7dfe0d45afdb6a502f6b644be00de7653f53a490e1267f71829f6352',
+ 'Write an essay about being a Senior developer.': '303665cb313f789f0d033db3e61b21b2436255b77ba5e221e54fc8f5180a1f822fca22c0ab221912d04c3a1f8d90e324eae2ccde1ec18375de12b36b3da04581'}
diff --git a/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/sysinfo.yaml b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/sysinfo.yaml
new file mode 100644
index 0000000..13c9548
--- /dev/null
+++ b/tests/integration/results/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/sysinfo.yaml
@@ -0,0 +1,510 @@
+cuda:
+  cuda: '12.1'
+  cudnn: 90100
+machine:
+  cpu:
+    clocks:
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 807.233
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    - 3100.0
+    count: 32
+    model: Intel(R) Xeon(R) w5-3435X
+  docker_support:
+    nvidia: false
+    runc: false
+  gpu:
+    count: 1
+    details:
+    - capacity: '81559'
+      cuda: '9.0'
+      driver: 545.23.08
+      graphics_speed: '345'
+      memory_speed: '1593'
+      name: NVIDIA H100 PCIe
+      power_limit: '350.00'
+  hard_disk:
+    free: 17480328
+    total: 62914560
+    used: 45434232
+  os: Ubuntu 22.04.3 LTS
+  ram:
+    available: 521602668
+    free: 450267136
+    total: 527642136
+    used: 77375000
+python:
+  packages:
+  - aiohappyeyeballs==2.4.0
+  - aiohttp==3.10.5
+  - aiosignal==1.3.1
+  - annotated-types==0.7.0
+  - anyio==4.4.0
+  - attrs==24.2.0
+  - certifi==2024.7.4
+  - charset-normalizer==3.3.2
+  - click==8.1.7
+  - cloudpickle==3.0.0
+  - cmake==3.30.2
+  - datasets==2.21.0
+  - deterministic-ml @ file:///root/experiments/vllm_llama_3_70b_instruct_awq/2024-08-22_14-11-08_1x_h100_pcie_with_intel_xeon/deterministic_ml-0.1.dev4+g09eda75-py3-none-any.whl
+  - dill==0.3.8
+  - diskcache==5.6.3
+  - distro==1.9.0
+  - fastapi==0.112.1
+  - filelock==3.15.4
+  - frozenlist==1.4.1
+  - fsspec==2024.6.1
+  - h11==0.14.0
+  - httpcore==1.0.5
+  - httptools==0.6.1
+  - httpx==0.27.0
+  - huggingface-hub==0.24.6
+  - idna==3.7
+  - interegular==0.3.3
+  - jinja2==3.1.4
+  - jiter==0.5.0
+  - jsonschema==4.23.0
+  - jsonschema-specifications==2023.12.1
+  - lark==1.2.2
+  - llvmlite==0.43.0
+  - lm-format-enforcer==0.10.3
+  - markupsafe==2.1.5
+  - mpmath==1.3.0
+  - msgpack==1.0.8
+  - multidict==6.0.5
+  - multiprocess==0.70.16
+  - nest-asyncio==1.6.0
+  - networkx==3.3
+  - ninja==1.11.1.1
+  - numba==0.60.0
+  - numpy==1.26.4
+  - nvidia-cublas-cu12==12.1.3.1
+  - nvidia-cuda-cupti-cu12==12.1.105
+  - nvidia-cuda-nvrtc-cu12==12.1.105
+  - nvidia-cuda-runtime-cu12==12.1.105
+  - nvidia-cudnn-cu12==9.1.0.70
+  - nvidia-cufft-cu12==11.0.2.54
+  - nvidia-curand-cu12==10.3.2.106
+  - nvidia-cusolver-cu12==11.4.5.107
+  - nvidia-cusparse-cu12==12.1.0.106
+  - nvidia-ml-py==12.560.30
+  - nvidia-nccl-cu12==2.20.5
+  - nvidia-nvjitlink-cu12==12.6.20
+  - nvidia-nvtx-cu12==12.1.105
+  - openai==1.42.0
+  - outlines==0.0.46
+  - packaging==24.1
+  - pandas==2.2.2
+  - pillow==10.4.0
+  - prometheus-client==0.20.0
+  - prometheus-fastapi-instrumentator==7.0.0
+  - protobuf==5.27.3
+  - psutil==6.0.0
+  - py-cpuinfo==9.0.0
+  - pyairports==2.1.1
+  - pyarrow==17.0.0
+  - pycountry==24.6.1
+  - pydantic==2.8.2
+  - pydantic-core==2.20.1
+  - python-dateutil==2.9.0.post0
+  - python-dotenv==1.0.1
+  - pytz==2024.1
+  - pyyaml==6.0.2
+  - pyzmq==26.2.0
+  - ray==2.34.0
+  - referencing==0.35.1
+  - regex==2024.7.24
+  - requests==2.32.3
+  - rpds-py==0.20.0
+  - safetensors==0.4.4
+  - sentencepiece==0.2.0
+  - setuptools==73.0.1
+  - six==1.16.0
+  - sniffio==1.3.1
+  - starlette==0.38.2
+  - sympy==1.13.2
+  - tiktoken==0.7.0
+  - tokenizers==0.19.1
+  - torch==2.4.0
+  - torchvision==0.19.0
+  - tqdm==4.66.5
+  - transformers==4.44.1
+  - triton==3.0.0
+  - typing-extensions==4.12.2
+  - tzdata==2024.1
+  - urllib3==2.2.2
+  - uvicorn==0.30.6
+  - uvloop==0.20.0
+  - vllm==0.5.4
+  - vllm-flash-attn==2.6.1
+  - watchfiles==0.23.0
+  - websockets==13.0
+  - xformers==0.0.27.post2
+  - xxhash==3.5.0
+  - yarl==1.9.4
+  version: 3.11.9 (main, Aug 14 2024, 05:07:28) [Clang 18.1.8 ]
+system:
+  dpkg_packages:
+  - adduser==3.118ubuntu5
+  - apt==2.4.10
+  - base-files==12ubuntu4.4
+  - base-passwd==3.5.52build1
+  - bash==5.1-6ubuntu1
+  - binutils==2.38-4ubuntu2.3
+  - binutils-common==2.38-4ubuntu2.3
+  - binutils-x86-64-linux-gnu==2.38-4ubuntu2.3
+  - bsdutils==1:2.37.2-4ubuntu3
+  - build-essential==12.9ubuntu3
+  - bzip2==1.0.8-5build1
+  - ca-certificates==20230311ubuntu0.22.04.1
+  - coreutils==8.32-4.1ubuntu1
+  - cpp==4:11.2.0-1ubuntu1
+  - cpp-11==11.4.0-1ubuntu1~22.04
+  - cuda-cccl-12-0==12.0.140-1
+  - cuda-command-line-tools-12-0==12.0.1-1
+  - cuda-compat-12-0==525.147.05-1
+  - cuda-compiler-12-0==12.0.1-1
+  - cuda-cudart-12-0==12.0.146-1
+  - cuda-cudart-dev-12-0==12.0.146-1
+  - cuda-cuobjdump-12-0==12.0.140-1
+  - cuda-cupti-12-0==12.0.146-1
+  - cuda-cupti-dev-12-0==12.0.146-1
+  - cuda-cuxxfilt-12-0==12.0.140-1
+  - cuda-driver-dev-12-0==12.0.146-1
+  - cuda-gdb-12-0==12.0.140-1
+  - cuda-keyring==1.0-1
+  - cuda-libraries-12-0==12.0.1-1
+  - cuda-libraries-dev-12-0==12.0.1-1
+  - cuda-minimal-build-12-0==12.0.1-1
+  - cuda-nsight-compute-12-0==12.0.1-1
+  - cuda-nvcc-12-0==12.0.140-1
+  - cuda-nvdisasm-12-0==12.0.140-1
+  - cuda-nvml-dev-12-0==12.0.140-1
+  - cuda-nvprof-12-0==12.0.146-1
+  - cuda-nvprune-12-0==12.0.140-1
+  - cuda-nvrtc-12-0==12.0.140-1
+  - cuda-nvrtc-dev-12-0==12.0.140-1
+  - cuda-nvtx-12-0==12.0.140-1
+  - cuda-opencl-12-0==12.0.140-1
+  - cuda-opencl-dev-12-0==12.0.140-1
+  - cuda-profiler-api-12-0==12.0.140-1
+  - cuda-sanitizer-12-0==12.0.140-1
+  - cuda-toolkit-12-0-config-common==12.0.146-1
+  - cuda-toolkit-12-config-common==12.3.52-1
+  - cuda-toolkit-config-common==12.3.52-1
+  - curl==7.81.0-1ubuntu1.17
+  - dash==0.5.11+git20210903+057cd650a4ed-3build1
+  - dbus==1.12.20-2ubuntu4.1
+  - debconf==1.5.79ubuntu1
+  - debianutils==5.5-1ubuntu2
+  - diffutils==1:3.8-0ubuntu2
+  - dirmngr==2.2.27-3ubuntu2.1
+  - distro-info-data==0.52ubuntu0.7
+  - dpkg==1.21.1ubuntu2.2
+  - dpkg-dev==1.21.1ubuntu2.2
+  - e2fsprogs==1.46.5-2ubuntu1.1
+  - findutils==4.8.0-1ubuntu3
+  - g++==4:11.2.0-1ubuntu1
+  - g++-11==11.4.0-1ubuntu1~22.04
+  - gcc==4:11.2.0-1ubuntu1
+  - gcc-11==11.4.0-1ubuntu1~22.04
+  - gcc-11-base==11.4.0-1ubuntu1~22.04
+  - gcc-12-base==12.3.0-1ubuntu1~22.04
+  - gir1.2-glib-2.0==1.72.0-1
+  - gir1.2-packagekitglib-1.0==1.2.5-2ubuntu2
+  - git==1:2.34.1-1ubuntu1.11
+  - git-man==1:2.34.1-1ubuntu1.11
+  - gnupg==2.2.27-3ubuntu2.1
+  - gnupg-l10n==2.2.27-3ubuntu2.1
+  - gnupg-utils==2.2.27-3ubuntu2.1
+  - gnupg2==2.2.27-3ubuntu2.1
+  - gpg==2.2.27-3ubuntu2.1
+  - gpg-agent==2.2.27-3ubuntu2.1
+  - gpg-wks-client==2.2.27-3ubuntu2.1
+  - gpg-wks-server==2.2.27-3ubuntu2.1
+  - gpgconf==2.2.27-3ubuntu2.1
+  - gpgsm==2.2.27-3ubuntu2.1
+  - gpgv==2.2.27-3ubuntu2.1
+  - grep==3.7-1build1
+  - gzip==1.10-4ubuntu4.1
+  - hostname==3.23ubuntu2
+  - init-system-helpers==1.62
+  - iso-codes==4.9.0-1
+  - less==590-1ubuntu0.22.04.3
+  - libacl1==2.3.1-1
+  - libapparmor1==3.0.4-2ubuntu2.3
+  - libappstream4==0.15.2-2
+  - libapt-pkg6.0==2.4.10
+  - libargon2-1==0~20171227-0.3
+  - libasan6==11.4.0-1ubuntu1~22.04
+  - libassuan0==2.5.5-1build1
+  - libatomic1==12.3.0-1ubuntu1~22.04
+  - libattr1==1:2.5.1-1build1
+  - libaudit-common==1:3.0.7-1build1
+  - libaudit1==1:3.0.7-1build1
+  - libbinutils==2.38-4ubuntu2.3
+  - libblkid1==2.37.2-4ubuntu3
+  - libbrotli1==1.0.9-2build6
+  - libbsd0==0.11.5-1
+  - libbz2-1.0==1.0.8-5build1
+  - libc-bin==2.35-0ubuntu3.4
+  - libc-dev-bin==2.35-0ubuntu3.4
+  - libc6==2.35-0ubuntu3.4
+  - libc6-dev==2.35-0ubuntu3.4
+  - libcap-ng0==0.7.9-2.2build3
+  - libcap2==1:2.44-1ubuntu0.22.04.1
+  - libcap2-bin==1:2.44-1ubuntu0.22.04.1
+  - libcbor0.8==0.8.0-2ubuntu1
+  - libcc1-0==12.3.0-1ubuntu1~22.04
+  - libcom-err2==1.46.5-2ubuntu1.1
+  - libcrypt-dev==1:4.4.27-1
+  - libcrypt1==1:4.4.27-1
+  - libcryptsetup12==2:2.4.3-1ubuntu1.2
+  - libctf-nobfd0==2.38-4ubuntu2.3
+  - libctf0==2.38-4ubuntu2.3
+  - libcublas-12-0==12.0.2.224-1
+  - libcublas-dev-12-0==12.0.2.224-1
+  - libcufft-12-0==11.0.1.95-1
+  - libcufft-dev-12-0==11.0.1.95-1
+  - libcufile-12-0==1.5.1.14-1
+  - libcufile-dev-12-0==1.5.1.14-1
+  - libcurand-12-0==10.3.1.124-1
+  - libcurand-dev-12-0==10.3.1.124-1
+  - libcurl3-gnutls==7.81.0-1ubuntu1.17
+  - libcurl4==7.81.0-1ubuntu1.17
+  - libcusolver-12-0==11.4.3.1-1
+  - libcusolver-dev-12-0==11.4.3.1-1
+  - libcusparse-12-0==12.0.1.140-1
+  - libcusparse-dev-12-0==12.0.1.140-1
+  - libdb5.3==5.3.28+dfsg1-0.8ubuntu3
+  - libdbus-1-3==1.12.20-2ubuntu4.1
+  - libdebconfclient0==0.261ubuntu1
+  - libdevmapper1.02.1==2:1.02.175-2.1ubuntu4
+  - libdpkg-perl==1.21.1ubuntu2.2
+  - libdw1==0.186-1build1
+  - libedit2==3.1-20210910-1build1
+  - libelf1==0.186-1build1
+  - liberror-perl==0.17029-1
+  - libevent-core-2.1-7==2.1.12-stable-1build3
+  - libexpat1==2.4.7-1ubuntu0.3
+  - libext2fs2==1.46.5-2ubuntu1.1
+  - libffi8==3.4.2-4
+  - libfido2-1==1.10.0-1
+  - libgcc-11-dev==11.4.0-1ubuntu1~22.04
+  - libgcc-s1==12.3.0-1ubuntu1~22.04
+  - libgcrypt20==1.9.4-3ubuntu3
+  - libgdbm-compat4==1.23-1
+  - libgdbm6==1.23-1
+  - libgirepository-1.0-1==1.72.0-1
+  - libglib2.0-0==2.72.4-0ubuntu2.3
+  - libglib2.0-bin==2.72.4-0ubuntu2.3
+  - libglib2.0-data==2.72.4-0ubuntu2.3
+  - libgmp10==2:6.2.1+dfsg-3ubuntu1
+  - libgnutls30==3.7.3-4ubuntu1.2
+  - libgomp1==12.3.0-1ubuntu1~22.04
+  - libgpg-error0==1.43-3
+  - libgssapi-krb5-2==1.19.2-2ubuntu0.2
+  - libgstreamer1.0-0==1.20.3-0ubuntu1
+  - libhogweed6==3.7.3-1build2
+  - libicu70==70.1-2
+  - libidn2-0==2.3.2-2build1
+  - libip4tc2==1.8.7-1ubuntu5.2
+  - libisl23==0.24-2build1
+  - libitm1==12.3.0-1ubuntu1~22.04
+  - libjson-c5==0.15-3~ubuntu1.22.04.2
+  - libk5crypto3==1.19.2-2ubuntu0.2
+  - libkeyutils1==1.6.1-2ubuntu3
+  - libkmod2==29-1ubuntu1
+  - libkrb5-3==1.19.2-2ubuntu0.2
+  - libkrb5support0==1.19.2-2ubuntu0.2
+  - libksba8==1.6.0-2ubuntu0.2
+  - libldap-2.5-0==2.5.16+dfsg-0ubuntu0.22.04.1
+  - liblsan0==12.3.0-1ubuntu1~22.04
+  - liblz4-1==1.9.3-2build2
+  - liblzma5==5.2.5-2ubuntu1
+  - libmd0==1.0.4-1build1
+  - libmount1==2.37.2-4ubuntu3
+  - libmpc3==1.2.1-2build1
+  - libmpdec3==2.5.1-2build2
+  - libmpfr6==4.1.0-3build3
+  - libnccl-dev==2.16.5-1+cuda12.0
+  - libnccl2==2.16.5-1+cuda12.0
+  - libncurses6==6.3-2ubuntu0.1
+  - libncursesw6==6.3-2ubuntu0.1
+  - libnettle8==3.7.3-1build2
+  - libnghttp2-14==1.43.0-1ubuntu0.2
+  - libnpp-12-0==12.0.1.104-1
+  - libnpp-dev-12-0==12.0.1.104-1
+  - libnpth0==1.6-3build2
+  - libnsl-dev==1.3.0-2build2
+  - libnsl2==1.3.0-2build2
+  - libnvjitlink-12-0==12.0.140-1
+  - libnvjitlink-dev-12-0==12.0.140-1
+  - libnvjpeg-12-0==12.0.1.102-1
+  - libnvjpeg-dev-12-0==12.0.1.102-1
+  - libp11-kit0==0.24.0-6build1
+  - libpackagekit-glib2-18==1.2.5-2ubuntu2
+  - libpam-modules==1.4.0-11ubuntu2.3
+  - libpam-modules-bin==1.4.0-11ubuntu2.3
+  - libpam-runtime==1.4.0-11ubuntu2.3
+  - libpam-systemd==249.11-0ubuntu3.12
+  - libpam0g==1.4.0-11ubuntu2.3
+  - libpcre2-8-0==10.39-3ubuntu0.1
+  - libpcre3==2:8.39-13ubuntu0.22.04.1
+  - libperl5.34==5.34.0-3ubuntu1.2
+  - libpolkit-agent-1-0==0.105-33
+  - libpolkit-gobject-1-0==0.105-33
+  - libpopt0==1.18-3build1
+  - libprocps8==2:3.3.17-6ubuntu2
+  - libpsl5==0.21.0-1.2build2
+  - libpython3-stdlib==3.10.6-1~22.04
+  - libpython3.10-minimal==3.10.12-1~22.04.5
+  - libpython3.10-stdlib==3.10.12-1~22.04.5
+  - libquadmath0==12.3.0-1ubuntu1~22.04
+  - libreadline8==8.1.2-1
+  - librtmp1==2.4+20151223.gitfa8646d.1-2build4
+  - libsasl2-2==2.1.27+dfsg2-3ubuntu1.2
+  - libsasl2-modules-db==2.1.27+dfsg2-3ubuntu1.2
+  - libseccomp2==2.5.3-2ubuntu2
+  - libselinux1==3.3-1build2
+  - libsemanage-common==3.3-1build2
+  - libsemanage2==3.3-1build2
+  - libsepol2==3.3-1build1
+  - libsmartcols1==2.37.2-4ubuntu3
+  - libsqlite3-0==3.37.2-2ubuntu0.1
+  - libss2==1.46.5-2ubuntu1.1
+  - libssh-4==0.9.6-2ubuntu0.22.04.3
+  - libssl3==3.0.2-0ubuntu1.10
+  - libstdc++-11-dev==11.4.0-1ubuntu1~22.04
+  - libstdc++6==12.3.0-1ubuntu1~22.04
+  - libstemmer0d==2.2.0-1build1
+  - libsystemd0==249.11-0ubuntu3.12
+  - libtasn1-6==4.18.0-4build1
+  - libtinfo6==6.3-2ubuntu0.1
+  - libtirpc-common==1.3.2-2ubuntu0.1
+  - libtirpc-dev==1.3.2-2ubuntu0.1
+  - libtirpc3==1.3.2-2ubuntu0.1
+  - libtsan0==11.4.0-1ubuntu1~22.04
+  - libubsan1==12.3.0-1ubuntu1~22.04
+  - libudev1==249.11-0ubuntu3.10
+  - libunistring2==1.0-1
+  - libunwind8==1.3.2-2build2.1
+  - libutempter0==1.2.1-2build2
+  - libuuid1==2.37.2-4ubuntu3
+  - libwrap0==7.6.q-31build2
+  - libxml2==2.9.13+dfsg-1ubuntu0.4
+  - libxmlb2==0.3.6-2build1
+  - libxxhash0==0.8.1-1
+  - libyaml-0-2==0.2.2-1build2
+  - libzstd1==1.4.8+dfsg-3build1
+  - linux-libc-dev==5.15.0-88.98
+  - locales==2.35-0ubuntu3.8
+  - login==1:4.8.1-2ubuntu2.1
+  - logsave==1.46.5-2ubuntu1.1
+  - lsb-base==11.1.0ubuntu4
+  - lsb-release==11.1.0ubuntu4
+  - lto-disabled-list==24
+  - make==4.3-4.1build1
+  - mawk==1.3.4.20200120-3
+  - media-types==7.0.0
+  - mount==2.37.2-4ubuntu3
+  - ncurses-base==6.3-2ubuntu0.1
+  - ncurses-bin==6.3-2ubuntu0.1
+  - nsight-compute-2022.4.1==2022.4.1.6-1
+  - openssh-client==1:8.9p1-3ubuntu0.10
+  - openssh-server==1:8.9p1-3ubuntu0.10
+  - openssh-sftp-server==1:8.9p1-3ubuntu0.10
+  - openssl==3.0.2-0ubuntu1.12
+  - packagekit==1.2.5-2ubuntu2
+  - passwd==1:4.8.1-2ubuntu2.1
+  - patch==2.7.6-7build2
+  - perl==5.34.0-3ubuntu1.2
+  - perl-base==5.34.0-3ubuntu1.2
+  - perl-modules-5.34==5.34.0-3ubuntu1.2
+  - pinentry-curses==1.1.1-1build2
+  - pkexec==0.105-33
+  - policykit-1==0.105-33
+  - polkitd==0.105-33
+  - procps==2:3.3.17-6ubuntu2
+  - python-apt-common==2.4.0ubuntu3
+  - python3==3.10.6-1~22.04
+  - python3-apt==2.4.0ubuntu3
+  - python3-blinker==1.4+dfsg1-0.4
+  - python3-cffi-backend==1.15.0-1build2
+  - python3-cryptography==3.4.8-1ubuntu2.2
+  - python3-dbus==1.2.18-3build1
+  - python3-distro==1.7.0-1
+  - python3-gi==3.42.1-0ubuntu1
+  - python3-httplib2==0.20.2-2
+  - python3-importlib-metadata==4.6.4-1
+  - python3-jeepney==0.7.1-3
+  - python3-jwt==2.3.0-1ubuntu0.2
+  - python3-keyring==23.5.0-1
+  - python3-launchpadlib==1.10.16-1
+  - python3-lazr.restfulclient==0.14.4-1
+  - python3-lazr.uri==1.0.6-2
+  - python3-minimal==3.10.6-1~22.04
+  - python3-more-itertools==8.10.0-2
+  - python3-oauthlib==3.2.0-1ubuntu0.1
+  - python3-pkg-resources==59.6.0-1.2ubuntu0.22.04.1
+  - python3-pyparsing==2.4.7-1
+  - python3-secretstorage==3.3.1-1
+  - python3-six==1.16.0-3ubuntu1
+  - python3-software-properties==0.99.22.9
+  - python3-wadllib==1.3.6-1
+  - python3-zipp==1.0.0-3ubuntu0.1
+  - python3.10==3.10.12-1~22.04.5
+  - python3.10-minimal==3.10.12-1~22.04.5
+  - readline-common==8.1.2-1
+  - rpcsvc-proto==1.4.2-0ubuntu6
+  - rsync==3.2.7-0ubuntu0.22.04.2
+  - sed==4.8-1ubuntu2
+  - sensible-utils==0.0.17
+  - software-properties-common==0.99.22.9
+  - sudo==1.9.9-1ubuntu2.4
+  - systemd==249.11-0ubuntu3.12
+  - systemd-sysv==249.11-0ubuntu3.12
+  - sysvinit-utils==3.01-1ubuntu1
+  - tar==1.34+dfsg-1ubuntu0.1.22.04.1
+  - tmux==3.2a-4ubuntu0.2
+  - ubuntu-keyring==2021.03.26
+  - ucf==3.0043
+  - usrmerge==25ubuntu2
+  - util-linux==2.37.2-4ubuntu3
+  - wget==1.21.2-2ubuntu1.1
+  - xz-utils==5.2.5-2ubuntu1
+  - zlib1g==1:1.2.11.dfsg-2ubuntu9.2
+  machine: x86_64
+  os: Linux
+  os_version: '#129~20.04.1-Ubuntu SMP Wed Aug 7 13:07:13 UTC 2024'
+  processor: x86_64
+  release: 5.15.0-119-generic