diff --git a/tests/integration/results/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/experiment.yaml b/tests/integration/results/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/experiment.yaml
new file mode 100644
index 0000000..cec812c
--- /dev/null
+++ b/tests/integration/results/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/experiment.yaml
@@ -0,0 +1,6 @@
+comment: 1x 1x RTX A5000
+experiment: vllm_16g
+experiment_hash: exp_hash_v1:a37636
+run_id: vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000
+slug: 1x_1x_rtx_a5000
+timestamp: 2024-08-27_14-21-55
diff --git a/tests/integration/results/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/output.yaml b/tests/integration/results/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/output.yaml
new file mode 100644
index 0000000..80115b9
--- /dev/null
+++ b/tests/integration/results/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/output.yaml
@@ -0,0 +1,8 @@
+Count to 1000, skip unpopular numbers: 47e766e6b9c4a9530cca0163f395f6bf814a5977b1543e330c7cf9985e68b3f9304c9b58901989b36647313fb3279a276fcbd9abc107f3a84d982ed8713b0c7e
+Describe justice system in UK vs USA in 2000-5000 words: 331a2d29206403d9a76027eb005c9d5e202bcc26d49dc7dd31a4026b612bbafca22264f75cb22548174bdc4675b7a8acb5892d3dd8203b81991bc3fb25995d6b
+Describe schooling system in UK vs USA in 2000-5000 words: 0621808e3ca6777d55024775b4b01ca1b7fa452f713137c72be02ba8c30683b5d19dfb6241ef1605e10d1b90b541f08e15df632e77960ff15b370163fb814d0b
+Explain me some random problem for me in 2000-5000 words: 9f05037cdf6126545027c496e06b7a86b987364e1e07664948b2a5190ff955f7e81c2d3f792752b621f7180e46a6c5c5f379be579f7aeb89ed87722cfaa41f5d
+Tell me entire history of USA: 318fd13f7cbee849f351f5919364f34a8fcafe202050d91d630b3c10523075516d9eaff838f808e3c0ba785dcf857fa03a8a5308b002ffb323a74d4d04f5e3f4
+Write a ballad. Pick a random theme.: 1d5e78e142e62ddeb914053bec41f9987a5b3d9cdf2fd39ac4835460c2b003fa3991799ed0fb9bd4ad068b6a2d63b19c80ebc2e99c29286205c8db71b361929b
+Write an epic story about a dragon and a knight: b5901ec5d261c379acec9180c658e024b5665b3b9a1c52202647a68f7f125467e65a35b3e941e8c7a669689b73cfce49273eacd3e59fd2d3d5c8a0c0686dd9fb
+Write an essay about being a Senior developer.: a6068b4300b724b84ee1ba949824a4647430b745162e21acb85cc4bb9704dc8316ad04d740e18c8c666a4dabf2eee98d3c443b7cb350e6f1441f4c42de5a2f25
diff --git a/tests/integration/results/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/run.local.log b/tests/integration/results/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/run.local.log
new file mode 100644
index 0000000..d11f01a
--- /dev/null
+++ b/tests/integration/results/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/run.local.log
@@ -0,0 +1,15 @@
+2024-08-27 14:21:55,507 - __main__ - INFO - Starting experiment vllm_16g with comment: 1x 1x RTX A5000
+2024-08-27 14:21:55,510 - __main__ - INFO - Local log file: /home/rooter/dev/bac/deterministic-ml/tests/integration/results/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/run.local.log
+2024-08-27 14:21:55,644 - paramiko.transport - INFO - Connected (version 2.0, client OpenSSH_8.9p1)
+2024-08-27 14:21:55,891 - paramiko.transport - INFO - Auth banner: b'Welcome to vast.ai. If authentication fails, try again after a few seconds, and double check your ssh key.\nHave fun!\n'
+2024-08-27 14:21:55,896 - paramiko.transport - INFO - Authentication (publickey) successful!
+2024-08-27 14:21:55,899 - __main__ - INFO - Syncing files to remote
+2024-08-27 14:21:56,106 - tools.ssh - INFO - Command: 'mkdir -p ~/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/output' stdout: '' stderr: '' status_code: 0
+2024-08-27 14:21:59,804 - __main__ - INFO - Setting up remote environment
+2024-08-27 14:23:02,082 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n curl -LsSf https://astral.sh/uv/install.sh | sh\n export PATH=$HOME/.cargo/bin:$PATH\n \n cd ~/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000\n uv venv -p python3.11 --python-preference managed\n source .venv/bin/activate \n uv pip install ./deterministic_ml*.whl pyyaml -r vllm_16g/requirements.txt\n ' stdout: "installing to /root/.cargo/bin\n uv\n uvx\neverything's installed!\n\nTo add $HOME/.cargo/bin to your PATH, either restart your shell or run:\n\n source $HOME/.cargo/env (sh, bash, zsh)\n source $HOME/.cargo/env.fish (fish)\n" stderr: "+ curl -LsSf https://astral.sh/uv/install.sh\n+ sh\ndownloading uv 0.3.4 x86_64-unknown-linux-gnu\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ cd /root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000\n+ uv venv -p python3.11 --python-preference managed\nUsing Python 3.11.9\nCreating virtualenv at: .venv\nActivate with: source .venv/bin/activate\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' 
nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_14-21-55_1x_1x_rtx_a5000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-21-55_1x_1x_rtx_a5000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_14-21-55_1x_1x_rtx_a5000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ uv pip install ./deterministic_ml-0.1.dev7+ge44d014.d20240827-py3-none-any.whl pyyaml -r vllm_16g/requirements.txt\nResolved 124 packages in 2.08s\nPrepared 124 packages in 39.29s\nInstalled 124 packages in 10.29s\n + aiohappyeyeballs==2.4.0\n + aiohttp==3.10.5\n + aiosignal==1.3.1\n + annotated-types==0.7.0\n + anyio==4.4.0\n + attrs==24.2.0\n + audioread==3.0.1\n + certifi==2024.7.4\n + cffi==1.17.0\n + charset-normalizer==3.3.2\n + click==8.1.7\n + cloudpickle==3.0.0\n + datasets==2.21.0\n + decorator==5.1.1\n + deterministic-ml==0.1.dev7+ge44d014.d20240827 (from file:///root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/deterministic_ml-0.1.dev7+ge44d014.d20240827-py3-none-any.whl)\n + dill==0.3.8\n + diskcache==5.6.3\n + distro==1.9.0\n + fastapi==0.112.2\n + filelock==3.15.4\n + frozenlist==1.4.1\n + fsspec==2024.6.1\n + gguf==0.9.1\n + h11==0.14.0\n + httpcore==1.0.5\n + httptools==0.6.1\n + httpx==0.27.1\n + huggingface-hub==0.24.6\n + idna==3.8\n + importlib-metadata==8.4.0\n + interegular==0.3.3\n + jinja2==3.1.4\n + jiter==0.5.0\n + joblib==1.4.2\n + jsonschema==4.23.0\n + jsonschema-specifications==2023.12.1\n + lark==1.2.2\n + lazy-loader==0.4\n + librosa==0.10.2.post1\n + llvmlite==0.43.0\n + lm-format-enforcer==0.10.6\n + markupsafe==2.1.5\n + mpmath==1.3.0\n + msgpack==1.0.8\n + msgspec==0.18.6\n + multidict==6.0.5\n + multiprocess==0.70.16\n + nest-asyncio==1.6.0\n + networkx==3.3\n + numba==0.60.0\n + numpy==1.26.4\n + nvidia-cublas-cu12==12.1.3.1\n + nvidia-cuda-cupti-cu12==12.1.105\n + nvidia-cuda-nvrtc-cu12==12.1.105\n + nvidia-cuda-runtime-cu12==12.1.105\n + nvidia-cudnn-cu12==9.1.0.70\n + nvidia-cufft-cu12==11.0.2.54\n + nvidia-curand-cu12==10.3.2.106\n + nvidia-cusolver-cu12==11.4.5.107\n + nvidia-cusparse-cu12==12.1.0.106\n + nvidia-ml-py==12.560.30\n + nvidia-nccl-cu12==2.20.5\n + nvidia-nvjitlink-cu12==12.6.20\n + nvidia-nvtx-cu12==12.1.105\n + openai==1.42.0\n + outlines==0.0.46\n + packaging==24.1\n + pandas==2.2.2\n + pillow==10.4.0\n + platformdirs==4.2.2\n + pooch==1.8.2\n + prometheus-client==0.20.0\n + prometheus-fastapi-instrumentator==7.0.0\n + protobuf==5.27.3\n + psutil==6.0.0\n + py-cpuinfo==9.0.0\n + pyairports==2.1.1\n + pyarrow==17.0.0\n + pycountry==24.6.1\n + pycparser==2.22\n + pydantic==2.8.2\n + pydantic-core==2.20.1\n + python-dateutil==2.9.0.post0\n + python-dotenv==1.0.1\n + pytz==2024.1\n + pyyaml==6.0.2\n + pyzmq==26.2.0\n + ray==2.34.0\n + referencing==0.35.1\n + regex==2024.7.24\n + requests==2.32.3\n + rpds-py==0.20.0\n + safetensors==0.4.4\n + scikit-learn==1.5.1\n + scipy==1.14.1\n + sentencepiece==0.2.0\n + setuptools==73.0.1\n + six==1.16.0\n + sniffio==1.3.1\n + 
soundfile==0.12.1\n + soxr==0.5.0\n + starlette==0.38.2\n + sympy==1.13.2\n + threadpoolctl==3.5.0\n + tiktoken==0.7.0\n + tokenizers==0.19.1\n + torch==2.4.0\n + torchvision==0.19.0\n + tqdm==4.66.5\n + transformers==4.44.2\n + triton==3.0.0\n + typing-extensions==4.12.2\n + tzdata==2024.1\n + urllib3==2.2.2\n + uvicorn==0.30.6\n + uvloop==0.20.0\n + vllm==0.5.5\n + vllm-flash-attn==2.6.1\n + watchfiles==0.23.0\n + websockets==13.0\n + xformers==0.0.27.post2\n + xxhash==3.5.0\n + yarl==1.9.4\n + zipp==3.20.1\n" status_code: 0
+2024-08-27 14:23:02,091 - __main__ - INFO - Gathering system info
+2024-08-27 14:23:06,086 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m deterministic_ml._internal.sysinfo > ~/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_14-21-55_1x_1x_rtx_a5000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-21-55_1x_1x_rtx_a5000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_14-21-55_1x_1x_rtx_a5000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-27 14:23:06,090 - __main__ - INFO - Running experiment code on remote
+2024-08-27 14:26:53,904 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_16g ~/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/output | tee ~/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-27 12:23:17 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, rope_scaling=None, rope_theta=None, tokenizer_revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, 
tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-27 12:23:20 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:23:20 selector.py:116] Using XFormers backend.\nINFO 08-27 12:23:21 model_runner.py:879] Starting to load model microsoft/Phi-3-mini-4k-instruct...\nINFO 08-27 12:23:21 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:23:21 selector.py:116] Using XFormers backend.\nINFO 08-27 12:23:22 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-27 12:24:34 model_runner.py:890] Loading model weights took 7.1183 GB\nINFO 08-27 12:24:35 gpu_executor.py:121] # GPU blocks: 2004, # CPU blocks: 682\nmodel loading took 80.13 seconds\nStarting 8 responses generation\n8 responses generation took 134.87 seconds\n{'Count to 1000, skip unpopular numbers': '47e766e6b9c4a9530cca0163f395f6bf814a5977b1543e330c7cf9985e68b3f9304c9b58901989b36647313fb3279a276fcbd9abc107f3a84d982ed8713b0c7e',\n 'Describe justice system in UK vs USA in 2000-5000 words': '331a2d29206403d9a76027eb005c9d5e202bcc26d49dc7dd31a4026b612bbafca22264f75cb22548174bdc4675b7a8acb5892d3dd8203b81991bc3fb25995d6b',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '0621808e3ca6777d55024775b4b01ca1b7fa452f713137c72be02ba8c30683b5d19dfb6241ef1605e10d1b90b541f08e15df632e77960ff15b370163fb814d0b',\n 'Explain me some random problem for me in 2000-5000 words': '9f05037cdf6126545027c496e06b7a86b987364e1e07664948b2a5190ff955f7e81c2d3f792752b621f7180e46a6c5c5f379be579f7aeb89ed87722cfaa41f5d',\n 'Tell me entire history of USA': '318fd13f7cbee849f351f5919364f34a8fcafe202050d91d630b3c10523075516d9eaff838f808e3c0ba785dcf857fa03a8a5308b002ffb323a74d4d04f5e3f4',\n 'Write a ballad. 
Pick a random theme.': '1d5e78e142e62ddeb914053bec41f9987a5b3d9cdf2fd39ac4835460c2b003fa3991799ed0fb9bd4ad068b6a2d63b19c80ebc2e99c29286205c8db71b361929b',\n 'Write an epic story about a dragon and a knight': 'b5901ec5d261c379acec9180c658e024b5665b3b9a1c52202647a68f7f125467e65a35b3e941e8c7a669689b73cfce49273eacd3e59fd2d3d5c8a0c0686dd9fb',\n 'Write an essay about being a Senior developer.': 'a6068b4300b724b84ee1ba949824a4647430b745162e21acb85cc4bb9704dc8316ad04d740e18c8c666a4dabf2eee98d3c443b7cb350e6f1441f4c42de5a2f25'}\n" stderr: '+ cd /root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-27_14-21-55_1x_1x_rtx_a5000 \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-21-55_1x_1x_rtx_a5000\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ PS1=\'(2024-08-27_14-21-55_1x_1x_rtx_a5000) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_16g /root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/output\n+ tee /root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/output/stdout.txt\n/root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_fwd")\n/root/experiments/vllm_16g/2024-08-27_14-21-55_1x_1x_rtx_a5000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. 
Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00 ~/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_14-30-01_1x_1x_rtx_a5000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-30-01_1x_1x_rtx_a5000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_14-30-01_1x_1x_rtx_a5000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-27 14:30:12,293 - __main__ - INFO - Running experiment code on remote
+2024-08-27 14:32:48,675 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_16g ~/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/output | tee ~/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-27 12:30:23 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, rope_scaling=None, rope_theta=None, tokenizer_revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-27 12:30:23 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:30:23 
selector.py:116] Using XFormers backend.\nINFO 08-27 12:30:24 model_runner.py:879] Starting to load model microsoft/Phi-3-mini-4k-instruct...\nINFO 08-27 12:30:24 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:30:24 selector.py:116] Using XFormers backend.\nINFO 08-27 12:30:25 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-27 12:30:29 model_runner.py:890] Loading model weights took 7.1183 GB\nINFO 08-27 12:30:31 gpu_executor.py:121] # GPU blocks: 2004, # CPU blocks: 682\nmodel loading took 9.64 seconds\nStarting 8 responses generation\n8 responses generation took 134.12 seconds\n{'Count to 1000, skip unpopular numbers': '47e766e6b9c4a9530cca0163f395f6bf814a5977b1543e330c7cf9985e68b3f9304c9b58901989b36647313fb3279a276fcbd9abc107f3a84d982ed8713b0c7e',\n 'Describe justice system in UK vs USA in 2000-5000 words': '331a2d29206403d9a76027eb005c9d5e202bcc26d49dc7dd31a4026b612bbafca22264f75cb22548174bdc4675b7a8acb5892d3dd8203b81991bc3fb25995d6b',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '0621808e3ca6777d55024775b4b01ca1b7fa452f713137c72be02ba8c30683b5d19dfb6241ef1605e10d1b90b541f08e15df632e77960ff15b370163fb814d0b',\n 'Explain me some random problem for me in 2000-5000 words': '9f05037cdf6126545027c496e06b7a86b987364e1e07664948b2a5190ff955f7e81c2d3f792752b621f7180e46a6c5c5f379be579f7aeb89ed87722cfaa41f5d',\n 'Tell me entire history of USA': '318fd13f7cbee849f351f5919364f34a8fcafe202050d91d630b3c10523075516d9eaff838f808e3c0ba785dcf857fa03a8a5308b002ffb323a74d4d04f5e3f4',\n 'Write a ballad. Pick a random theme.': '1d5e78e142e62ddeb914053bec41f9987a5b3d9cdf2fd39ac4835460c2b003fa3991799ed0fb9bd4ad068b6a2d63b19c80ebc2e99c29286205c8db71b361929b',\n 'Write an epic story about a dragon and a knight': 'b5901ec5d261c379acec9180c658e024b5665b3b9a1c52202647a68f7f125467e65a35b3e941e8c7a669689b73cfce49273eacd3e59fd2d3d5c8a0c0686dd9fb',\n 'Write an essay about being a Senior developer.': 'a6068b4300b724b84ee1ba949824a4647430b745162e21acb85cc4bb9704dc8316ad04d740e18c8c666a4dabf2eee98d3c443b7cb350e6f1441f4c42de5a2f25'}\n" stderr: '+ cd /root/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-27_14-30-01_1x_1x_rtx_a5000 \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-30-01_1x_1x_rtx_a5000\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' 
-z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ PS1=\'(2024-08-27_14-30-01_1x_1x_rtx_a5000) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_16g /root/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/output\n+ tee /root/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/output/stdout.txt\n/root/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_fwd")\n/root/experiments/vllm_16g/2024-08-27_14-30-01_1x_1x_rtx_a5000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00 ~/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' 
nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_14-39-44_1x_1x_rtx_a5000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-39-44_1x_1x_rtx_a5000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_14-39-44_1x_1x_rtx_a5000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-27 14:39:59,370 - __main__ - INFO - Running experiment code on remote
+2024-08-27 14:42:36,467 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_16g ~/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/output | tee ~/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-27 12:40:10 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, rope_scaling=None, rope_theta=None, tokenizer_revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-27 12:40:10 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:40:10 selector.py:116] Using XFormers backend.\nINFO 08-27 12:40:11 model_runner.py:879] Starting to load model microsoft/Phi-3-mini-4k-instruct...\nINFO 08-27 12:40:12 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:40:12 selector.py:116] Using XFormers backend.\nINFO 08-27 12:40:12 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-27 12:40:16 model_runner.py:890] Loading model weights took 7.1183 GB\nINFO 08-27 12:40:18 gpu_executor.py:121] # GPU blocks: 2004, # CPU blocks: 682\nmodel loading took 9.59 seconds\nStarting 8 responses generation\n8 responses generation took 134.79 seconds\n{'Count to 1000, skip unpopular numbers': '47e766e6b9c4a9530cca0163f395f6bf814a5977b1543e330c7cf9985e68b3f9304c9b58901989b36647313fb3279a276fcbd9abc107f3a84d982ed8713b0c7e',\n 'Describe justice system in UK vs USA in 2000-5000 words': 
'331a2d29206403d9a76027eb005c9d5e202bcc26d49dc7dd31a4026b612bbafca22264f75cb22548174bdc4675b7a8acb5892d3dd8203b81991bc3fb25995d6b',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '0621808e3ca6777d55024775b4b01ca1b7fa452f713137c72be02ba8c30683b5d19dfb6241ef1605e10d1b90b541f08e15df632e77960ff15b370163fb814d0b',\n 'Explain me some random problem for me in 2000-5000 words': '9f05037cdf6126545027c496e06b7a86b987364e1e07664948b2a5190ff955f7e81c2d3f792752b621f7180e46a6c5c5f379be579f7aeb89ed87722cfaa41f5d',\n 'Tell me entire history of USA': '318fd13f7cbee849f351f5919364f34a8fcafe202050d91d630b3c10523075516d9eaff838f808e3c0ba785dcf857fa03a8a5308b002ffb323a74d4d04f5e3f4',\n 'Write a ballad. Pick a random theme.': '1d5e78e142e62ddeb914053bec41f9987a5b3d9cdf2fd39ac4835460c2b003fa3991799ed0fb9bd4ad068b6a2d63b19c80ebc2e99c29286205c8db71b361929b',\n 'Write an epic story about a dragon and a knight': 'b5901ec5d261c379acec9180c658e024b5665b3b9a1c52202647a68f7f125467e65a35b3e941e8c7a669689b73cfce49273eacd3e59fd2d3d5c8a0c0686dd9fb',\n 'Write an essay about being a Senior developer.': 'a6068b4300b724b84ee1ba949824a4647430b745162e21acb85cc4bb9704dc8316ad04d740e18c8c666a4dabf2eee98d3c443b7cb350e6f1441f4c42de5a2f25'}\n" stderr: '+ cd /root/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-27_14-39-44_1x_1x_rtx_a5000 \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-39-44_1x_1x_rtx_a5000\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ PS1=\'(2024-08-27_14-39-44_1x_1x_rtx_a5000) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_16g /root/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/output\n+ tee /root/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/output/stdout.txt\n/root/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. 
Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_fwd")\n/root/experiments/vllm_16g/2024-08-27_14-39-44_1x_1x_rtx_a5000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00 ~/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_14-44-48_1x_1x_rtx_a5000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-44-48_1x_1x_rtx_a5000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_14-44-48_1x_1x_rtx_a5000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-27 14:45:54,056 - __main__ - INFO - Running experiment code on remote
+2024-08-27 14:49:04,620 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_16g ~/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/output | tee ~/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-27 12:46:05 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, rope_scaling=None, rope_theta=None, tokenizer_revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), 
observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-27 12:46:06 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:46:06 selector.py:116] Using XFormers backend.\nINFO 08-27 12:46:08 model_runner.py:879] Starting to load model microsoft/Phi-3-mini-4k-instruct...\nINFO 08-27 12:46:08 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:46:08 selector.py:116] Using XFormers backend.\nINFO 08-27 12:46:09 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-27 12:47:20 model_runner.py:890] Loading model weights took 7.1183 GB\nINFO 08-27 12:47:21 gpu_executor.py:121] # GPU blocks: 2233, # CPU blocks: 682\nmodel loading took 78.82 seconds\nStarting 8 responses generation\n8 responses generation took 99.26 seconds\n{'Count to 1000, skip unpopular numbers': '47e766e6b9c4a9530cca0163f395f6bf814a5977b1543e330c7cf9985e68b3f9304c9b58901989b36647313fb3279a276fcbd9abc107f3a84d982ed8713b0c7e',\n 'Describe justice system in UK vs USA in 2000-5000 words': '331a2d29206403d9a76027eb005c9d5e202bcc26d49dc7dd31a4026b612bbafca22264f75cb22548174bdc4675b7a8acb5892d3dd8203b81991bc3fb25995d6b',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '0621808e3ca6777d55024775b4b01ca1b7fa452f713137c72be02ba8c30683b5d19dfb6241ef1605e10d1b90b541f08e15df632e77960ff15b370163fb814d0b',\n 'Explain me some random problem for me in 2000-5000 words': '9f05037cdf6126545027c496e06b7a86b987364e1e07664948b2a5190ff955f7e81c2d3f792752b621f7180e46a6c5c5f379be579f7aeb89ed87722cfaa41f5d',\n 'Tell me entire history of USA': '318fd13f7cbee849f351f5919364f34a8fcafe202050d91d630b3c10523075516d9eaff838f808e3c0ba785dcf857fa03a8a5308b002ffb323a74d4d04f5e3f4',\n 'Write a ballad. 
Pick a random theme.': '1d5e78e142e62ddeb914053bec41f9987a5b3d9cdf2fd39ac4835460c2b003fa3991799ed0fb9bd4ad068b6a2d63b19c80ebc2e99c29286205c8db71b361929b',\n 'Write an epic story about a dragon and a knight': 'b5901ec5d261c379acec9180c658e024b5665b3b9a1c52202647a68f7f125467e65a35b3e941e8c7a669689b73cfce49273eacd3e59fd2d3d5c8a0c0686dd9fb',\n 'Write an essay about being a Senior developer.': 'a6068b4300b724b84ee1ba949824a4647430b745162e21acb85cc4bb9704dc8316ad04d740e18c8c666a4dabf2eee98d3c443b7cb350e6f1441f4c42de5a2f25'}\n" stderr: '+ cd /root/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-27_14-44-48_1x_1x_rtx_a5000 \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-44-48_1x_1x_rtx_a5000\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ PS1=\'(2024-08-27_14-44-48_1x_1x_rtx_a5000) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_16g /root/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/output\n+ tee /root/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/output/stdout.txt\n/root/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_fwd")\n/root/experiments/vllm_16g/2024-08-27_14-44-48_1x_1x_rtx_a5000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. 
Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00 ~/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_14-56-46_1x_rtx_4090 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-56-46_1x_rtx_4090\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_14-56-46_1x_rtx_4090) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-27 14:58:14,270 - __main__ - INFO - Running experiment code on remote
+2024-08-27 15:03:55,321 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_16g ~/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/output | tee ~/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-27 12:58:26 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, rope_scaling=None, rope_theta=None, tokenizer_revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-27 12:58:29 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:58:29 selector.py:116] Using XFormers backend.\nINFO 08-27 
12:58:33 model_runner.py:879] Starting to load model microsoft/Phi-3-mini-4k-instruct...\nINFO 08-27 12:58:33 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:58:33 selector.py:116] Using XFormers backend.\nINFO 08-27 12:58:33 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-27 13:01:35 model_runner.py:890] Loading model weights took 7.1183 GB\nINFO 08-27 13:01:37 gpu_executor.py:121] # GPU blocks: 2170, # CPU blocks: 682\nmodel loading took 196.25 seconds\nStarting 8 responses generation\n8 responses generation took 130.16 seconds\n{'Count to 1000, skip unpopular numbers': '546198e7a8aab75a2e7e18ced14cfa23be3fc19362496163b14eacca7406e3527f04efd67be916fb87705c100c2e2a81e8ccdaae5d389f862020ef8f3e73a8c4',\n 'Describe justice system in UK vs USA in 2000-5000 words': 'd197beaf5d3a4d5b82f520a5845c76b5c6827d3bf1f5565c6e56f614b5f2cf588f5ab4790be2fd12f31f584923fbac4cd2b3a8b50f484221e9eb5c724cdca973',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '050a7ef858d9be0c386ceefbf2a729f0938201026477137d1a2066c1f2b135f3d3bdf0d17846a9c80fb5ab64296923e86b4ae794c60747d6784749a66d4f7e6e',\n 'Explain me some random problem for me in 2000-5000 words': 'f546a57138eb952dbb8dace7d46d06e976da96ea6e489b532824e4b42b68bdabea4d97c08ec38c4766a1827c247a550a6e542c572dcd0f7b1ca224415fcb5661',\n 'Tell me entire history of USA': '9e1870ab7240877ab7adbc86eb3b62ddf68e5a270020b454bb4c3a77a7a35a214b3eda6b819c2b6a5019bec4f9d712522f94860df377a2c3edcdc685861cb837',\n 'Write a ballad. Pick a random theme.': '30ee7bd7c5393814debaa7008acf5188ffba68af09f539bb141414376cdf322c76f81bdeca1a15a575659569cd1ddd392ff467c7cb4baec16b8cbc9fd41b018c',\n 'Write an epic story about a dragon and a knight': '6447f18aabf3bac7dd79a451a1b66f632061e7a447b25a5be8829c1501518126c64cb42f791441544902be06ea63e0450807303520bb4d3a111fbfff19641e68',\n 'Write an essay about being a Senior developer.': '6a25eb3c90d572794f575470cd8b916892db8777f7830bb0b87afc0e7126e90c090878a19294bc73a5b7a42c56de5ce04f84d368099f9d59df7a2e8d38b0ff3a'}\n" stderr: '+ cd /root/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-27_14-56-46_1x_rtx_4090 \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-56-46_1x_rtx_4090\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ 
PS1=\'(2024-08-27_14-56-46_1x_rtx_4090) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_16g /root/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/output\n+ tee /root/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/output/stdout.txt\n/root/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_fwd")\n/root/experiments/vllm_16g/2024-08-27_14-56-46_1x_rtx_4090/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00 ~/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' 
nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_14-58-04_1x_rtx_4080s '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-58-04_1x_rtx_4080s\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_14-58-04_1x_rtx_4080s) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0
+2024-08-27 14:59:35,486 - __main__ - INFO - Running experiment code on remote
+2024-08-27 15:04:20,497 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_16g ~/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/output | tee ~/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-27 12:59:46 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, rope_scaling=None, rope_theta=None, tokenizer_revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-27 12:59:48 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:59:48 selector.py:116] Using XFormers backend.\nINFO 08-27 12:59:50 model_runner.py:879] Starting to load model microsoft/Phi-3-mini-4k-instruct...\nINFO 08-27 12:59:50 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 12:59:50 selector.py:116] Using XFormers backend.\nINFO 08-27 12:59:50 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-27 13:01:03 model_runner.py:890] Loading model weights took 7.1183 GB\nINFO 08-27 13:01:04 gpu_executor.py:121] # GPU blocks: 1005, # CPU blocks: 682\nmodel loading took 80.54 seconds\nStarting 8 responses generation\nWARNING 08-27 13:02:06 scheduler.py:1242] Sequence group 7 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. 
total_num_cumulative_preemption=1\n8 responses generation took 191.26 seconds\n{'Count to 1000, skip unpopular numbers': '8826e45b35b12de22a71803cd67cfaf0e47f3d83adbc9c95d9026d864761d8a88850adb3bd115de8f28c73bccb09e7dec0c4f40f0c85972292a72bcc4c3a344e',\n 'Describe justice system in UK vs USA in 2000-5000 words': '4989a42c2e1fc4552d4f403d0e9b22b5e7144d813ea38a9c1490adc82eee1d1537408c1e0140a2726cafac0ebe8601661fd088f80bb539898a745f5ae41de5ca',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '85393953c469ff11238c24e7a8c4c8891f0b2f7cf80dc42b15c98c20f9ccb870b7ad4842f9272dd902f2bd7e3c04270adeba92e1e68979e6decd788d63df7ffa',\n 'Explain me some random problem for me in 2000-5000 words': '5a07c589a60d0c597ec31800171d07c0e5394beab896ad186472e8b589bd3998d1500076af33885addc03a116fc2bf807ebcef4beef2595d49dabae61b811ed0',\n 'Tell me entire history of USA': '61285b07965a2d1e456aa5a64450c68c3ef01377d2f6f6b87cb146932aeb3bff0ffb0b892c8fcf2c0ba65e6a8e514e78ec1f9bb1c50dc033e8d129049701d989',\n 'Write a ballad. Pick a random theme.': 'e13a867f6e7a044f0d90bcf4cca2e07373abf93bf072b66a664e1dd71f72092779797dec166559eae130d85a02660d60003c4f230e443848802ec0534abc6e6d',\n 'Write an epic story about a dragon and a knight': '29549b2b8a3ef34db9fd949e789c27c5418942548414cbc9f8e84d391e6693c5a0ac85018898d590918f584557d1820f5c58062a87c92d380a9ce42ba328e971',\n 'Write an essay about being a Senior developer.': '53a19fca3c796a5d7c5f2fa3a24577d71bb6a543e2ed367fd36be3b20b84bd17525adb389db04bec804e4134f63f58132029d1e456f4dee119226a40df024b7c'}\n" stderr: '+ cd /root/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-27_14-58-04_1x_rtx_4080s \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_14-58-04_1x_rtx_4080s\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ PS1=\'(2024-08-27_14-58-04_1x_rtx_4080s) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_16g /root/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/output\n+ tee /root/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/output/stdout.txt\n/root/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. 
Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_fwd")\n/root/experiments/vllm_16g/2024-08-27_14-58-04_1x_rtx_4080s/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00 ~/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_15-26-43_1x_rtx_4090 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_15-26-43_1x_rtx_4090\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_15-26-43_1x_rtx_4090) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-27 15:26:54,977 - __main__ - INFO - Running experiment code on remote +2024-08-27 15:29:31,245 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_16g ~/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/output | tee ~/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-27 13:27:06 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, rope_scaling=None, rope_theta=None, tokenizer_revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), 
observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-27 13:27:06 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 13:27:06 selector.py:116] Using XFormers backend.\nINFO 08-27 13:27:08 model_runner.py:879] Starting to load model microsoft/Phi-3-mini-4k-instruct...\nINFO 08-27 13:27:08 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 13:27:08 selector.py:116] Using XFormers backend.\nINFO 08-27 13:27:08 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-27 13:27:16 model_runner.py:890] Loading model weights took 7.1183 GB\nINFO 08-27 13:27:17 gpu_executor.py:121] # GPU blocks: 2222, # CPU blocks: 682\nmodel loading took 14.50 seconds\nStarting 8 responses generation\n8 responses generation took 128.13 seconds\n{'Count to 1000, skip unpopular numbers': '546198e7a8aab75a2e7e18ced14cfa23be3fc19362496163b14eacca7406e3527f04efd67be916fb87705c100c2e2a81e8ccdaae5d389f862020ef8f3e73a8c4',\n 'Describe justice system in UK vs USA in 2000-5000 words': 'd197beaf5d3a4d5b82f520a5845c76b5c6827d3bf1f5565c6e56f614b5f2cf588f5ab4790be2fd12f31f584923fbac4cd2b3a8b50f484221e9eb5c724cdca973',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '050a7ef858d9be0c386ceefbf2a729f0938201026477137d1a2066c1f2b135f3d3bdf0d17846a9c80fb5ab64296923e86b4ae794c60747d6784749a66d4f7e6e',\n 'Explain me some random problem for me in 2000-5000 words': 'f546a57138eb952dbb8dace7d46d06e976da96ea6e489b532824e4b42b68bdabea4d97c08ec38c4766a1827c247a550a6e542c572dcd0f7b1ca224415fcb5661',\n 'Tell me entire history of USA': '9e1870ab7240877ab7adbc86eb3b62ddf68e5a270020b454bb4c3a77a7a35a214b3eda6b819c2b6a5019bec4f9d712522f94860df377a2c3edcdc685861cb837',\n 'Write a ballad. 
Pick a random theme.': '30ee7bd7c5393814debaa7008acf5188ffba68af09f539bb141414376cdf322c76f81bdeca1a15a575659569cd1ddd392ff467c7cb4baec16b8cbc9fd41b018c',\n 'Write an epic story about a dragon and a knight': '6447f18aabf3bac7dd79a451a1b66f632061e7a447b25a5be8829c1501518126c64cb42f791441544902be06ea63e0450807303520bb4d3a111fbfff19641e68',\n 'Write an essay about being a Senior developer.': '6a25eb3c90d572794f575470cd8b916892db8777f7830bb0b87afc0e7126e90c090878a19294bc73a5b7a42c56de5ce04f84d368099f9d59df7a2e8d38b0ff3a'}\n" stderr: '+ cd /root/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-27_15-26-43_1x_rtx_4090 \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_15-26-43_1x_rtx_4090\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ PS1=\'(2024-08-27_15-26-43_1x_rtx_4090) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_16g /root/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/output\n+ tee /root/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/output/stdout.txt\n/root/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_fwd")\n/root/experiments/vllm_16g/2024-08-27_15-26-43_1x_rtx_4090/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. 
Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00 ~/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_15-27-49_1x_rtx_a6000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_15-27-49_1x_rtx_a6000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_15-27-49_1x_rtx_a6000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-27 15:28:01,180 - __main__ - INFO - Running experiment code on remote +2024-08-27 15:32:00,318 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_16g ~/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/output | tee ~/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-27 13:28:12 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, rope_scaling=None, rope_theta=None, tokenizer_revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-27 13:28:14 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 13:28:14 selector.py:116] Using XFormers 
backend.\nINFO 08-27 13:28:15 model_runner.py:879] Starting to load model microsoft/Phi-3-mini-4k-instruct...\nINFO 08-27 13:28:16 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 13:28:16 selector.py:116] Using XFormers backend.\nINFO 08-27 13:28:16 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-27 13:29:47 model_runner.py:890] Loading model weights took 7.1183 GB\nINFO 08-27 13:29:48 gpu_executor.py:121] # GPU blocks: 5895, # CPU blocks: 682\nmodel loading took 105.03 seconds\nStarting 8 responses generation\n8 responses generation took 121.58 seconds\n{'Count to 1000, skip unpopular numbers': '203c6738867a8bac3a1a2598ead9c1c83833c9a53417cf938307df27b4891105cba31b0b851f5a837e18c9caf1fd1f7a4715dc8fb42e360b596c14afec9654f9',\n 'Describe justice system in UK vs USA in 2000-5000 words': 'c022baf3b0a1a8cfd1c25950bdb1a756b7777f7b9629eee2e8afb353e78496fabac73f45b2819e626c6e4a520e3ff5624edb1c241809da6aa0962db26342d34c',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '9350420a44aa35c8893c8197b617c3c39e53b69db19a35b49fb45d8b18dcf8795fe243e6afaba10a63d3f63526740a56c1bf3094295ddb10c73bc869f8769592',\n 'Explain me some random problem for me in 2000-5000 words': 'b811a80b587b01aaba45fc044f9530155531790b5d581984b62f03c44884a6f422b37be9a8f14fa146abbafbaf1452d3cf66eea3a8df9162ed4fb07ffcad0d81',\n 'Tell me entire history of USA': '5bb973bbaadb2b72c0bb0200686a2bffc86b62be393b871bd772b59c5685048799a6d97a9f146789de01059752c21497b891027d3f1d02de17606af5b1defd6e',\n 'Write a ballad. Pick a random theme.': '7a78ae643d5ca4c9439b27c17a51cd4070870083f5b99bfc8159bd366b730c1e1c6619a8e21519d030b9cfb86fe7c278e72c6a20bad49f4ad9d72b4bd14f20d1',\n 'Write an epic story about a dragon and a knight': '2c7b3a7204d586ac2a682cfd67a40a39ba8e3dc810ff96148c77e78eadc67fa71c9ac4be8711fbbd26bdb5f83654aa6b8070fd8a81dd0fef6bc33ba662f1d40a',\n 'Write an essay about being a Senior developer.': '726c63d347a91b7d077263ff00bc445ba2d20e69a10ed2057a4e6f9c1606c68e6c49da61410f8907e407f44b0cd4aadc39e70f73436a9d1353e48aa64b366228'}\n" stderr: '+ cd /root/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-27_15-27-49_1x_rtx_a6000 \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_15-27-49_1x_rtx_a6000\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ 
PS1=\'(2024-08-27_15-27-49_1x_rtx_a6000) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_16g /root/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/output\n+ tee /root/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/output/stdout.txt\n/root/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_fwd")\n/root/experiments/vllm_16g/2024-08-27_15-27-49_1x_rtx_a6000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00 ~/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' 
nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_15-36-20_1x_a100_sxm4 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_15-36-20_1x_a100_sxm4\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_15-36-20_1x_a100_sxm4) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-27 15:37:15,604 - __main__ - INFO - Running experiment code on remote +2024-08-27 15:41:20,756 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_16g ~/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/output | tee ~/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-27 13:37:26 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, rope_scaling=None, rope_theta=None, tokenizer_revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-27 13:37:28 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 13:37:28 selector.py:116] Using XFormers backend.\nINFO 08-27 13:37:30 model_runner.py:879] Starting to load model microsoft/Phi-3-mini-4k-instruct...\nINFO 08-27 13:37:30 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 13:37:30 selector.py:116] Using XFormers backend.\nINFO 08-27 13:37:30 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-27 13:39:01 model_runner.py:890] Loading model weights took 7.1183 GB\nINFO 08-27 13:39:02 gpu_executor.py:121] # GPU blocks: 4639, # CPU blocks: 682\nmodel loading took 104.30 seconds\nStarting 8 responses generation\n8 responses generation took 128.40 seconds\n{'Count to 1000, skip unpopular numbers': '4c4ad56d950159f19bdd12edb2174b7251dcab0d84817b0468b9e20c7360674e4f5e4501f48e79dcc1d159b3c4c5f831c48f9a87662434786035c4356b776b73',\n 'Describe justice system in UK vs USA in 2000-5000 words': 
'8a2c1f06ea082d73245e084f474b0f98993f389e678850645c711c7f4f7b489b969bf4b0c0b9db0b3773190a1933121bdcd64a2f14df2153e50f6bb93cd12013',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '1c2e330eaedb19b3b8a0fbce46fe7bd0410123be457d42ffdcb139d327a5a1bba025d1fd88b9f3405682a36f3459c2c562134f17c40e192a218526a860b6103c',\n 'Explain me some random problem for me in 2000-5000 words': '69f29410af4e74fb5b76a831fe61c74bcadc3cda83792388775e299aa85965c3a006b8744b33a853d2ebb9c6684e2290093b1a0dde46c00db8c451f3d6d4a10d',\n 'Tell me entire history of USA': 'b6104c38852a60aa9af5216131cb6de27435e14182caab650f7099daab41a6021472fcf29627454570cb1c85c7c5bd389bf67fa72d24187ba192b9b17cc089ee',\n 'Write a ballad. Pick a random theme.': '5c649e8eb9cbd2cc1fac4edd2482afae4fcaa03319d46be8d48d9780f8eb86428ad34a81865f6dd35f217fd7760bba8946b40163f6f35e78a6265e976fbd3164',\n 'Write an epic story about a dragon and a knight': '1844470f0e6e3d17730911fa3c2cbae252c04082653c91e3166f66dd859d49c22883c77c3106d6c7cf5734787876447b8f72ce2f5c3b0754555607c1711cda6c',\n 'Write an essay about being a Senior developer.': '6b58725db3040c3d384cdf2d9b297cd1037f65d2c6a647a7d028ee51e500a22fcdb9615331aa1b3b3b631a742e74a35f854677771f081a62ad80fdacccd69b5b'}\n" stderr: '+ cd /root/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-27_15-36-20_1x_a100_sxm4 \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_15-36-20_1x_a100_sxm4\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ PS1=\'(2024-08-27_15-36-20_1x_a100_sxm4) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_16g /root/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/output\n+ tee /root/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/output/stdout.txt\n/root/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_fwd")\n/root/experiments/vllm_16g/2024-08-27_15-36-20_1x_a100_sxm4/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. 
Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00 ~/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-27_15-54-05_1x_rtx_a4000 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_15-54-05_1x_rtx_a4000\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-27_15-54-05_1x_rtx_a4000) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-27 15:55:01,441 - __main__ - INFO - Running experiment code on remote +2024-08-27 15:59:05,647 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_16g ~/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/output | tee ~/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/output/stdout.txt' stdout: "gpu_count=1\nStarting model loading\nINFO 08-27 13:55:13 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, rope_scaling=None, rope_theta=None, tokenizer_revision=5a516f86087853f9d560c95eb9209c1d4ed9ff69, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-27 13:55:15 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 13:55:15 selector.py:116] Using XFormers 
backend.\nINFO 08-27 13:55:19 model_runner.py:879] Starting to load model microsoft/Phi-3-mini-4k-instruct...\nINFO 08-27 13:55:19 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-27 13:55:19 selector.py:116] Using XFormers backend.\nINFO 08-27 13:55:20 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-27 13:56:07 model_runner.py:890] Loading model weights took 7.1183 GB\nINFO 08-27 13:56:10 gpu_executor.py:121] # GPU blocks: 958, # CPU blocks: 682\nmodel loading took 63.08 seconds\nStarting 8 responses generation\n8 responses generation took 166.26 seconds\n{'Count to 1000, skip unpopular numbers': '7abefbad87e638bbcab79bb3a5614d6efe9a8e4e76d950479051c952259fa5c50b4343c7b930afe3329af7cbaa49989e6c64b5840c9a767768fe1226e8233645',\n 'Describe justice system in UK vs USA in 2000-5000 words': 'b41fc6e907510d065d7d198a982dc7ff67c4c229453ba9a89ce5dc4eae9450f720b5ec373484d00933ede1181f88fe8c3e28ad70f7acccf1421b5e10a3c019f5',\n 'Describe schooling system in UK vs USA in 2000-5000 words': 'fd2f435afb1877967d3950c02df80f8bf1f362de6d961fbb5fa4e961312d35d9335e4b4022d76f1bd20ed14606f86366c38472638327b61529781b20f75e9348',\n 'Explain me some random problem for me in 2000-5000 words': '3a076c9a648c0502297a1a1008e1ffebecd57463ef1e3cf22d8e6afa71f8861b9434deba3f82e3b9e2200b9340b5f88c1bf7f05eef34dc876f479066235dae4b',\n 'Tell me entire history of USA': '98ee9ca87bdecfbeb8dc2a104c9bc6c4b902080eba64e4844f8308320d0be5cf5ab159e419ab636d150a7e51cc2434b4720f133d12da8eab793aa60c4549372b',\n 'Write a ballad. Pick a random theme.': '9da91bde53cbaa2f3a3ff27801a97fd52ab9e39dffa89c9c1d45074fef8fee5dd8542b49df2add9f9d64390ac902bcc99baa6256e29c12fc8f9dc971bc2cd154',\n 'Write an epic story about a dragon and a knight': '54722bd09195af7d447e635403a375f3131660d012732e3bebdd5485dce54169914c800cea87e68b50d5da713f69b01a224899cbc995607c8576765c20c5db37',\n 'Write an essay about being a Senior developer.': '44afb980ca4dea2c71b20e6ecdb925b7b9ddd28b195232b08219f9c64ecf468b7e0470ea088c3b2bd51957ea46e2d8a0526879f071de6500d69da115301644f4'}\n" stderr: '+ cd /root/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-27_15-54-05_1x_rtx_a4000 \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-27_15-54-05_1x_rtx_a4000\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ 
PS1=\'(2024-08-27_15-54-05_1x_rtx_a4000) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_16g /root/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/output\n+ tee /root/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/output/stdout.txt\n/root/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_fwd")\n/root/experiments/vllm_16g/2024-08-27_15-54-05_1x_rtx_a4000/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:344: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00 ~/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090/output/sysinfo.yaml' stdout: '' stderr: "+ cd /root/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ '[' -n x ']'\n++ SCRIPT_PATH=.venv/bin/activate\n++ '[' .venv/bin/activate = bash ']'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ hash -r\n++ '[' -z '' ']'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ '[' '!' 
nondestructive = nondestructive ']'\n++ VIRTUAL_ENV=/root/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090/.venv\n++ '[' linux-gnu = cygwin ']'\n++ '[' linux-gnu = msys ']'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ '[' x2024-08-28_10-03-04_1x_rtx_3090 '!=' x ']'\n++ VIRTUAL_ENV_PROMPT=2024-08-28_10-03-04_1x_rtx_3090\n++ export VIRTUAL_ENV_PROMPT\n++ '[' -z '' ']'\n++ '[' -z '' ']'\n++ _OLD_VIRTUAL_PS1=\n++ PS1='(2024-08-28_10-03-04_1x_rtx_3090) '\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m deterministic_ml._internal.sysinfo\n" status_code: 0 +2024-08-28 10:03:14,020 - __main__ - INFO - Running experiment code on remote +2024-08-28 10:05:32,402 - tools.ssh - INFO - Command: '\n set -exo pipefail\n \n cd ~/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090\n export PATH=$HOME/.cargo/bin:$PATH\n source .venv/bin/activate;\n python -m vllm_phi35 ~/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090/output | tee ~/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090/output/stdout.txt' stdout: "Starting model loading\nINFO 08-28 08:03:24 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='microsoft/Phi-3.5-mini-instruct', speculative_config=None, tokenizer='microsoft/Phi-3.5-mini-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=cd6881a82d62252f5a84593c61acf290f15d89e3, rope_scaling=None, rope_theta=None, tokenizer_revision=cd6881a82d62252f5a84593c61acf290f15d89e3, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=6144, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Phi-3.5-mini-instruct, use_v2_block_manager=False, enable_prefix_caching=False)\nINFO 08-28 08:03:24 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-28 08:03:24 selector.py:116] Using XFormers backend.\nINFO 08-28 08:03:25 model_runner.py:879] Starting to load model microsoft/Phi-3.5-mini-instruct...\nINFO 08-28 08:03:25 selector.py:236] Cannot use FlashAttention-2 backend due to sliding window.\nINFO 08-28 08:03:25 selector.py:116] Using XFormers backend.\nINFO 08-28 08:03:26 weight_utils.py:236] Using model weights format ['*.safetensors']\nINFO 08-28 08:03:32 model_runner.py:890] Loading model weights took 7.1659 GB\nINFO 08-28 08:03:33 gpu_executor.py:121] # GPU blocks: 2217, # CPU blocks: 682\nmodel loading took 10.75 seconds\nStarting 8 responses generation\n8 responses generation took 116.15 seconds\n{'Count to 1000, skip unpopular numbers': 'fe0119a32e5cb10c8ef7b32824e01f72ed7dacb9619e4e54df43b05a8ce6d978586fcc462b8ef7734ca6331cb5e383f1621f246647a16b6420e2d48f5f63c15c',\n 'Describe justice system in UK vs USA in 2000-5000 words': 
'f18f930c23ed062a36f753c4f45da2dee51c9e9c24f2314424e002a8b67cb5f6e9a2525ddcabe7e314c593ce6c86f96f05325f6d6bc98a859dfe5ca5ce8e9d98',\n 'Describe schooling system in UK vs USA in 2000-5000 words': '1902d972aedee49f8aca9d0dd03eff7997d143c84e274296db434c3d2e71b1171233d8b824f989861f51d120c8e70f792390136e311d72152bc52a3212cd29ad',\n 'Explain me some random problem for me in 2000-5000 words': '2953a6cc64465b5c62dcb8257d3292304ea3b9a43a2ce01c82bcd8a59f94b2cc01e1217d7769ceb248bd64c4f11e3f2a60ff79e986e3fb64443aae0f902ac58e',\n 'Tell me entire history of USA': '678c0e2d3fe6dab2eb6c3eba428f98c190b784fd35f16183025a341e4a08c98f39e6aa43d3ed2bf8dc96464d02ee20545f4db73bcb790c57d7e4db781499dfd5',\n 'Write a ballad. Pick a random theme.': 'a2ca9c83288ba84b99d33a391b7b83747a263486c23958d0c30eb35299cd33236371dc78ed4bd9d0e453185878fdc0317117d2b23507bc2bb341279ffdd5d9c4',\n 'Write an epic story about a dragon and a knight': 'b7e133385c527856f91c6d26e16fa5590074d349729aa45b0cc80f2e2df993cb4e7e4a30d2bae4265d310153a71e07086a0121083c895a8cad3406dd2922d945',\n 'Write an essay about being a Senior developer.': '9119725699bc047553df0eb90d3ba431d9e04c52b201cb4eaed599df120f66c08f06449454623447ec7f74b651bca652106bcba9950f881fae8ae27e0f29ffa4'}\n" stderr: '+ cd /root/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090\n+ export PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n+ source .venv/bin/activate\n++ \'[\' -n x \']\'\n++ SCRIPT_PATH=.venv/bin/activate\n++ \'[\' .venv/bin/activate = bash \']\'\n++ deactivate nondestructive\n++ unset -f pydoc\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ hash -r\n++ \'[\' -z \'\' \']\'\n++ unset VIRTUAL_ENV\n++ unset VIRTUAL_ENV_PROMPT\n++ \'[\' \'!\' nondestructive = nondestructive \']\'\n++ VIRTUAL_ENV=/root/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090/.venv\n++ \'[\' linux-gnu = cygwin \']\'\n++ \'[\' linux-gnu = msys \']\'\n++ export VIRTUAL_ENV\n++ _OLD_VIRTUAL_PATH=/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ PATH=/root/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090/.venv/bin:/root/.cargo/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n++ export PATH\n++ \'[\' x2024-08-28_10-03-04_1x_rtx_3090 \'!=\' x \']\'\n++ VIRTUAL_ENV_PROMPT=2024-08-28_10-03-04_1x_rtx_3090\n++ export VIRTUAL_ENV_PROMPT\n++ \'[\' -z \'\' \']\'\n++ \'[\' -z \'\' \']\'\n++ _OLD_VIRTUAL_PS1=\n++ PS1=\'(2024-08-28_10-03-04_1x_rtx_3090) \'\n++ export PS1\n++ alias pydoc\n++ true\n++ hash -r\n+ python -m vllm_phi35 /root/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090/output\n+ tee /root/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090/output/stdout.txt\n/root/experiments/vllm_phi35/2024-08-28_10-03-04_1x_rtx_3090/.venv/lib/python3.11/site-packages/xformers/ops/fmha/flash.py:211: FutureWarning: `torch.library.impl_abstract` was renamed to `torch.library.register_fake`. 
Please use that instead; we will remove `torch.library.impl_abstract` in a future version of PyTorch.\n @torch.library.impl_abstract("xformers_flash::flash_bwd")\n\rLoading safetensors checkpoint shards: 0% Completed | 0/2 [00:00<?, ?it/s]