diff --git a/tests/integration/experiments/vllm_16g/__main__.py b/tests/integration/experiments/vllm_16g/__main__.py
index bf551ae..e64efe0 100644
--- a/tests/integration/experiments/vllm_16g/__main__.py
+++ b/tests/integration/experiments/vllm_16g/__main__.py
@@ -37,7 +37,6 @@ def main():
     args = parser.parse_args()
 
     gpu_count = torch.cuda.device_count()
-    print(f"{gpu_count=}")
 
     model_name = args.model
     if "@" in model_name:
diff --git a/tests/integration/experiments/vllm_phi35/README.md b/tests/integration/experiments/vllm_phi35/README.md
new file mode 100644
index 0000000..fa120a6
--- /dev/null
+++ b/tests/integration/experiments/vllm_phi35/README.md
@@ -0,0 +1,3 @@
+Requirements:
+* 16GB HDD (should be ~14GB)
+* 24GB GPU vRAM
diff --git a/tests/integration/experiments/vllm_phi35/__init__.py b/tests/integration/experiments/vllm_phi35/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/integration/experiments/vllm_phi35/__main__.py b/tests/integration/experiments/vllm_phi35/__main__.py
new file mode 100644
index 0000000..9ececcc
--- /dev/null
+++ b/tests/integration/experiments/vllm_phi35/__main__.py
@@ -0,0 +1,118 @@
+import argparse
+import contextlib
+import hashlib
+import io
+import pathlib
+import sys
+import time
+from pprint import pprint
+
+import torch
+import vllm
+import yaml
+from deterministic_ml.v1 import set_deterministic
+from vllm import SamplingParams
+
+SEED = 42
+
+set_deterministic(SEED)
+
+
+@contextlib.contextmanager
+def timed(name):
+    print(f"Starting {name}")
+    start = time.time()
+    yield
+    took = time.time() - start
+    print(f"{name} took {took:.2f} seconds")
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("output_path", type=pathlib.Path, help="Path to save the output")
+    parser.add_argument(
+        "--model",
+        default="microsoft/Phi-3.5-mini-instruct@cd6881a82d62252f5a84593c61acf290f15d89e3",
+        help="Model name",
+    )
+    args = parser.parse_args()
+
+    gpu_count = torch.cuda.device_count()
+
+    model_name = args.model
+    if "@" in model_name:
+        model_name, revision = model_name.split("@")
+    else:
+        revision = None
+
+    with timed("model loading"):
+        model = vllm.LLM(
+            model=model_name,
+            revision=revision,
+            # quantization="AWQ",  # enable only for quantized checkpoints
+            tensor_parallel_size=gpu_count,
+            max_model_len=6144,
+            enforce_eager=True,  # run in eager mode (no CUDA graph capture)
+        )
+
+    def make_prompt(prompt):
+        role_templates = {
+            "system": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{{{{ {} }}}}<|eot_id|>",
+            "user": "<|start_header_id|>user<|end_header_id|>\n{{{{ {} }}}}<|eot_id|>",
+            "assistant": "<|start_header_id|>assistant<|end_header_id|>\n{{{{ {} }}}}<|eot_id|>",
+            "end": "<|start_header_id|>assistant<|end_header_id|>",
+        }
+        msgs = [
+            {"role": "system", "content": "You are a helpful AI assistant"},
+            {"role": "user", "content": prompt},
+        ]
+        full_prompt = io.StringIO()
+        for msg in msgs:
+            full_prompt.write(role_templates[msg["role"]].format(msg["content"]))
+        full_prompt.write(role_templates["end"])
+        return full_prompt.getvalue()
+
+    sampling_params = SamplingParams(
+        max_tokens=4096,
+        temperature=0.5,
+        top_p=0.95,
+        seed=SEED,
+    )
+
+    def generate_responses(prompts: list[str]):
+        requests = [make_prompt(prompt) for prompt in prompts]
+        response = model.generate(requests, sampling_params, use_tqdm=True)
+        return response
+
+    output_hashes = {}
+    output_full = {}
+    prompts = [
+        "Count to 1000, skip unpopular numbers",
+        "Describe justice system in UK vs USA in 2000-5000 words",
+        "Describe schooling system in UK vs USA in 2000-5000 words",
+        "Explain me some random problem for me in 2000-5000 words",
+        "Tell me entire history of USA",
+        "Write a ballad. Pick a random theme.",
+        "Write an epic story about a dragon and a knight",
+        "Write an essay about being a Senior developer.",
+    ]
+
+    with timed(f"{len(prompts)} responses generation"):
+        for prompt, r in zip(prompts, generate_responses(prompts)):
+            hasher = hashlib.blake2b()
+            text_response = r.outputs[0].text
+            output_full[prompt] = text_response
+            hasher.update(text_response.encode("utf8"))
+            output_hashes[prompt] = hasher.hexdigest()
+            sys.stderr.flush()
+
+    pprint(output_hashes)
+    with open(args.output_path / "output.yaml", "w") as f:
+        yaml.safe_dump(output_hashes, f, sort_keys=True)
+
+    with open(args.output_path / "output_full.yaml", "w") as f:
+        yaml.safe_dump(output_full, f, sort_keys=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/integration/experiments/vllm_phi35/requirements.txt b/tests/integration/experiments/vllm_phi35/requirements.txt
new file mode 100644
index 0000000..2158a90
--- /dev/null
+++ b/tests/integration/experiments/vllm_phi35/requirements.txt
@@ -0,0 +1,4 @@
+setuptools
+torch
+pyyaml
+vllm
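
Not part of the patch above, but as a reading aid: the experiment writes output.yaml (a map from prompt to the blake2b digest of its response) so that separate runs can be checked for identical generations. Below is a minimal sketch of such a comparison, assuming two runs wrote their results into different output directories; the script name compare_hashes.py and its CLI are hypothetical and not something the repository provides.

# compare_hashes.py -- hypothetical helper, not part of the patch above.
# usage: python compare_hashes.py RUN_A_DIR RUN_B_DIR
import pathlib
import sys

import yaml


def load_hashes(run_dir: pathlib.Path) -> dict[str, str]:
    # output.yaml maps each prompt string to the blake2b hex digest of its response
    with open(run_dir / "output.yaml") as f:
        return yaml.safe_load(f)


def main() -> None:
    run_a, run_b = (pathlib.Path(p) for p in sys.argv[1:3])
    hashes_a, hashes_b = load_hashes(run_a), load_hashes(run_b)

    # collect prompts whose digests differ (or are missing) between the two runs
    mismatches = {
        prompt: (hashes_a[prompt], hashes_b.get(prompt))
        for prompt in hashes_a
        if hashes_a[prompt] != hashes_b.get(prompt)
    }
    if mismatches:
        print(f"{len(mismatches)} prompt(s) produced different outputs:")
        for prompt, (digest_a, digest_b) in mismatches.items():
            print(f"  {prompt!r}: {digest_a} != {digest_b}")
        sys.exit(1)
    print("All prompt hashes match; generation was reproducible across the two runs.")


if __name__ == "__main__":
    main()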