diff --git a/models/demos/llama3/demo/simple_text_demo.py b/models/demos/llama3/demo/simple_text_demo.py
index 67cb1e35e18..4cf6a04f57c 100644
--- a/models/demos/llama3/demo/simple_text_demo.py
+++ b/models/demos/llama3/demo/simple_text_demo.py
@@ -293,6 +293,10 @@ def test_llama_demo_text(
     if is_ci_env and (optimizations == LlamaOptimizations.accuracy or not ci_only):
         pytest.skip("CI only runs the CI-only tests")
 
+    # TODO This can be tackled by reducing the number of iterations we run on CI on N150/N300 machines
+    if is_ci_env and mesh_device.get_num_devices() < 4 and batch_size == 32:
+        pytest.skip("Some llama3 models may run out of memory with CI settings when batch_size=32")
+
     # TODO: Remove this once all batch sizes are supported on TG
     if os.environ.get("FAKE_DEVICE") == "TG" and batch_size not in [1, 32]:
         pytest.skip("TG only supports batch 1 and 32")
diff --git a/models/demos/llama3/lt b/models/demos/llama3/lt
index 31fea098e0e..0283e5d74be 100644
--- a/models/demos/llama3/lt
+++ b/models/demos/llama3/lt
@@ -839,6 +839,8 @@ def run_entry_command(entry, screen_lock, output_entries, screen_needs_update):
         "demo-acc": "pytest models/demos/llama3/demo/simple_text_demo.py -k accuracy-batch-1",
         "demo-32": "pytest models/demos/llama3/demo/simple_text_demo.py -k performance-batch-32",
         "demo-long": "pytest models/demos/llama3/demo/simple_text_demo.py -k performance-long",
+        "demo-ci-1": "pytest models/demos/llama3/demo/simple_text_demo.py -k performance-ci-1",
+        "demo-ci-32": "pytest models/demos/llama3/demo/simple_text_demo.py -k performance-ci-32",
         "attention": "pytest models/demos/llama3/tests/test_llama_attention.py",
         "attention-prefill": "pytest models/demos/llama3/tests/test_llama_attention_prefill.py",
         "mlp": "pytest models/demos/llama3/tests/test_llama_mlp.py",
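
For context, the new `demo-ci-1`/`demo-ci-32` shortcuts rely on pytest's `-k` expression matching the test ids that `@pytest.mark.parametrize` generates in `simple_text_demo.py`. The ids `performance-ci-1` and `performance-ci-32` are taken from the diff, but the parameter signature below is a hypothetical, minimal sketch of how such ids can be declared; the real parametrization in `simple_text_demo.py` may differ:

```python
# Hypothetical sketch: declaring "performance-ci-*" test ids so that
# `pytest -k performance-ci-32` selects only that variant. Parameter names
# here are illustrative, not the actual test's signature.
import pytest


@pytest.mark.parametrize(
    "batch_size, ci_only",
    [
        pytest.param(1, True, id="performance-ci-1"),
        pytest.param(32, True, id="performance-ci-32"),
    ],
)
def test_llama_demo_text(batch_size, ci_only):
    # `-k performance-ci-1` matches the id substring, so only the batch-1
    # CI variant is collected and run.
    assert batch_size in (1, 32)
```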