From a8f645938d4dda3b5f8a8e922d02b6501948b084 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Wed, 4 Sep 2024 18:27:03 +0800 Subject: [PATCH 1/6] adjust default values --- src/compute_horde_prompt_gen/prompt.py | 3 +-- src/compute_horde_prompt_gen/run.py | 4 ++-- src/compute_horde_prompt_gen/utils.py | 3 ++- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/compute_horde_prompt_gen/prompt.py b/src/compute_horde_prompt_gen/prompt.py index 84828bc..d97f049 100644 --- a/src/compute_horde_prompt_gen/prompt.py +++ b/src/compute_horde_prompt_gen/prompt.py @@ -11,7 +11,6 @@ def random_select(self, arr: list[str], num: int = 5) -> str: return ", ".join(arr[:num]) + ", etc" def generate_prompt(self) -> str: - num_prompts = random.choice([10, 15, 20, 25, 30]) relevance_level = random.randint(5, 20) complexity_level = random.randint(5, 20) @@ -20,7 +19,7 @@ def generate_prompt(self) -> str: formats = self.random_select(FORMATS, num=5) prompt = ( - f"Generate a list of {num_prompts} complex prompts (questions or instruct tasks) that cover a wide range of skills and knowledge areas related to the themes of {themes}. " + f"Generate a list of 5 complex prompts (questions or instruct tasks) that cover a wide range of skills and knowledge areas related to the themes of {themes}. " f"Each of these prompts should: " f"\n- have a complexity level of {complexity_level} out of 20 and a relevance level to the theme of {relevance_level} out of 20" f"\n- test various cognitive abilities ({abilities}) and require different types of writting formats ({formats})" diff --git a/src/compute_horde_prompt_gen/run.py b/src/compute_horde_prompt_gen/run.py index bbd9666..4b12eaf 100644 --- a/src/compute_horde_prompt_gen/run.py +++ b/src/compute_horde_prompt_gen/run.py @@ -71,7 +71,7 @@ def generate_prompts( parser.add_argument( "--batch_size", type=int, - default=5, + default=20, help="Batch size - number of prompts given as input per generation request", ) parser.add_argument( @@ -83,7 +83,7 @@ def generate_prompts( parser.add_argument( "--max_new_tokens", type=int, - default=2000, + default=500, help="Max new tokens", ) parser.add_argument( diff --git a/src/compute_horde_prompt_gen/utils.py b/src/compute_horde_prompt_gen/utils.py index 5f4a369..64ca608 100644 --- a/src/compute_horde_prompt_gen/utils.py +++ b/src/compute_horde_prompt_gen/utils.py @@ -28,7 +28,8 @@ def parse_output(output: str) -> list[str]: lines = [line for line in lines if (len(line) > 10 and len(line) < 300)] # skip first line as that's frequently broken (i.e. "Here are the prompts:") - return lines[1:] + # skip last line as it might not be comletely generated + return lines[1:-1] def check_prompts_quality(prompts: list[str]): From abc4a4547f213fce10038004a5316849d3e9c047 Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Wed, 4 Sep 2024 18:44:09 +0800 Subject: [PATCH 2/6] mock no need torch --- README.md | 2 ++ pdm.lock | 6 +++--- src/compute_horde_prompt_gen/model.py | 13 +++++++------ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index f4c1fee..87612b8 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) Script to generate batches of random unique prompts to be used in the Compute Horde project synthetic jobs. + The prompt that generates prompts is inspired from [Bittensor Subnet 18 (Cortex. 
t)] (https://github.com/Datura-ai/cortex.t/blob/276cfcf742e8b442500435a1c1862ac4dffa9e20/cortext/utils.py#L193) (licensed under the MIT License.) + The generated prompts will be saved in `/prompts_.txt`, each line of the text file containing a prompt. diff --git a/pdm.lock b/pdm.lock index 3838b6f..03789e1 100644 --- a/pdm.lock +++ b/pdm.lock @@ -611,13 +611,13 @@ files = [ [[package]] name = "setuptools" -version = "74.1.0" +version = "74.1.1" requires_python = ">=3.8" summary = "Easily download, build, install, upgrade, and uninstall Python packages" groups = ["default"] files = [ - {file = "setuptools-74.1.0-py3-none-any.whl", hash = "sha256:cee604bd76cc092355a4e43ec17aee5369095974f41f088676724dc6bc2c9ef8"}, - {file = "setuptools-74.1.0.tar.gz", hash = "sha256:bea195a800f510ba3a2bc65645c88b7e016fe36709fefc58a880c4ae8a0138d7"}, + {file = "setuptools-74.1.1-py3-none-any.whl", hash = "sha256:fc91b5f89e392ef5b77fe143b17e32f65d3024744fba66dc3afe07201684d766"}, + {file = "setuptools-74.1.1.tar.gz", hash = "sha256:2353af060c06388be1cecbf5953dcdb1f38362f87a2356c480b6b4d5fcfc8847"}, ] [[package]] diff --git a/src/compute_horde_prompt_gen/model.py b/src/compute_horde_prompt_gen/model.py index 4998646..65f1c4f 100644 --- a/src/compute_horde_prompt_gen/model.py +++ b/src/compute_horde_prompt_gen/model.py @@ -1,9 +1,4 @@ -import torch import logging -from transformers import ( - AutoTokenizer, - AutoModelForCausalLM, -) from prompt import PROMPT_ENDING @@ -15,7 +10,7 @@ def __init__(self): pass def generate(self, prompts: list[str], num_return_sequences: int, **_kwargs): - return torch.rand(len(prompts) * num_return_sequences) + return [1 for _ in range(len(prompts) * num_return_sequences)] def decode(self, _output): return f"COPY PASTE INPUT PROMPT {PROMPT_ENDING} Here is the list of prompts:\nHow are you?\nDescribe something\nCount to ten\n" @@ -23,6 +18,12 @@ def decode(self, _output): class GenerativeModel: def __init__(self, model_path: str, quantize: bool = False): + import torch + from transformers import ( + AutoTokenizer, + AutoModelForCausalLM, + ) + quantization_config = None if quantize: from transformers import BitsAndBytesConfig From f956da5eb6990caad294c20da8087e187d670ccf Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Wed, 4 Sep 2024 18:44:24 +0800 Subject: [PATCH 3/6] add github actions --- .github/workflows/build_push_image.yml | 38 ++++++++++++++++++++++++ .github/workflows/smoke_test.yml | 41 ++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 .github/workflows/build_push_image.yml create mode 100644 .github/workflows/smoke_test.yml diff --git a/.github/workflows/build_push_image.yml b/.github/workflows/build_push_image.yml new file mode 100644 index 0000000..3dd48ed --- /dev/null +++ b/.github/workflows/build_push_image.yml @@ -0,0 +1,38 @@ +name: "CD: build & push image" + +env: + PYTHON_DEFAULT_VERSION: "3.12" + TAG_VERSION: "v0-latest" + DOCKER_REPO_NAME: "backenddevelopersltd/compute-horde-prompt-gen" + +jobs: + deploy: + timeout-minutes: 15 + runs-on: + group: bulkier + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Set up Python ${{ env.PYTHON_DEFAULT_VERSION }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_DEFAULT_VERSION }} + + - name: Login Dockerhub + run: echo "${{ secrets.DOCKERHUB_KEY }}" | docker login -u "${{ secrets.DOCKERHUB_USERNAME }}" --password-stdin + + - name: Install dependencies + run: | + python -m pip install transformers torch + + - name: Docker build and push + run: 
|
          df -h
          IMAGE_NAME="${DOCKER_REPO_NAME}:${TAG_VERSION}"

          cd src/compute_horde_prompt_gen

          python download_model.py --model_name phi3 --huggingface_token "${{ secrets.HUGGINGFACE_API_KEY }}"

          docker build -t $IMAGE_NAME .
diff --git a/.github/workflows/smoke_test.yml b/.github/workflows/smoke_test.yml
new file mode 100644
index 0000000..331804a
--- /dev/null
+++ b/.github/workflows/smoke_test.yml
@@ -0,0 +1,41 @@
+name: Run Smoke Test
+
+on:
+  push:
+    branches: [master, main]
+
+env:
+  PYTHON_DEFAULT_VERSION: "3.11"
+
+jobs:
+  test:
+    timeout-minutes: 10
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python ${{ env.PYTHON_DEFAULT_VERSION }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_DEFAULT_VERSION }}
+
+      - name: Run Test
+        run: |
+          cd src/compute_horde_prompt_gen
+
+          python3 run.py --model_name mock --number_of_batches 5 --number_of_prompts_per_batch 20 --uuids uuid1,uuid2,uuid3,uuid4,uuid5
+
+          echo -e "\ngenerated batches:"
+          ls ./output/
+
+          echo -e "\nchecking if prompts are generated fine"
+          for i in $(seq 1 5); do
+            if [ $(cat output/prompts_uuid$i.txt | wc -l) -ne 20 ]; then
+              echo "Missing prompts: $(cat output/prompts_uuid${i}.txt)"
+              exit 1
+            fi
+          done
+          echo "OK"
+

From 323c7c8e06bc0771fa55073ee93a9ca982287a11 Mon Sep 17 00:00:00 2001
From: Andreea Popescu
Date: Fri, 6 Sep 2024 15:47:25 +0800
Subject: [PATCH 4/6] add phi3

---
 README.md                               | 14 ++--
 .../download_model.py                   | 29 +++++---
 src/compute_horde_prompt_gen/model.py   | 70 ++++++++++++++++---
 src/compute_horde_prompt_gen/prompt.py  | 50 +++++--------
 src/compute_horde_prompt_gen/run.py     | 48 ++++++++-----
 src/compute_horde_prompt_gen/utils.py   |  6 --
 6 files changed, 137 insertions(+), 80 deletions(-)
 rename download_model.py => src/compute_horde_prompt_gen/download_model.py (51%)

diff --git a/README.md b/README.md
index 87612b8..385c117 100644
--- a/README.md
+++ b/README.md
@@ -7,27 +7,31 @@ The prompt that generates prompts is inspired from [Bittensor Subnet 18 (Cortex.
 The generated prompts will be saved in `/prompts_.txt`, each line of the text file containing a prompt.
 
+Supports llama3 (`meta-llama/Meta-Llama-3.1-8B-Instruct`) and phi3 (`microsoft/Phi-3.5-mini-instruct`) models.
 
 ### build image
 ```bash
-# download the model data from huggingface
-python3 download_model.py --huggingface_token
 cd src/compute_horde_prompt_gen
+
+# download model data
+python3 download_model.py --model_name phi3 --huggingface_token
+
+# build the image
 docker build -t compute-horde-prompt-gen .
``` ### run image ```bash -docker run -v ./output/:/app/output/ compute-horde-prompt-gen --number_of_batches 3 --number_of_prompts_per_batch 4 --uuids uuid1,uuid2,uuid3 +docker run -v ./output/:/app/output/ compute-horde-prompt-gen --model_name phi3 --number_of_prompts_per_batch 4 --uuids uuid1,uuid2,uuid3 ``` ### testint ```bash -python3 run.py --mock_model --number_of_batches 3 --number_of_prompts_per_batch 4 --uuids uuid1,uuid2,uuid3 +cd src/compute_horde_prompt_gen +python3 run.py --model_name mock --number_of_prompts_per_batch 4 --uuids uuid1,uuid2,uuid3 ``` --- diff --git a/download_model.py b/src/compute_horde_prompt_gen/download_model.py similarity index 51% rename from download_model.py rename to src/compute_horde_prompt_gen/download_model.py index 0336dd7..7516d76 100644 --- a/download_model.py +++ b/src/compute_horde_prompt_gen/download_model.py @@ -1,9 +1,15 @@ +import os import argparse from transformers import ( AutoTokenizer, AutoModelForCausalLM, ) +MODEL_PATHS = { + "llama3": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "phi3": "microsoft/Phi-3.5-mini-instruct", +} + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Save huggingface model") parser.add_argument( @@ -15,24 +21,31 @@ parser.add_argument( "--model_name", type=str, - default="meta-llama/Meta-Llama-3.1-8B-Instruct", - help="Model name to use", + choices=["llama3", "phi3"], + required=True, + help="Model to use - options are llama3 or phi3", ) parser.add_argument( - "--model_path", + "--save_path", type=str, - default="./src/compute_horde_prompt_gen/saved_models/", + default="./saved_models/", help="Path to save the model and tokenizer to", ) args = parser.parse_args() + save_path = os.path.join(args.save_path, args.model_name) + model_name = MODEL_PATHS[args.model_name] + print(f"Saving {model_name} model to {save_path}") model = AutoModelForCausalLM.from_pretrained( - args.model_name, + model_name, # either give token directly or assume logged in with huggingface-cli token=args.huggingface_token or True, ) - model.save_pretrained(args.model_path) + model.save_pretrained(save_path) - tokenizer = AutoTokenizer.from_pretrained(args.model_name) - tokenizer.save_pretrained(args.model_path) + tokenizer = AutoTokenizer.from_pretrained( + model_name, + token=args.huggingface_token or True, + ) + tokenizer.save_pretrained(save_path) diff --git a/src/compute_horde_prompt_gen/model.py b/src/compute_horde_prompt_gen/model.py index 65f1c4f..d68b513 100644 --- a/src/compute_horde_prompt_gen/model.py +++ b/src/compute_horde_prompt_gen/model.py @@ -1,23 +1,28 @@ import logging - -from prompt import PROMPT_ENDING +import io log = logging.getLogger(__name__) +def strip_input(output: str, ending: str) -> str: + # input prompt is repeated in the output, so we need to remove it + idx = output.find(ending) + len(ending) + return output[idx:].strip() + + class MockModel: def __init__(self): pass def generate(self, prompts: list[str], num_return_sequences: int, **_kwargs): - return [1 for _ in range(len(prompts) * num_return_sequences)] - - def decode(self, _output): - return f"COPY PASTE INPUT PROMPT {PROMPT_ENDING} Here is the list of prompts:\nHow are you?\nDescribe something\nCount to ten\n" + content = f"Here is the list of prompts:\nHow are you?\nDescribe something\nCount to ten\n" + return [content for _ in range(len(prompts) * num_return_sequences)] class GenerativeModel: def __init__(self, model_path: str, quantize: bool = False): + self.input_prompt_ending = None + import torch from transformers import ( 
AutoTokenizer, @@ -45,20 +50,54 @@ def __init__(self, model_path: str, quantize: bool = False): model_path, local_files_only=True, ) + + def tokenize(self, prompts: list[str], role: str) -> str: # set default padding token self.tokenizer.pad_token = self.tokenizer.eos_token + role_templates = { + "system": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{{{{ {} }}}}<|eot_id|>", + "user": "<|start_header_id|>user<|end_header_id|>\n{{{{ {} }}}}<|eot_id|>", + "assistant": "<|start_header_id|>assistant<|end_header_id|>\n{{{{ {} }}}}<|eot_id|>", + "end": "<|start_header_id|>assistant<|end_header_id|>", + } + + def tokenize(prompt: str) -> str: + msgs = [ + {"role": "system", "content": role}, + {"role": "user", "content": prompt}, + ] + full_prompt = io.StringIO() + for msg in msgs: + full_prompt.write(role_templates[msg["role"]].format(msg["content"])) + full_prompt.write(role_templates["end"]) + return full_prompt.getvalue() + + inputs = [tokenize(prompt) for prompt in prompts] + inputs = self.tokenizer(inputs, return_tensors="pt", padding=True).to("cuda") + return inputs + + def decode(self, output) -> list[str]: + return [ + strip_input( + self.tokenizer.decode(x, skip_special_tokens=True), + self.input_prompt_ending, + ) + for x in output + ] + def generate( self, prompts: list[str], + role: str, num_return_sequences: int, max_new_tokens: int, temperature: float, ): # encode the prompts - inputs = self.tokenizer(prompts, return_tensors="pt", padding=True).to("cuda") + inputs = self.tokenize(prompts, role) - return self.model.generate( + output = self.model.generate( **inputs, max_new_tokens=max_new_tokens, temperature=temperature, @@ -66,5 +105,16 @@ def generate( do_sample=True, # use sampling-based decoding ) - def decode(self, output): - return self.tokenizer.decode(output, skip_special_tokens=True) + return self.decode(output) + + +class Phi3(GenerativeModel): + def __init__(self, model_path: str, quantize: bool = False): + super().__init__(model_path, quantize) + self.input_prompt_ending = "assistant<|end_header_id|>" + + +class Llama3(GenerativeModel): + def __init__(self, model_path: str, quantize: bool = False): + super().__init__(model_path, quantize) + self.input_prompt_ending = " }}assistant" diff --git a/src/compute_horde_prompt_gen/prompt.py b/src/compute_horde_prompt_gen/prompt.py index d97f049..eed919b 100644 --- a/src/compute_horde_prompt_gen/prompt.py +++ b/src/compute_horde_prompt_gen/prompt.py @@ -1,25 +1,29 @@ -import io import random from seeds import THEMES, ABILITIES, FORMATS -PROMPT_ENDING = " }}assistant" - class PromptGeneratingPrompt: def random_select(self, arr: list[str], num: int = 5) -> str: random.shuffle(arr) return ", ".join(arr[:num]) + ", etc" - def generate_prompt(self) -> str: + def generate_prompt(self, short=True) -> str: + themes = self.random_select(THEMES, num=3) + + if short: + return ( + f"Generate a list of 10 questions or instruct tasks related to the themes of {themes}. " + f"Output each prompt on a new line without any extra commentary or special characters." + ) + relevance_level = random.randint(5, 20) complexity_level = random.randint(5, 20) - themes = self.random_select(THEMES, num=3) abilities = self.random_select(ABILITIES, num=4) formats = self.random_select(FORMATS, num=5) - prompt = ( - f"Generate a list of 5 complex prompts (questions or instruct tasks) that cover a wide range of skills and knowledge areas related to the themes of {themes}. 
" + return ( + f"Generate a list of 10 complex prompts (questions or instruct tasks) that cover a wide range of skills and knowledge areas related to the themes of {themes}. " f"Each of these prompts should: " f"\n- have a complexity level of {complexity_level} out of 20 and a relevance level to the theme of {relevance_level} out of 20" f"\n- test various cognitive abilities ({abilities}) and require different types of writting formats ({formats})" @@ -27,30 +31,8 @@ def generate_prompt(self) -> str: f"\n- varyingly explore the {themes} in a manner that is consistent with their assigned complexity and relevance levels to the theme" f"\nOutput each prompt on a new line without any extra commentary or special characters." ) - return prompt - - def generate_role(self) -> str: - role = "You are a prompt engineer tasked with prompts of varying complexity to test the capabilities of a new language model. For each prompt, consider what aspect of the language model's capabilities it is designed to test and ensure that the set of prompts covers a broad spectrum of potential use cases for the language model. Only output the prompts, one per line without any extra commentary. Do not use any special characters or formatting, numbering or styling in the output." - return role - - def tokenize(self, prompt: str, role: str) -> str: - role_templates = { - "system": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{{{{ {} }}}}<|eot_id|>", - "user": "<|start_header_id|>user<|end_header_id|>\n{{{{ {} }}}}<|eot_id|>", - "assistant": "<|start_header_id|>assistant<|end_header_id|>\n{{{{ {} }}}}<|eot_id|>", - "end": "<|start_header_id|>assistant<|end_header_id|>", - } - msgs = [ - {"role": "system", "content": role}, - {"role": "user", "content": prompt}, - ] - full_prompt = io.StringIO() - for msg in msgs: - full_prompt.write(role_templates[msg["role"]].format(msg["content"])) - full_prompt.write(role_templates["end"]) - return full_prompt.getvalue() - - def generate(self): - prompt = self.generate_prompt() - role = self.generate_role() - return self.tokenize(prompt, role) + + def generate_role(self, short=True) -> str: + if short: + return "You are a prompt engineer tasked with prompts of varying complexity to test the capabilities of a new language model." + return "You are a prompt engineer tasked with prompts of varying complexity to test the capabilities of a new language model. For each prompt, consider what aspect of the language model's capabilities it is designed to test and ensure that the set of prompts covers a broad spectrum of potential use cases for the language model. Only output the prompts, one per line without any extra commentary. Do not use any special characters or formatting, numbering or styling in the output." 
diff --git a/src/compute_horde_prompt_gen/run.py b/src/compute_horde_prompt_gen/run.py index 4b12eaf..ce3ec60 100644 --- a/src/compute_horde_prompt_gen/run.py +++ b/src/compute_horde_prompt_gen/run.py @@ -4,9 +4,10 @@ import argparse from prompt import PromptGeneratingPrompt -from model import MockModel, GenerativeModel +from model import MockModel, Llama3, Phi3 from utils import parse_output, append_to_file +logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) @@ -24,25 +25,27 @@ def generate_prompts( i = -1 while total_prompts > 0: i += 1 - prompts = [prompt_generator.generate() for _ in range(batch_size)] + prompts = [prompt_generator.generate_prompt() for _ in range(batch_size)] + role = prompt_generator.generate_role() start_ts = datetime.datetime.now() sequences = model.generate( num_return_sequences=num_return_sequences, prompts=prompts, + role=role, max_new_tokens=max_new_tokens, temperature=temperature, ) + seconds_taken = (datetime.datetime.now() - start_ts).total_seconds() log.info(f"{i=} generation took {seconds_taken:.2f}s") new_prompts = [] for j, sequence in enumerate(sequences): - output = model.decode(sequence) - generated_prompts = parse_output(output) - log.debug(f"{i=} sequence={j} {generated_prompts=} from {output=}") + generated_prompts = parse_output(sequence) + log.debug(f"{i=} sequence={j} {generated_prompts=} from {sequence=}") - log.info(f"{i=} {sequence=} generated {len(generated_prompts)} prompts") + log.info(f"{i=} sequence={j} generated {len(generated_prompts)} prompts") new_prompts.extend(generated_prompts) # check_prompts_quality(new_prompts) @@ -92,6 +95,13 @@ def generate_prompts( default=1.0, help="Temperature", ) + parser.add_argument( + "--model_name", + type=str, + choices=["llama3", "phi3", "mock"], + required=True, + help="Model to use - options are llama3 or phi3", + ) parser.add_argument( "--model_path", type=str, @@ -116,12 +126,6 @@ def generate_prompts( required=True, help="Comma separated list of uuids, used as file names of output batches, i.e. 
`output/prompts_{uuid}.txt`", ) - parser.add_argument( - "--mock_model", - action="store_true", - default=False, - help="Mock llama3 model for testing purposes only", - ) parser.add_argument( "--output_folder_path", type=str, @@ -138,11 +142,21 @@ def generate_prompts( len(uuids) == args.number_of_batches ), "Number of uuids should be equal to number of batches requested" - model = ( - GenerativeModel(model_path=args.model_path, quantize=args.quantize) - if not args.mock_model - else MockModel() - ) + model_path = os.path.join(args.model_path, args.model_name) + if args.model_name == "mock": + model = MockModel() + elif args.model_name == "llama3": + model = Llama3( + model_path=model_path, + quantize=args.quantize, + ) + elif args.model_name == "phi3": + model = Phi3( + model_path=model_path, + quantize=args.quantize, + ) + else: + raise ValueError(f"Invalid model name: {args.model_name}") for uuid in uuids: start_ts = datetime.datetime.now() diff --git a/src/compute_horde_prompt_gen/utils.py b/src/compute_horde_prompt_gen/utils.py index 64ca608..5825cab 100644 --- a/src/compute_horde_prompt_gen/utils.py +++ b/src/compute_horde_prompt_gen/utils.py @@ -3,8 +3,6 @@ import logging import collections -from prompt import PROMPT_ENDING - log = logging.getLogger(__name__) @@ -16,10 +14,6 @@ def clean_line(line: str) -> str: def parse_output(output: str) -> list[str]: - # input prompt is repeated in the output, so we need to remove it - idx = output.find(PROMPT_ENDING) + len(PROMPT_ENDING) - output = output[idx:].strip() - # split into lines and clean them lines = output.split("\n") lines = [clean_line(line) for line in lines] From ab44f497963118ac329b8c8ebe36bc303f78556e Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Fri, 6 Sep 2024 15:58:58 +0800 Subject: [PATCH 5/6] wip --- .github/workflows/build_push_image.yml | 3 +++ .github/workflows/smoke_test.yml | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_push_image.yml b/.github/workflows/build_push_image.yml index 3dd48ed..259b3ac 100644 --- a/.github/workflows/build_push_image.yml +++ b/.github/workflows/build_push_image.yml @@ -1,5 +1,8 @@ name: "CD: build & push image" +on: + workflow_dispatch: + env: PYTHON_DEFAULT_VERSION: "3.12" TAG_VERSION: "v0-latest" diff --git a/.github/workflows/smoke_test.yml b/.github/workflows/smoke_test.yml index 331804a..d757b60 100644 --- a/.github/workflows/smoke_test.yml +++ b/.github/workflows/smoke_test.yml @@ -28,7 +28,8 @@ jobs: python3 run.py --model_name mock --number_of_batches 5 --number_of_prompts_per_batch 20 --uuids uuid1,uuid2,uuid3,uuid4,uuid5 echo -e "\ngenerated batches:" - ls ./output/ + ls + ls output/ echo -e "\nchecking if prompts are generated fine" for i in $(seq 1 5); do From 0b955d28fb07f7e76537baa0bda7b134a1899cdb Mon Sep 17 00:00:00 2001 From: Andreea Popescu Date: Fri, 6 Sep 2024 16:03:52 +0800 Subject: [PATCH 6/6] wip --- .github/workflows/build_push_image.yml | 2 ++ .github/workflows/smoke_test.yml | 3 +++ 2 files changed, 5 insertions(+) diff --git a/.github/workflows/build_push_image.yml b/.github/workflows/build_push_image.yml index 259b3ac..c38630c 100644 --- a/.github/workflows/build_push_image.yml +++ b/.github/workflows/build_push_image.yml @@ -1,6 +1,8 @@ name: "CD: build & push image" on: + push: + branches: [build-image] workflow_dispatch: env: diff --git a/.github/workflows/smoke_test.yml b/.github/workflows/smoke_test.yml index d757b60..bd31f41 100644 --- a/.github/workflows/smoke_test.yml +++ 
b/.github/workflows/smoke_test.yml
@@ -3,6 +3,9 @@ name: Run Smoke Test
 on:
   push:
     branches: [master, main]
+  pull_request:
+    branches: [master, main]
+  workflow_dispatch:
 
 env:
   PYTHON_DEFAULT_VERSION: "3.11"
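
---

Taken together, patch 1 and patch 4 leave `parse_output()` doing only line splitting, cleanup, length filtering, and dropping of the first and last lines, while stripping of the echoed input prompt moves into `model.strip_input()`. The sketch below is a minimal, self-contained approximation of that parsing flow, not the actual `utils.py`: `clean_line` here is a simplified stand-in for the real helper (which is not shown in this series).

```python
# Sketch of the post-series parse_output flow: split the completion into
# lines, clean them, keep only plausibly sized lines, then drop the first
# line (often a preamble such as "Here are the prompts:") and the last
# line (which may have been cut off by max_new_tokens).


def clean_line(line: str) -> str:
    # simplified stand-in for the clean_line helper in utils.py
    return line.strip()


def parse_output(output: str) -> list[str]:
    lines = [clean_line(line) for line in output.split("\n")]
    lines = [line for line in lines if 10 < len(line) < 300]
    return lines[1:-1]


if __name__ == "__main__":
    raw = (
        "Here is the list of prompts:\n"
        "Explain how ocean tides are influenced by the moon.\n"
        "Write a short story about a lighthouse keeper.\n"
        "Describe the trade-offs of micro"  # truncated mid-word
    )
    for prompt in parse_output(raw):
        print(prompt)
```

Dropping the last line trades a little yield per generation for never persisting a prompt that was cut off by `max_new_tokens`, which is why the new default of 500 new tokens can still fill batches of 20 prompts per request.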