From c1be68fb95c4f0bc7534ee25babd8d2a408a078d Mon Sep 17 00:00:00 2001
From: Tom Stesco
Date: Wed, 29 Jan 2025 15:25:44 +0000
Subject: [PATCH 1/6] remove HF token from .env in tt-studio

---
 app/.env.default | 1 -
 1 file changed, 1 deletion(-)

diff --git a/app/.env.default b/app/.env.default
index 6b6f24ba..db81d8fe 100644
--- a/app/.env.default
+++ b/app/.env.default
@@ -6,4 +6,3 @@ VLLM_LLAMA31_ENV_FILE=""
 # SECURITY WARNING: keep these secret in production!
 JWT_SECRET=test-secret-456
 DJANGO_SECRET_KEY=django-insecure-default
-HF_TOKEN= # Get this from Hugging Face

From aed2b297dc37d7e7e73775298e150c7304e7b217 Mon Sep 17 00:00:00 2001
From: Tom Stesco
Date: Wed, 29 Jan 2025 15:31:25 +0000
Subject: [PATCH 2/6] startup.sh makes HOST_PERSISTENT_STORAGE_VOLUME if it doesn't exist

---
 startup.sh | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/startup.sh b/startup.sh
index 7eb6e04a..54c59ec5 100755
--- a/startup.sh
+++ b/startup.sh
@@ -156,8 +156,16 @@ else
     exit 1
 fi
 
-# Step 2: Source env vars
+# Step 2: Source env vars, ensure directories
 source "${ENV_FILE_PATH}"
+# make persistent volume on host with user permissions
+if [ ! -d "$HOST_PERSISTENT_STORAGE_VOLUME" ]; then
+    mkdir "$HOST_PERSISTENT_STORAGE_VOLUME"
+    if [ $? -ne 0 ]; then
+        echo "⛔ Error: Failed to create directory $HOST_PERSISTENT_STORAGE_VOLUME"
+        exit 1
+    fi
+fi
 
 # Step 3: Check if the Docker network already exists
 NETWORK_NAME="tt_studio_network"

From 3a55bbb31153ba1d67131d9bfe45a788b077c3e2 Mon Sep 17 00:00:00 2001
From: Tom Stesco
Date: Wed, 29 Jan 2025 15:32:08 +0000
Subject: [PATCH 3/6] startup.sh uses set -euo pipefail for safety

---
 startup.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/startup.sh b/startup.sh
index 54c59ec5..ab86be0a 100755
--- a/startup.sh
+++ b/startup.sh
@@ -4,6 +4,8 @@
 #
 # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
 
+set -euo pipefail  # Exit on error, treat unset variables as errors, and fail if any command in a pipeline fails
+
 # Define setup script path
 SETUP_SCRIPT="./setup.sh"

From 4fa20eed8eff28183eb43c61afa55b3934755c61 Mon Sep 17 00:00:00 2001
From: Tom Stesco
Date: Wed, 29 Jan 2025 16:37:54 +0000
Subject: [PATCH 4/6] remove HF_TOKEN from app/docker-compose.yml

---
 app/docker-compose.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/app/docker-compose.yml b/app/docker-compose.yml
index 76895274..3ae07cba 100644
--- a/app/docker-compose.yml
+++ b/app/docker-compose.yml
@@ -40,7 +40,6 @@ services:
       - BACKEND_API_HOSTNAME
       - VLLM_LLAMA31_ENV_FILE
       - JWT_SECRET
-      - HF_TOKEN
     volumes:
       # mounting docker unix socket allows for backend container to run docker cmds
       - /var/run/docker.sock:/var/run/docker.sock

From ea577a70d3094245e10ba2218328674879a69aca Mon Sep 17 00:00:00 2001
From: Tom Stesco
Date: Wed, 29 Jan 2025 20:05:00 +0000
Subject: [PATCH 5/6] remove VLLM_LLAMA31_ENV_FILE, now redundant

---
 app/api/shared_config/backend_config.py | 2 --
 app/docker-compose.yml                  | 1 -
 2 files changed, 3 deletions(-)

diff --git a/app/api/shared_config/backend_config.py b/app/api/shared_config/backend_config.py
index 88cd722c..b6f6a56c 100644
--- a/app/api/shared_config/backend_config.py
+++ b/app/api/shared_config/backend_config.py
@@ -18,7 +18,6 @@ class BackendConfig:
     weights_dir: str
     model_container_cache_root: str
     jwt_secret: str
-    hf_token: str
 
 
 # environment variables are ideally terminated on import to fail-fast and provide obvious
@@ -36,7 +35,6 @@ class BackendConfig:
     weights_dir="model_weights",
     model_container_cache_root="/home/user/cache_root",
     jwt_secret=os.environ["JWT_SECRET"],
-    hf_token=os.environ["HF_TOKEN"],
 )
 
 # make backend volume if not existing
diff --git a/app/docker-compose.yml b/app/docker-compose.yml
index 3ae07cba..e2ffb8ce 100644
--- a/app/docker-compose.yml
+++ b/app/docker-compose.yml
@@ -38,7 +38,6 @@ services:
       - HOST_PERSISTENT_STORAGE_VOLUME
       - INTERNAL_PERSISTENT_STORAGE_VOLUME
       - BACKEND_API_HOSTNAME
-      - VLLM_LLAMA31_ENV_FILE
       - JWT_SECRET
     volumes:
       # mounting docker unix socket allows for backend container to run docker cmds

From c4cc18d0d8381b617e31dbf6a6c29e1b33fe43c6 Mon Sep 17 00:00:00 2001
From: Tom Stesco
Date: Fri, 31 Jan 2025 05:28:32 +0000
Subject: [PATCH 6/6] Adding Llama 3.x integration using new setup.sh and LLM code base

* support multiple models using same container, adds support for MODEL_ID environment variable in tt-inference-server.
* update volume initialization for new file permissions strategy
* add SetupTypes to handle different first run and validation behaviour
* hf_model_id is used to define model_id and model_name if provided (rename hf_model_path to hf_model_id)
* /home/user/cache_root changed to /home/container_app_user/cache_root
* fix get_devices_mounts, add mapping
* use MODEL_ID if in container env_vars to map to impl model config
* set defaults for ModelImpl
* add configs for llama 3.x models
* remove HF_TOKEN from tt-studio .env for ease of setup
* add environment file processing
---
 app/api/docker_control/docker_utils.py  |  46 +++--
 app/api/model_control/apps.py           |   2 +-
 app/api/model_control/views.py          |   2 +-
 app/api/shared_config/backend_config.py |   2 +-
 app/api/shared_config/device_config.py  |   3 +
 app/api/shared_config/model_config.py   | 216 +++++++++++++++---------
 app/api/shared_config/setup_config.py   |   6 +
 7 files changed, 177 insertions(+), 100 deletions(-)
 create mode 100644 app/api/shared_config/setup_config.py

diff --git a/app/api/docker_control/docker_utils.py b/app/api/docker_control/docker_utils.py
index c9d1cc56..163f4ce5 100644
--- a/app/api/docker_control/docker_utils.py
+++ b/app/api/docker_control/docker_utils.py
@@ -35,7 +35,9 @@ def run_container(impl, weights_id):
     logger.info(f"run_container called for {impl.model_name}")
     run_kwargs = copy.deepcopy(impl.docker_config)
     # handle runtime configuration changes to docker kwargs
-    run_kwargs.update({"devices": get_devices_mounts(impl)})
+    device_mounts = get_devices_mounts(impl)
+    if device_mounts:
+        run_kwargs.update({"devices": device_mounts})
     run_kwargs.update({"ports": get_port_mounts(impl)})
     # add bridge inter-container network
     run_kwargs.update({"network": backend_config.docker_bridge_network_name})
@@ -87,14 +89,18 @@ def get_devices_mounts(impl):
     device_config = get_runtime_device_configuration(impl.device_configurations)
     assert isinstance(device_config, DeviceConfigurations)
     # TODO: add logic to handle multiple devices and multiple containers
-    # e.g. running falcon-7B and mistral-7B on 2x n150 machine
-    if device_config in {DeviceConfigurations.N150, DeviceConfigurations.E150}:
-        devices = ["/dev/tenstorrent/0:/dev/tenstorrent/0"]
-    elif device_config == DeviceConfigurations.N300x4:
-        devices = ["/dev/tenstorrent:/dev/tenstorrent"]
-    elif device_config == DeviceConfigurations.CPU:
-        devices = None
-    return devices
+    single_device_mounts = ["/dev/tenstorrent/0:/dev/tenstorrent/0"]
+    all_device_mounts = ["/dev/tenstorrent:/dev/tenstorrent"]
+    device_map = {
+        DeviceConfigurations.E150: single_device_mounts,
+        DeviceConfigurations.N150: single_device_mounts,
+        DeviceConfigurations.N150_WH_ARCH_YAML: single_device_mounts,
+        DeviceConfigurations.N300: single_device_mounts,
+        DeviceConfigurations.N300x4_WH_ARCH_YAML: all_device_mounts,
+        DeviceConfigurations.N300x4: all_device_mounts,
+    }
+    device_mounts = device_map.get(device_config)
+    return device_mounts
 
 
 def get_port_mounts(impl):
@@ -187,15 +193,19 @@ def get_container_status():
 def update_deploy_cache():
     data = get_container_status()
     for con_id, con in data.items():
-        model_impl = [
-            v
-            for k, v in model_implmentations.items()
-            if v.image_version == con["image_name"]
-        ]
-        assert (
-            len(model_impl) == 1
-        ), f"Cannot find model_impl={model_impl} for {con['image_name']}"
-        model_impl = model_impl[0]
+        con_model_id = con['env_vars'].get("MODEL_ID")
+        model_impl = model_implmentations.get(con_model_id)
+        if not model_impl:
+            # fallback to finding first impl that uses that container
+            model_impl = [
+                v
+                for k, v in model_implmentations.items()
+                if v.image_version == con["image_name"]
+            ]
+            assert (
+                len(model_impl) == 1
+            ), f"Cannot find model_impl={model_impl} for {con['image_name']}"
+            model_impl = model_impl[0]
         con["model_id"] = model_impl.model_id
         con["weights_id"] = con["env_vars"].get("MODEL_WEIGHTS_ID")
         con["model_impl"] = model_impl
diff --git a/app/api/model_control/apps.py b/app/api/model_control/apps.py
index b310145a..e7a0b543 100644
--- a/app/api/model_control/apps.py
+++ b/app/api/model_control/apps.py
@@ -19,4 +19,4 @@ def ready(self):
         # run once
         logger.info("Initializing models API")
         for model_id, impl in model_implmentations.items():
-            impl.init_volumes()
+            impl.setup()
diff --git a/app/api/model_control/views.py b/app/api/model_control/views.py
index b4b4e1af..7123819e 100644
--- a/app/api/model_control/views.py
+++ b/app/api/model_control/views.py
@@ -38,7 +38,7 @@ def post(self, request, *args, **kwargs):
             internal_url = "http://" + deploy["internal_url"]
             logger.info(f"internal_url:= {internal_url}")
             logger.info(f"using vllm model:= {deploy["model_impl"].model_name}")
-            data["model"] = deploy["model_impl"].hf_model_path
+            data["model"] = deploy["model_impl"].hf_model_id
             response_stream = stream_response_from_external_api(internal_url, data)
             return StreamingHttpResponse(response_stream, content_type="text/plain")
         else:
diff --git a/app/api/shared_config/backend_config.py b/app/api/shared_config/backend_config.py
index b6f6a56c..555b205b 100644
--- a/app/api/shared_config/backend_config.py
+++ b/app/api/shared_config/backend_config.py
@@ -33,7 +33,7 @@ class BackendConfig:
     django_deploy_cache_name="deploy_cache",
     docker_bridge_network_name="tt_studio_network",
     weights_dir="model_weights",
-    model_container_cache_root="/home/user/cache_root",
+    model_container_cache_root="/home/container_app_user/cache_root",
     jwt_secret=os.environ["JWT_SECRET"],
 )
 
diff --git a/app/api/shared_config/device_config.py b/app/api/shared_config/device_config.py
index e4035204..151b1433 100644
--- a/app/api/shared_config/device_config.py
+++ b/app/api/shared_config/device_config.py
@@ -10,6 +10,9 @@ class DeviceConfigurations(Enum):
     CPU = auto()
     E150 = auto()
     N150 = auto()
+    N300 = auto()
+    T3K_RING = auto()
+    T3K_LINE = auto()
     N150_WH_ARCH_YAML = auto()
     N300x4 = auto()
     N300x4_WH_ARCH_YAML = auto()
diff --git a/app/api/shared_config/model_config.py b/app/api/shared_config/model_config.py
index 590ea2a9..e33c3a33 100644
--- a/app/api/shared_config/model_config.py
+++ b/app/api/shared_config/model_config.py
@@ -9,6 +9,7 @@
 
 from shared_config.device_config import DeviceConfigurations
 from shared_config.backend_config import backend_config
+from shared_config.setup_config import SetupTypes
 from shared_config.logger_config import get_logger
 
 logger = get_logger(__name__)
@@ -16,18 +17,24 @@
 
 
 def load_dotenv_dict(env_path: Union[str, Path]) -> Dict[str, str]:
+    if not env_path:
+        return {}
+
+    # instead, use tt-studio configured JWT_SECRET
+    excluded_keys = ["JWT_SECRET"]
     env_path = Path(env_path)
     if not env_path.exists():
         logger.error(f"Env file not found: {env_path}")
         env_dict = {}
+    logger.info(f"Using env file: {env_path}")
     with open(env_path) as f:
         lines = f.readlines()
     for line in lines:
         if line.strip() and not line.startswith('#'):
             key, value = line.strip().split('=', 1)
             # expand any $VAR or ${VAR} and ~
-            value = os.path.expandvars(value)
-            env_dict[key] = value
+            if key not in excluded_keys:
+                env_dict[key] = value
     return env_dict
@@ -37,25 +44,29 @@ class ModelImpl:
     """Model implementation configuration defines everything known about a model
     implementations before runtime, e.g. not handling ports, available devices"""
 
-    model_name: str
-    model_id: str
     image_name: str
     image_tag: str
     device_configurations: Set["DeviceConfigurations"]
     docker_config: Dict[str, Any]
-    user_uid: int  # user inside docker container uid (for file permissions)
-    user_gid: int  # user inside docker container gid (for file permissions)
-    shm_size: str
-    service_port: int
     service_route: str
+    setup_type: SetupTypes
+    hf_model_id: str = None
+    model_name: str = None  # uses defaults based on hf_model_id
+    model_id: str = None  # uses defaults based on hf_model_id
+    impl_id: str = "tt-metal"  # implementation ID
+    version: str = "0.0.1"
+    shm_size: str = "32G"
+    service_port: int = 7000
     env_file: str = ""
     health_route: str = "/health"
-    hf_model_path: str = ""
 
     def __post_init__(self):
+        # _init methods compute values that are dependent on other values
+        self._init_model_name()
+
         self.docker_config.update({"volumes": self.get_volume_mounts()})
         self.docker_config["shm_size"] = self.shm_size
-        self.docker_config["environment"]["HF_MODEL_PATH"] = self.hf_model_path
+        self.docker_config["environment"]["HF_MODEL_PATH"] = self.hf_model_id
         self.docker_config["environment"]["HF_HOME"] = Path(
             backend_config.model_container_cache_root
         ).joinpath("huggingface")
@@ -64,14 +75,6 @@
         if DeviceConfigurations.N150 in self.device_configurations or DeviceConfigurations.N300x4 in self.device_configurations:
             self.docker_config["environment"]["WH_ARCH_YAML"] = "wormhole_b0_80_arch_eth_dispatch.yaml"
 
-        if self.env_file:
-            logger.info(f"Using env file: {self.env_file}")
-            # env file should be in persistent volume mounted
-            env_dict = load_dotenv_dict(self.env_file)
-            # env file overrides any existing docker environment variables
-            self.docker_config["environment"].update(env_dict)
-
-
         # Set environment variable if N150_WH_ARCH_YAML or N300x4_WH_ARCH_YAML is in the device configurations
         if (
             DeviceConfigurations.N150_WH_ARCH_YAML in self.device_configurations
@@ -81,12 +84,16 @@
                 "wormhole_b0_80_arch_eth_dispatch.yaml"
             )
 
-        if self.env_file:
-            logger.info(f"Using env file: {self.env_file}")
-            # env file should be in persistent volume mounted
-            env_dict = load_dotenv_dict(self.env_file)
-            # env file overrides any existing docker environment variables
-            self.docker_config["environment"].update(env_dict)
+        # model env file must be interpreted here
+        if not self.env_file:
+            _env_file = self.get_model_env_file()
+        else:
+            _env_file = self.env_file
+
+        # env file should be in persistent volume mounted
+        env_dict = load_dotenv_dict(_env_file)
+        # env file overrides any existing docker environment variables
+        self.docker_config["environment"].update(env_dict)
 
         # Set environment variable if N150_WH_ARCH_YAML or N300x4_WH_ARCH_YAML is in the device configurations
         if (
@@ -97,13 +104,6 @@
                 "wormhole_b0_80_arch_eth_dispatch.yaml"
             )
 
-        if self.env_file:
-            logger.info(f"Using env file: {self.env_file}")
-            # env file should be in persistent volume mounted
-            env_dict = load_dotenv_dict(self.env_file)
-            # env file overrides any existing docker environment variables
-            self.docker_config["environment"].update(env_dict)
-
     @property
     def image_version(self) -> str:
         return f"{self.image_name}:{self.image_tag}"
@@ -143,6 +143,36 @@ def model_container_weights_dir(self) -> Path:
     def backend_hf_home(self) -> Path:
         return self.backend_weights_dir.joinpath("huggingface")
 
+    def _init_model_name(self):
+        # Note: ONLY run this in __post_init__
+        # need to use __setattr__ because instance is frozen
+        assert self.hf_model_id or self.model_name, "either hf_model_id or model_name must be set."
+        if not self.model_name:
+            # use basename of HF model ID to use same format as tt-transformers
+            object.__setattr__(self, 'model_name', Path(self.hf_model_id).name)
+        if not self.model_id:
+            object.__setattr__(self, 'model_id', self.get_default_model_id())
+        if not self.hf_model_id:
+            logger.info(f"model_name:={self.model_name} does not have a hf_model_id set")
+
+    def get_default_model_id(self):
+        return f"id_{self.impl_id}-{self.model_name}-v{self.version}"
+
+    def get_model_env_file(self):
+        ret_env_file = None
+        model_env_dir_name = "model_envs"
+        model_env_dir = Path(backend_config.persistent_storage_volume).joinpath(model_env_dir_name)
+        if model_env_dir.exists():
+            env_fname = f"{self.model_name}.env"
+            model_env_fpath = model_env_dir.joinpath(env_fname)
+            if model_env_fpath.exists():
+                ret_env_file = model_env_fpath
+            else:
+                logger.warning(f"for model {self.model_name} env file: {model_env_fpath} does not exist, have you run tt-inference-server setup.sh for the model?")
+        else:
+            logger.warning(f"{model_env_dir} does not exist, have you run tt-inference-server setup.sh?")
+        return ret_env_file
+
     def get_volume_mounts(self):
         # use type=volume for persistent storage with a Docker managed named volume
         # target: this should be set to same location as the CACHE_ROOT environment var
@@ -156,14 +186,24 @@ def get_volume_mounts(self):
         }
         return volume_mounts
 
+    def setup(self):
+        # verify model setup and runtime setup
+        self.init_volumes()
+
     def init_volumes(self):
-        # need to make directory in app backend container to allow for correct perimission to be set
-        self.volume_path.mkdir(parents=True, exist_ok=True)
-        os.chown(self.volume_path, uid=self.user_uid, gid=self.user_gid)
-        self.backend_weights_dir.mkdir(parents=True, exist_ok=True)
-        os.chown(self.backend_weights_dir, uid=self.user_uid, gid=self.user_gid)
-        # self.backend_hf_home.mkdir(parents=True, exist_ok=True)
-        # os.chown(self.backend_hf_home, uid=self.user_uid, gid=self.user_gid)
+        # check volumes
+        if self.setup_type == SetupTypes.TT_INFERENCE_SERVER:
+            if self.volume_path.exists():
+                logger.info(f"Found {self.volume_path}")
+            else:
+                logger.info(f"Model volume does not exist: {self.volume_path}")
+                logger.error(f"Initialize this model by running the tt-inference-server setup.sh script")
+        elif self.setup_type == SetupTypes.MAKE_VOLUMES:
+            if not self.volume_path.exists():
+                # if no setup is required for the model, the backend can make the volume
+                self.volume_path.mkdir(parents=True, exist_ok=True)
+        elif self.setup_type == SetupTypes.NO_SETUP:
+            logger.info(f"Model {self.model_id} does not require a volume")
 
     def asdict(self):
         return asdict(self)
@@ -172,14 +212,12 @@ def asdict(self):
 
 def base_docker_config():
     return {
         # Note: mounts and devices are determined in `docker_utils.py`
-        "user": "user",
         "auto_remove": True,
         "cap_add": "ALL",  # TODO: add minimal permissions
         "detach": True,
         "environment": {
             "JWT_SECRET": backend_config.jwt_secret,
             "CACHE_ROOT": backend_config.model_container_cache_root,
-            "HF_TOKEN": backend_config.hf_token,
         },
     }
@@ -194,71 +232,91 @@
         image_tag="v0.0.1-tt-metal-65d246482b3f",
         device_configurations={DeviceConfigurations.N150},
         docker_config=base_docker_config(),
-        user_uid=1000,
-        user_gid=1000,
         shm_size="32G",
         service_port=7000,
         service_route="/objdetection_v2",
+        setup_type=SetupTypes.NO_SETUP,
     ),
     ModelImpl(
+        hf_model_id="meta-llama/Llama-3.1-70B-Instruct",
         model_name="Mock-Llama-3.1-70B-Instruct",
         model_id="id_mock_vllm_modelv0.0.1",
         image_name="ghcr.io/tenstorrent/tt-inference-server/mock.vllm.openai.api",
         image_tag="v0.0.1-tt-metal-385904186f81-384f1790c3be",
-        hf_model_path="meta-llama/Llama-3.1-70B-Instruct",
         device_configurations={DeviceConfigurations.CPU},
         docker_config=base_docker_config(),
-        user_uid=1000,
-        user_gid=1000,
         shm_size="1G",
         service_port=7000,
         service_route="/v1/chat/completions",
+        setup_type=SetupTypes.MAKE_VOLUMES,
     ),
     ModelImpl(
-        model_name="Falcon-7B-Instruct",
-        model_id="id_tt-metal-falcon-7bv0.0.13",
-        image_name="tt-metal-falcon-7b",
-        image_tag="v0.0.13",
-        device_configurations={DeviceConfigurations.N150_WH_ARCH_YAML},
-        hf_model_path="tiiuae/falcon-7b-instruct",
-        docker_config=base_docker_config(),
-        user_uid=1000,
-        user_gid=1000,
-        shm_size="32G",
-        service_port=7000,
-        service_route="/inference/falcon7b",
-    ),
-    ModelImpl(
-        model_name="Llama-3.1-70B-Instruct",
-        model_id="id_tt-metal-llama-3.1-70b-instructv0.0.1",
+        hf_model_id="meta-llama/Llama-3.1-70B-Instruct",
         image_name="ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm",
         image_tag="v0.0.3-tt-metal-385904186f81-384f1790c3be",
-        hf_model_path="meta-llama/Llama-3.1-70B-Instruct",
         device_configurations={DeviceConfigurations.N300x4_WH_ARCH_YAML},
         docker_config=base_docker_config(),
-        user_uid=1000,
-        user_gid=1000,
         shm_size="32G",
         service_port=7000,
         service_route="/v1/chat/completions",
         env_file=os.environ.get("VLLM_LLAMA31_ENV_FILE"),
+        setup_type=SetupTypes.TT_INFERENCE_SERVER,
+    ),
+    ModelImpl(
+        hf_model_id="meta-llama/Llama-3.2-1B-Instruct",
+        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-llama3-src-dev-ubuntu-20.04-amd64",
+        image_tag="v0.0.1-47fb1a2fb6e0-2f33504bad49",
+        device_configurations={DeviceConfigurations.N300x4},
+        docker_config=base_docker_config(),
+        service_route="/v1/chat/completions",
+        setup_type=SetupTypes.TT_INFERENCE_SERVER,
+    ),
+    ModelImpl(
+        hf_model_id="meta-llama/Llama-3.2-3B-Instruct",
+        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-llama3-src-dev-ubuntu-20.04-amd64",
+        image_tag="v0.0.1-47fb1a2fb6e0-2f33504bad49",
+        device_configurations={DeviceConfigurations.N300x4},
+        docker_config=base_docker_config(),
+        service_route="/v1/chat/completions",
+        setup_type=SetupTypes.TT_INFERENCE_SERVER,
+    ),
+    ModelImpl(
+        hf_model_id="meta-llama/Llama-3.1-8B-Instruct",
+        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-llama3-src-dev-ubuntu-20.04-amd64",
+        image_tag="v0.0.1-47fb1a2fb6e0-2f33504bad49",
+        device_configurations={DeviceConfigurations.N300x4},
+        docker_config=base_docker_config(),
+        service_route="/v1/chat/completions",
+        setup_type=SetupTypes.TT_INFERENCE_SERVER,
+    ),
+    ModelImpl(
+        hf_model_id="meta-llama/Llama-3.2-11B-Vision-Instruct",
+        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-llama3-src-dev-ubuntu-20.04-amd64",
+        image_tag="v0.0.1-47fb1a2fb6e0-2f33504bad49",
+        device_configurations={DeviceConfigurations.N300x4},
+        docker_config=base_docker_config(),
+        service_route="/v1/chat/completions",
+        setup_type=SetupTypes.TT_INFERENCE_SERVER,
+    ),
+    ModelImpl(
+        hf_model_id="meta-llama/Llama-3.1-70B-Instruct",
+        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-llama3-src-dev-ubuntu-20.04-amd64",
+        image_tag="v0.0.1-47fb1a2fb6e0-2f33504bad49",
+        device_configurations={DeviceConfigurations.N300x4},
+        docker_config=base_docker_config(),
+        service_route="/v1/chat/completions",
+        setup_type=SetupTypes.TT_INFERENCE_SERVER,
+    ),
+    ModelImpl(
+        hf_model_id="meta-llama/Llama-3.3-70B-Instruct",
+        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-llama3-src-dev-ubuntu-20.04-amd64",
+        image_tag="v0.0.1-47fb1a2fb6e0-2f33504bad49",
+        device_configurations={DeviceConfigurations.N300x4},
+        docker_config=base_docker_config(),
+        service_route="/v1/chat/completions",
+        setup_type=SetupTypes.TT_INFERENCE_SERVER,
     ),
     #! Add new model vLLM model implementations here
-    # ModelImpl(
-    #     model_name="",  #? Add the model name for the vLLM model based on persistent storage
-    #     model_id="",  #? Add the model id for the vLLM model based on persistent storage
-    #     image_name="ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm",
-    #     image_tag="v0.0.1-tt-metal-685ef1303b5a-54b9157d852b",
-    #     hf_model_path="meta-llama/Llama-3.1-70B-Instruct",
-    #     device_configurations={DeviceConfigurations.N300x4},
-    #     docker_config=base_docker_config(),
-    #     user_uid=1000,
-    #     user_gid=1000,
-    #     shm_size="32G",
-    #     service_port=7000,
-    #     service_route="/inference/**",  #? Add the correct route for the vLLM model
-    #     env_file=os.environ.get("VLLM_LLAMA31_ENV_FILE"),
-    # )
 ]
 
 def validate_model_implemenation_config(impl):
diff --git a/app/api/shared_config/setup_config.py b/app/api/shared_config/setup_config.py
new file mode 100644
index 00000000..c89a966f
--- /dev/null
+++ b/app/api/shared_config/setup_config.py
@@ -0,0 +1,6 @@
+from enum import IntEnum, auto
+
+class SetupTypes(IntEnum):
+    NO_SETUP = auto()  # 1
+    MAKE_VOLUMES = auto()  # 2
+    TT_INFERENCE_SERVER = auto()  # 3
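
With the defaults added to ModelImpl in PATCH 6, the new Llama 3.x entries only need hf_model_id, an image, a device configuration, and a setup_type; model_name, model_id, and the env file location are derived. Below is a minimal, standalone Python sketch of that derivation for reviewers. It mirrors _init_model_name, get_default_model_id, and get_model_env_file from the patch, but it is not the actual module, and the persistent-storage path used in the example is a placeholder, not a value taken from this series.

from pathlib import Path

IMPL_ID = "tt-metal"  # ModelImpl.impl_id default
VERSION = "0.0.1"     # ModelImpl.version default

def derive_names(hf_model_id: str) -> tuple[str, str]:
    # model_name defaults to the basename of the HF model ID (same format as tt-transformers)
    model_name = Path(hf_model_id).name
    # model_id default mirrors get_default_model_id()
    model_id = f"id_{IMPL_ID}-{model_name}-v{VERSION}"
    return model_name, model_id

def model_env_file(persistent_storage_volume: str, model_name: str) -> Path:
    # env files are looked up at <persistent_storage_volume>/model_envs/<model_name>.env,
    # which the tt-inference-server setup.sh script is expected to have written
    return Path(persistent_storage_volume) / "model_envs" / f"{model_name}.env"

if __name__ == "__main__":
    name, model_id = derive_names("meta-llama/Llama-3.2-3B-Instruct")
    print(name)      # Llama-3.2-3B-Instruct
    print(model_id)  # id_tt-metal-Llama-3.2-3B-Instruct-v0.0.1
    print(model_env_file("/path/to/tt_studio_persistent_volume", name))

If the env file is missing, the backend only logs a warning and starts the container without it (JWT_SECRET is always excluded and taken from tt-studio's own configuration), which is why running the tt-inference-server setup.sh script beforehand matters for SetupTypes.TT_INFERENCE_SERVER models.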