tenstorrent · anirudTT · Feb 24, 2025 · Jan 31, 2025 · Feb 5, 2025 · Feb 7, 2025
diff --git a/app/.env.default b/app/.env.default
@@ -6,4 +6,3 @@ VLLM_LLAMA31_ENV_FILE=""
 # SECURITY WARNING: keep these secret in production!
 JWT_SECRET=test-secret-456
 DJANGO_SECRET_KEY=django-insecure-default
-HF_TOKEN=<your-hf-token> # Get this from Hugging Face
diff --git a/app/api/docker_control/docker_utils.py b/app/api/docker_control/docker_utils.py
@@ -35,7 +35,9 @@ def run_container(impl, weights_id):
         logger.info(f"run_container called for {impl.model_name}")
         run_kwargs = copy.deepcopy(impl.docker_config)
         # handle runtime configuration changes to docker kwargs
-        run_kwargs.update({"devices": get_devices_mounts(impl)})
+        device_mounts = get_devices_mounts(impl)
+        if device_mounts:
+            run_kwargs.update({"devices": device_mounts})
         run_kwargs.update({"ports": get_port_mounts(impl)})
         # add bridge inter-container network
         run_kwargs.update({"network": backend_config.docker_bridge_network_name})
@@ -87,14 +89,18 @@ def get_devices_mounts(impl):
     device_config = get_runtime_device_configuration(impl.device_configurations)
     assert isinstance(device_config, DeviceConfigurations)
     # TODO: add logic to handle multiple devices and multiple containers
-    # e.g. running falcon-7B and mistral-7B on 2x n150 machine
-    if device_config in {DeviceConfigurations.N150, DeviceConfigurations.E150}:
-        devices = ["/dev/tenstorrent/0:/dev/tenstorrent/0"]
-    elif device_config == DeviceConfigurations.N300x4:
-        devices = ["/dev/tenstorrent:/dev/tenstorrent"]
-    elif device_config == DeviceConfigurations.CPU:
-        devices = None
-    return devices
+    single_device_mounts = ["/dev/tenstorrent/0:/dev/tenstorrent/0"]
+    all_device_mounts = ["/dev/tenstorrent:/dev/tenstorrent"]
+    device_map = {
+        DeviceConfigurations.E150: single_device_mounts,
+        DeviceConfigurations.N150: single_device_mounts,
+        DeviceConfigurations.N150_WH_ARCH_YAML: single_device_mounts,
+        DeviceConfigurations.N300: single_device_mounts,
+        DeviceConfigurations.N300x4_WH_ARCH_YAML: all_device_mounts,
+        DeviceConfigurations.N300x4: all_device_mounts,
+    }
+    device_mounts = device_map.get(device_config)
+    return device_mounts
 
 
 def get_port_mounts(impl):
@@ -187,15 +193,19 @@ def get_container_status():
 def update_deploy_cache():
     data = get_container_status()
     for con_id, con in data.items():
-        model_impl = [
-            v
-            for k, v in model_implmentations.items()
-            if v.image_version == con["image_name"]
-        ]
-        assert (
-            len(model_impl) == 1
-        ), f"Cannot find model_impl={model_impl} for {con['image_name']}"
-        model_impl = model_impl[0]
+        con_model_id = con['env_vars'].get("MODEL_ID")
+        model_impl = model_implmentations.get(con_model_id)
+        if not model_impl:
+            # fall back to the single impl whose image_version matches the container's image_name
+            model_impl = [
+                v
+                for k, v in model_implmentations.items()
+                if v.image_version == con["image_name"]
+            ]
+            assert (
+                len(model_impl) == 1
+            ), f"Cannot find model_impl={model_impl} for {con['image_name']}"
+            model_impl = model_impl[0]
         con["model_id"] = model_impl.model_id
         con["weights_id"] = con["env_vars"].get("MODEL_WEIGHTS_ID")
         con["model_impl"] = model_impl

diff --git a/app/api/model_control/apps.py b/app/api/model_control/apps.py
@@ -19,4 +19,4 @@ def ready(self):
         # run once
         logger.info("Initializing models API")
         for model_id, impl in model_implmentations.items():
-            impl.init_volumes()
+            impl.setup()
diff --git a/app/api/model_control/views.py b/app/api/model_control/views.py
@@ -38,7 +38,7 @@ def post(self, request, *args, **kwargs):
             internal_url = "http://" + deploy["internal_url"]
             logger.info(f"internal_url:= {internal_url}")
             logger.info(f"using vllm model:= {deploy["model_impl"].model_name}")
-            data["model"] = deploy["model_impl"].hf_model_path
+            data["model"] = deploy["model_impl"].hf_model_id
             response_stream = stream_response_from_external_api(internal_url, data)
             return StreamingHttpResponse(response_stream, content_type="text/plain")
         else:

diff --git a/app/api/shared_config/backend_config.py b/app/api/shared_config/backend_config.py
@@ -18,7 +18,6 @@ class BackendConfig:
     weights_dir: str
     model_container_cache_root: str
     jwt_secret: str
-    hf_token: str
 
 
 # environment variables are ideally terminated on import to fail-fast and provide obvious
@@ -34,9 +33,8 @@ class BackendConfig:
     django_deploy_cache_name="deploy_cache",
     docker_bridge_network_name="tt_studio_network",
     weights_dir="model_weights",
-    model_container_cache_root="/home/user/cache_root",
+    model_container_cache_root="/home/container_app_user/cache_root",
     jwt_secret=os.environ["JWT_SECRET"],
-    hf_token=os.environ["HF_TOKEN"],
 )
 
 # make backend volume if not existing

diff --git a/app/api/shared_config/device_config.py b/app/api/shared_config/device_config.py
@@ -10,6 +10,9 @@ class DeviceConfigurations(Enum):
     CPU = auto()
     E150 = auto()
     N150 = auto()
+    N300 = auto()
+    T3K_RING = auto()
+    T3K_LINE = auto()
     N150_WH_ARCH_YAML = auto()
     N300x4 = auto()
     N300x4_WH_ARCH_YAML = auto()