Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rc v1.2.0 #174

Merged
merged 4 commits into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion app/.env.default
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,3 @@ VLLM_LLAMA31_ENV_FILE=""
# SECURITY WARNING: keep these secret in production!
JWT_SECRET=test-secret-456
DJANGO_SECRET_KEY=django-insecure-default
HF_TOKEN=<your-hf-token> # Get this from Hugging Face
46 changes: 28 additions & 18 deletions app/api/docker_control/docker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ def run_container(impl, weights_id):
logger.info(f"run_container called for {impl.model_name}")
run_kwargs = copy.deepcopy(impl.docker_config)
# handle runtime configuration changes to docker kwargs
run_kwargs.update({"devices": get_devices_mounts(impl)})
device_mounts = get_devices_mounts(impl)
if device_mounts:
run_kwargs.update({"devices": device_mounts})
run_kwargs.update({"ports": get_port_mounts(impl)})
# add bridge inter-container network
run_kwargs.update({"network": backend_config.docker_bridge_network_name})
Expand Down Expand Up @@ -87,14 +89,18 @@ def get_devices_mounts(impl):
device_config = get_runtime_device_configuration(impl.device_configurations)
assert isinstance(device_config, DeviceConfigurations)
# TODO: add logic to handle multiple devices and multiple containers
# e.g. running falcon-7B and mistral-7B on 2x n150 machine
if device_config in {DeviceConfigurations.N150, DeviceConfigurations.E150}:
devices = ["/dev/tenstorrent/0:/dev/tenstorrent/0"]
elif device_config == DeviceConfigurations.N300x4:
devices = ["/dev/tenstorrent:/dev/tenstorrent"]
elif device_config == DeviceConfigurations.CPU:
devices = None
return devices
single_device_mounts = ["/dev/tenstorrent/0:/dev/tenstorrent/0"]
all_device_mounts = ["/dev/tenstorrent:/dev/tenstorrent"]
device_map = {
DeviceConfigurations.E150: single_device_mounts,
DeviceConfigurations.N150: single_device_mounts,
DeviceConfigurations.N150_WH_ARCH_YAML: single_device_mounts,
DeviceConfigurations.N300: single_device_mounts,
DeviceConfigurations.N300x4_WH_ARCH_YAML: all_device_mounts,
DeviceConfigurations.N300x4: all_device_mounts,
}
device_mounts = device_map.get(device_config)
return device_mounts


def get_port_mounts(impl):
Expand Down Expand Up @@ -187,15 +193,19 @@ def get_container_status():
def update_deploy_cache():
data = get_container_status()
for con_id, con in data.items():
model_impl = [
v
for k, v in model_implmentations.items()
if v.image_version == con["image_name"]
]
assert (
len(model_impl) == 1
), f"Cannot find model_impl={model_impl} for {con['image_name']}"
model_impl = model_impl[0]
con_model_id = con['env_vars'].get("MODEL_ID")
model_impl = model_implmentations.get(con_model_id)
if not model_impl:
# fall back to the single impl whose image_version matches the container's image
model_impl = [
v
for k, v in model_implmentations.items()
if v.image_version == con["image_name"]
]
assert (
len(model_impl) == 1
), f"Cannot find model_impl={model_impl} for {con['image_name']}"
model_impl = model_impl[0]
con["model_id"] = model_impl.model_id
con["weights_id"] = con["env_vars"].get("MODEL_WEIGHTS_ID")
con["model_impl"] = model_impl
Expand Down
2 changes: 1 addition & 1 deletion app/api/model_control/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ def ready(self):
# run once
logger.info("Initializing models API")
for model_id, impl in model_implmentations.items():
impl.init_volumes()
impl.setup()
2 changes: 1 addition & 1 deletion app/api/model_control/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def post(self, request, *args, **kwargs):
internal_url = "http://" + deploy["internal_url"]
logger.info(f"internal_url:= {internal_url}")
logger.info(f"using vllm model:= {deploy["model_impl"].model_name}")
data["model"] = deploy["model_impl"].hf_model_path
data["model"] = deploy["model_impl"].hf_model_id
response_stream = stream_response_from_external_api(internal_url, data)
return StreamingHttpResponse(response_stream, content_type="text/plain")
else:
Expand Down
4 changes: 1 addition & 3 deletions app/api/shared_config/backend_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ class BackendConfig:
weights_dir: str
model_container_cache_root: str
jwt_secret: str
hf_token: str


# environment variables are ideally terminated on import to fail-fast and provide obvious
Expand All @@ -34,9 +33,8 @@ class BackendConfig:
django_deploy_cache_name="deploy_cache",
docker_bridge_network_name="tt_studio_network",
weights_dir="model_weights",
model_container_cache_root="/home/user/cache_root",
model_container_cache_root="/home/container_app_user/cache_root",
jwt_secret=os.environ["JWT_SECRET"],
hf_token=os.environ["HF_TOKEN"],
)

# make backend volume if not existing
Expand Down
3 changes: 3 additions & 0 deletions app/api/shared_config/device_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ class DeviceConfigurations(Enum):
CPU = auto()
E150 = auto()
N150 = auto()
N300 = auto()
T3K_RING = auto()
T3K_LINE = auto()
N150_WH_ARCH_YAML = auto()
N300x4 = auto()
N300x4_WH_ARCH_YAML = auto()
Expand Down
Loading