diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 7306e41c4..05188af28 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -8,6 +8,7 @@ on:
       - '*'
     paths:
       - 'openllm-python/src/openllm/**'
+      - 'openllm-python/src/openllm_cli/**'
       - 'openllm-core/src/openllm_core/**'
       - 'openllm-client/src/openllm_client/**'
   pull_request:
@@ -15,6 +16,7 @@ on:
       - 'main'
     paths:
       - 'openllm-python/src/openllm/**'
+      - 'openllm-python/src/openllm_cli/**'
       - 'openllm-core/src/openllm_core/**'
       - 'openllm-client/src/openllm_client/**'
     types: [labeled, opened, synchronize, reopened]
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b4821c933..8100a6ae8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -34,7 +34,7 @@ repos:
         types_or: [python, javascript]
         exclude: |
           (?x)^(
-              openllm-python/src/openllm/cli/entrypoint.py |
+              openllm-python/src/openllm_cli/entrypoint.py |
               openllm-client/src/openllm_client/pb.*
           )$
   - repo: meta
diff --git a/cz.py b/cz.py
index b4223727a..ae068b250 100755
--- a/cz.py
+++ b/cz.py
@@ -48,6 +48,7 @@ def run_cz(dir: str, package: str):
 
 def main() -> int:
   run_cz('openllm-python', 'openllm')
+  run_cz('openllm-python', 'openllm_cli')  # openllm_cli is a second package under openllm-python/src
   run_cz('openllm-core', 'openllm_core')
   run_cz('openllm-client', 'openllm_client')
   return 0
diff --git a/openllm-python/pyproject.toml b/openllm-python/pyproject.toml
index 87129dbfd..24f573370 100644
--- a/openllm-python/pyproject.toml
+++ b/openllm-python/pyproject.toml
@@ -75,14 +75,14 @@ license = "Apache-2.0"
 name = "openllm"
 requires-python = ">=3.8"
 [project.scripts]
-openllm = "openllm.cli.entrypoint:cli"
-openllm-build-base-container = "openllm.cli.extension.build_base_container:cli"
-openllm-dive-bentos = "openllm.cli.extension.dive_bentos:cli"
-openllm-get-containerfile = "openllm.cli.extension.get_containerfile:cli"
-openllm-get-prompt = "openllm.cli.extension.get_prompt:cli"
-openllm-list-bentos = "openllm.cli.extension.list_bentos:cli"
-openllm-list-models = "openllm.cli.extension.list_models:cli"
-openllm-playground = "openllm.cli.extension.playground:cli"
+openllm = "openllm_cli.entrypoint:cli"
+openllm-build-base-container = "openllm_cli.extension.build_base_container:cli"
+openllm-dive-bentos = "openllm_cli.extension.dive_bentos:cli"
+openllm-get-containerfile = "openllm_cli.extension.get_containerfile:cli"
+openllm-get-prompt = "openllm_cli.extension.get_prompt:cli"
+openllm-list-bentos = "openllm_cli.extension.list_bentos:cli"
+openllm-list-models = "openllm_cli.extension.list_models:cli"
+openllm-playground = "openllm_cli.extension.playground:cli"
 
 [project.urls]
 Blog = "https://modelserving.com"
@@ -136,7 +136,7 @@ root = ".."
 [tool.hatch.metadata]
 allow-direct-references = true
 [tool.hatch.build.targets.wheel]
-only-include = ["src/openllm"]
+only-include = ["src/openllm", "src/openllm_cli"]
 sources = ["src"]
 [tool.hatch.build.targets.sdist]
 exclude = [
diff --git a/openllm-python/src/openllm/__init__.py b/openllm-python/src/openllm/__init__.py
index 73ce07e10..272ee8d02 100644
--- a/openllm-python/src/openllm/__init__.py
+++ b/openllm-python/src/openllm/__init__.py
@@ -14,6 +14,10 @@
 import pathlib as _pathlib
 import warnings as _warnings
 
+import openllm_cli as _cli
+
+from openllm_cli import _sdk
+
 from . import utils as utils
 
 
@@ -55,7 +59,6 @@
     '_strategies': ['CascadingResourceStrategy', 'get_resource'],
     'entrypoints': ['mount_entrypoints'],
     'serialisation': ['ggml', 'transformers'],
-    'cli._sdk': ['start', 'start_grpc', 'build', 'import_model', 'list_models'],
     '_quantisation': ['infer_quantisation_config'],
     '_llm': ['LLM', 'LLMRunner', 'LLMRunnable'],
     '_generation': [
@@ -66,7 +69,15 @@
       'prepare_logits_processor',
     ],
   },
-  extra_objects={'COMPILED': COMPILED},
+  extra_objects={
+    'COMPILED': COMPILED,
+    'cli': _cli,
+    'start': _sdk.start,
+    'start_grpc': _sdk.start_grpc,
+    'build': _sdk.build,
+    'import_model': _sdk.import_model,
+    'list_models': _sdk.list_models,
+  },
 )
 __all__ = __lazy.__all__
 __dir__ = __lazy.__dir__
diff --git a/openllm-python/src/openllm/__init__.pyi b/openllm-python/src/openllm/__init__.pyi
index 7d444eded..07b82917a 100644
--- a/openllm-python/src/openllm/__init__.pyi
+++ b/openllm-python/src/openllm/__init__.pyi
@@ -1,3 +1,4 @@
+import openllm_cli as _cli
 from openllm_core._configuration import GenerationConfig as GenerationConfig
 from openllm_core._configuration import LLMConfig as LLMConfig
 from openllm_core._configuration import SamplingParams as SamplingParams
@@ -21,7 +22,6 @@ from openllm_core.config import StableLMConfig as StableLMConfig
 from openllm_core.config import StarCoderConfig as StarCoderConfig
 from . import exceptions as exceptions
 from . import bundle as bundle
-from . import cli as cli
 from . import client as client
 from . import playground as playground
 from . import serialisation as serialisation
@@ -39,11 +39,11 @@ from ._llm import LLMRunner as LLMRunner
 from ._quantisation import infer_quantisation_config as infer_quantisation_config
 from ._strategies import CascadingResourceStrategy as CascadingResourceStrategy
 from ._strategies import get_resource as get_resource
-from .cli._sdk import build as build
-from .cli._sdk import import_model as import_model
-from .cli._sdk import list_models as list_models
-from .cli._sdk import start as start
-from .cli._sdk import start_grpc as start_grpc
+from openllm_cli._sdk import build as build
+from openllm_cli._sdk import import_model as import_model
+from openllm_cli._sdk import list_models as list_models
+from openllm_cli._sdk import start as start
+from openllm_cli._sdk import start_grpc as start_grpc
 from .client import AsyncHTTPClient as AsyncHTTPClient
 from .client import HTTPClient as HTTPClient
 from .entrypoints import mount_entrypoints as mount_entrypoints
@@ -51,4 +51,5 @@ from .protocol import openai as openai
 from .serialisation import ggml as ggml
 from .serialisation import transformers as transformers
 
+cli = _cli
 COMPILED: bool = ...
diff --git a/openllm-python/src/openllm/__main__.py b/openllm-python/src/openllm/__main__.py
index 6721cd106..2babfd90b 100644
--- a/openllm-python/src/openllm/__main__.py
+++ b/openllm-python/src/openllm/__main__.py
@@ -8,6 +8,6 @@
 """
 
 if __name__ == '__main__':
-  from openllm.cli.entrypoint import cli
+  from openllm_cli.entrypoint import cli
 
   cli()
diff --git a/openllm-python/src/openllm/bundle/_package.py b/openllm-python/src/openllm/bundle/_package.py
index 86b61c604..714b7e8e5 100644
--- a/openllm-python/src/openllm/bundle/_package.py
+++ b/openllm-python/src/openllm/bundle/_package.py
@@ -130,7 +130,7 @@ def construct_docker_options(
   container_registry: LiteralContainerRegistry,
   container_version_strategy: LiteralContainerVersionStrategy,
 ) -> DockerOptions:
-  from openllm.cli._factory import parse_config_options
+  from openllm_cli._factory import parse_config_options
 
   environ = parse_config_options(llm.config, llm.config['timeout'], 1.0, None, True, os.environ.copy())
   env_dict = {
diff --git a/openllm-python/src/openllm/cli/__init__.py b/openllm-python/src/openllm_cli/__init__.py
similarity index 100%
rename from openllm-python/src/openllm/cli/__init__.py
rename to openllm-python/src/openllm_cli/__init__.py
diff --git a/openllm-python/src/openllm/cli/_factory.py b/openllm-python/src/openllm_cli/_factory.py
similarity index 100%
rename from openllm-python/src/openllm/cli/_factory.py
rename to openllm-python/src/openllm_cli/_factory.py
diff --git a/openllm-python/src/openllm/cli/_sdk.py b/openllm-python/src/openllm_cli/_sdk.py
similarity index 99%
rename from openllm-python/src/openllm/cli/_sdk.py
rename to openllm-python/src/openllm_cli/_sdk.py
index bcc5d8f35..47e46aaee 100644
--- a/openllm-python/src/openllm/cli/_sdk.py
+++ b/openllm-python/src/openllm_cli/_sdk.py
@@ -189,7 +189,7 @@ def _build(
   Returns:
       ``bentoml.Bento | str``: BentoLLM instance. This can be used to serve the LLM or can be pushed to BentoCloud.
   """
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights
 
   args: list[str] = [
     sys.executable,
diff --git a/openllm-python/src/openllm/cli/entrypoint.py b/openllm-python/src/openllm_cli/entrypoint.py
similarity index 98%
rename from openllm-python/src/openllm/cli/entrypoint.py
rename to openllm-python/src/openllm_cli/entrypoint.py
index d3641890a..60ff86cc5 100644
--- a/openllm-python/src/openllm/cli/entrypoint.py
+++ b/openllm-python/src/openllm_cli/entrypoint.py
@@ -172,7 +172,7 @@ def list_commands(self, ctx: click.Context) -> list[str]:
 
   def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command | None:
     try:
-      mod = __import__(f'openllm.cli.extension.{cmd_name}', None, None, ['cli'])
+      mod = __import__(f'openllm_cli.extension.{cmd_name}', None, None, ['cli'])  # fromlist -> leaf module
     except ImportError:
       return None
     return mod.cli
@@ -345,12 +345,16 @@ def format_commands(self, ctx: click.Context, formatter: click.HelpFormatter) ->
           formatter.write_dl(rows)
 
 
+_PACKAGE_NAME = 'openllm'  # used as click.version_option's package_name and in its message
+
+
 @click.group(cls=OpenLLMCommandGroup, context_settings=termui.CONTEXT_SETTINGS, name='openllm')
 @click.version_option(
   None,
   '--version',
   '-v',
-  message=f'%(prog)s, %(version)s (compiled: {openllm.COMPILED})\nPython ({platform.python_implementation()}) {platform.python_version()}',
+  package_name=_PACKAGE_NAME,
+  message=f'{_PACKAGE_NAME}, %(version)s (compiled: {openllm.COMPILED})\nPython ({platform.python_implementation()}) {platform.python_version()}',
 )
 def cli() -> None:
   """\b
@@ -421,7 +425,7 @@ def start_command(
   adapter_map: dict[str, str] | None = attrs.pop('adapter_map', None)
   prompt_template = prompt_template_file.read() if prompt_template_file is not None else None
 
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights
 
   serialisation = t.cast(
     LiteralSerialisation,
@@ -545,7 +549,7 @@ def start_grpc_command(
   adapter_map: dict[str, str] | None = attrs.pop('adapter_map', None)
   prompt_template = prompt_template_file.read() if prompt_template_file is not None else None
 
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights
 
   serialisation = first_not_none(
     serialisation, default='safetensors' if has_safetensors_weights(model_id, model_version) else 'legacy'
@@ -786,7 +790,7 @@ def import_command(
   > only use this option if you want the weight to be quantized by default. Note that OpenLLM also
   > support on-demand quantisation during initial startup.
   """
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm.serialisation.transformers.weights import has_safetensors_weights
 
   if model_id in openllm.CONFIG_MAPPING:
     _model_name = model_id
@@ -971,8 +975,8 @@ def build_command(
   > To build the bento with compiled OpenLLM, make sure to prepend HATCH_BUILD_HOOKS_ENABLE=1. Make sure that the deployment
   > target also use the same Python version and architecture as build machine.
   """
-  from .._llm import normalise_model_name
-  from ..serialisation.transformers.weights import has_safetensors_weights
+  from openllm._llm import normalise_model_name
+  from openllm.serialisation.transformers.weights import has_safetensors_weights
 
   if model_id in openllm.CONFIG_MAPPING:
     _model_name = model_id
@@ -1402,7 +1406,7 @@ def query_command(
     raise click.ClickException("'grpc' is currently disabled.")
   _memoized = {k: orjson.loads(v[0]) for k, v in _memoized.items() if v}
   # TODO: grpc support
-  client = openllm.client.HTTPClient(address=endpoint, timeout=timeout)
+  client = openllm.HTTPClient(address=endpoint, timeout=timeout)
   input_fg, generated_fg = 'magenta', 'cyan'
 
   if stream:
diff --git a/openllm-python/src/openllm/cli/extension/__init__.py b/openllm-python/src/openllm_cli/extension/__init__.py
similarity index 100%
rename from openllm-python/src/openllm/cli/extension/__init__.py
rename to openllm-python/src/openllm_cli/extension/__init__.py
diff --git a/openllm-python/src/openllm/cli/extension/build_base_container.py b/openllm-python/src/openllm_cli/extension/build_base_container.py
similarity index 93%
rename from openllm-python/src/openllm/cli/extension/build_base_container.py
rename to openllm-python/src/openllm_cli/extension/build_base_container.py
index 43c687b29..4cd444f98 100644
--- a/openllm-python/src/openllm/cli/extension/build_base_container.py
+++ b/openllm-python/src/openllm_cli/extension/build_base_container.py
@@ -6,9 +6,9 @@
 
 import openllm
 
-from openllm.cli import termui
-from openllm.cli._factory import container_registry_option
-from openllm.cli._factory import machine_option
+from openllm_cli import termui
+from openllm_cli._factory import container_registry_option
+from openllm_cli._factory import machine_option
 
 
 if t.TYPE_CHECKING:
diff --git a/openllm-python/src/openllm/cli/extension/dive_bentos.py b/openllm-python/src/openllm_cli/extension/dive_bentos.py
similarity index 91%
rename from openllm-python/src/openllm/cli/extension/dive_bentos.py
rename to openllm-python/src/openllm_cli/extension/dive_bentos.py
index b944dff08..3cdd06782 100644
--- a/openllm-python/src/openllm/cli/extension/dive_bentos.py
+++ b/openllm-python/src/openllm_cli/extension/dive_bentos.py
@@ -12,9 +12,9 @@
 import bentoml
 
 from bentoml._internal.configuration.containers import BentoMLContainer
-from openllm.cli import termui
-from openllm.cli._factory import bento_complete_envvar
-from openllm.cli._factory import machine_option
+from openllm_cli import termui
+from openllm_cli._factory import bento_complete_envvar
+from openllm_cli._factory import machine_option
 
 
 if t.TYPE_CHECKING:
diff --git a/openllm-python/src/openllm/cli/extension/get_containerfile.py b/openllm-python/src/openllm_cli/extension/get_containerfile.py
similarity index 96%
rename from openllm-python/src/openllm/cli/extension/get_containerfile.py
rename to openllm-python/src/openllm_cli/extension/get_containerfile.py
index 20e824aae..783f3dea8 100644
--- a/openllm-python/src/openllm/cli/extension/get_containerfile.py
+++ b/openllm-python/src/openllm_cli/extension/get_containerfile.py
@@ -12,8 +12,8 @@
 from bentoml._internal.bento.build_config import DockerOptions
 from bentoml._internal.configuration.containers import BentoMLContainer
 from bentoml._internal.container.generate import generate_containerfile
-from openllm.cli import termui
-from openllm.cli._factory import bento_complete_envvar
+from openllm_cli import termui
+from openllm_cli._factory import bento_complete_envvar
 from openllm_core.utils import converter
 
 
diff --git a/openllm-python/src/openllm/cli/extension/get_prompt.py b/openllm-python/src/openllm_cli/extension/get_prompt.py
similarity index 97%
rename from openllm-python/src/openllm/cli/extension/get_prompt.py
rename to openllm-python/src/openllm_cli/extension/get_prompt.py
index 88a667034..23f5c41e3 100644
--- a/openllm-python/src/openllm/cli/extension/get_prompt.py
+++ b/openllm-python/src/openllm_cli/extension/get_prompt.py
@@ -12,8 +12,8 @@
 import openllm
 import openllm_core
 
-from openllm.cli import termui
-from openllm.cli._factory import model_complete_envvar
+from openllm_cli import termui
+from openllm_cli._factory import model_complete_envvar
 from openllm_core.prompts import process_prompt
 
 
diff --git a/openllm-python/src/openllm/cli/extension/list_bentos.py b/openllm-python/src/openllm_cli/extension/list_bentos.py
similarity index 97%
rename from openllm-python/src/openllm/cli/extension/list_bentos.py
rename to openllm-python/src/openllm_cli/extension/list_bentos.py
index a271736b5..0ba3a858a 100644
--- a/openllm-python/src/openllm/cli/extension/list_bentos.py
+++ b/openllm-python/src/openllm_cli/extension/list_bentos.py
@@ -8,7 +8,7 @@
 import openllm
 
 from bentoml._internal.utils import human_readable_size
-from openllm.cli import termui
+from openllm_cli import termui
 
 
 @click.command('list_bentos', context_settings=termui.CONTEXT_SETTINGS)
diff --git a/openllm-python/src/openllm/cli/extension/list_models.py b/openllm-python/src/openllm_cli/extension/list_models.py
similarity index 91%
rename from openllm-python/src/openllm/cli/extension/list_models.py
rename to openllm-python/src/openllm_cli/extension/list_models.py
index 57610adff..0898b00c7 100644
--- a/openllm-python/src/openllm/cli/extension/list_models.py
+++ b/openllm-python/src/openllm_cli/extension/list_models.py
@@ -9,9 +9,9 @@
 import openllm
 
 from bentoml._internal.utils import human_readable_size
-from openllm.cli import termui
-from openllm.cli._factory import model_complete_envvar
-from openllm.cli._factory import model_name_argument
+from openllm_cli import termui
+from openllm_cli._factory import model_complete_envvar
+from openllm_cli._factory import model_name_argument
 
 
 if t.TYPE_CHECKING:
diff --git a/openllm-python/src/openllm/cli/extension/playground.py b/openllm-python/src/openllm_cli/extension/playground.py
similarity index 99%
rename from openllm-python/src/openllm/cli/extension/playground.py
rename to openllm-python/src/openllm_cli/extension/playground.py
index d51c15eab..ff59f0555 100644
--- a/openllm-python/src/openllm/cli/extension/playground.py
+++ b/openllm-python/src/openllm_cli/extension/playground.py
@@ -14,7 +14,7 @@
 import yaml
 
 from openllm import playground
-from openllm.cli import termui
+from openllm_cli import termui
 from openllm_core.utils import is_jupyter_available
 from openllm_core.utils import is_jupytext_available
 from openllm_core.utils import is_notebook_available
diff --git a/openllm-python/src/openllm/cli/termui.py b/openllm-python/src/openllm_cli/termui.py
similarity index 100%
rename from openllm-python/src/openllm/cli/termui.py
rename to openllm-python/src/openllm_cli/termui.py
diff --git a/tools/dependencies.py b/tools/dependencies.py
index cd09d9025..f0ed6e09a 100755
--- a/tools/dependencies.py
+++ b/tools/dependencies.py
@@ -10,14 +10,17 @@
 
 from ghapi.all import GhApi
 
+
 if t.TYPE_CHECKING:
-  from tomlkit.items import Array, Table
+  from tomlkit.items import Array
+  from tomlkit.items import Table
 
 ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.insert(0, os.path.join(ROOT, 'openllm-python', 'src'))
 
 import openllm
 
+
 _OWNER, _REPO = 'bentoml', 'openllm'
 
 
@@ -292,15 +295,15 @@ def keywords() -> Array:
 
 def build_cli_extensions() -> Table:
   table = tomlkit.table()
-  ext: dict[str, str] = {'openllm': 'openllm.cli.entrypoint:cli'}
+  ext: dict[str, str] = {'openllm': 'openllm_cli.entrypoint:cli'}
   ext.update(
     {
-      f'openllm-{inflection.dasherize(ke)}': f'openllm.cli.extension.{ke}:cli'
+      f'openllm-{inflection.dasherize(ke)}': f'openllm_cli.extension.{ke}:cli'
       for ke in sorted(
         [
           fname[:-3]
           for fname in os.listdir(
-            os.path.abspath(os.path.join(ROOT, 'openllm-python', 'src', 'openllm', 'cli', 'extension'))
+            os.path.abspath(os.path.join(ROOT, 'openllm-python', 'src', 'openllm_cli', 'extension'))
           )
           if fname.endswith('.py') and not fname.startswith('__')
         ]