diff --git a/README.md b/README.md
index 8e14f547aa..1a20d3c0d4 100644
--- a/README.md
+++ b/README.md
@@ -127,6 +127,66 @@ litgpt chat \
 
 &nbsp;
 
+### Deploy an LLM
+
+Deploy a LitGPT model using [LitServe](https://github.com/Lightning-AI/litserve):
+
+```python
+# pip install litserve
+
+import litserve as ls
+from litgpt.generate.base import main
+from functools import partial
+from pathlib import Path
+
+
+# STEP 1: DEFINE YOUR MODEL API
+class SimpleAPIForLitGPT(ls.LitAPI):
+    def setup(self, device):
+        # Set up the model so it can be called in `predict`.
+        self.generate = partial(
+            main,
+            top_k=200,
+            temperature=0.8,
+            checkpoint_dir=Path("litgpt/checkpoints/microsoft/phi-2"),
+            precision="bf16-true",
+            quantize=None,
+            compile=False
+        )
+
+    def decode_request(self, request):
+        # Convert the request payload to your model input.
+        return request["input"]
+
+    def predict(self, x):
+        # Run the model on the input and return the output.
+        return self.generate(prompt=x)
+
+    def encode_response(self, output):
+        # Convert the model output to a response payload.
+        return {"output": output}
+
+# STEP 2: START THE SERVER
+api = SimpleAPIForLitGPT()
+server = ls.LitServer(api, accelerator="gpu")
+server.run(port=8000)
+```
+
+In a new Python session:
+
+```python
+# STEP 3: USE THE SERVER
+import requests
+
+response = requests.post(
+    "http://127.0.0.1:8000/predict",
+    json={"input": "Fix typos in the following sentence: Exampel input"}
+)
+print(response.content)
+```
+
+&nbsp;
+
 > [!NOTE]
 > **[Read the full docs](tutorials/0_to_litgpt.md)**.
diff --git a/litgpt/chat/base.py b/litgpt/chat/base.py
index 7f2afc8f19..47c5152b18 100644
--- a/litgpt/chat/base.py
+++ b/litgpt/chat/base.py
@@ -9,7 +9,10 @@
 import torch
 from lightning.fabric.plugins import BitsandbytesPrecision
 
-from litgpt import GPT, Config, PromptStyle, Tokenizer
+from litgpt.model import GPT
+from litgpt.config import Config
+from litgpt.prompts import PromptStyle
+from litgpt.tokenizer import Tokenizer
 from litgpt.generate.base import next_token
 from litgpt.prompts import has_prompt_style, load_prompt_style
 from litgpt.scripts.merge_lora import merge_lora
diff --git a/litgpt/generate/base.py b/litgpt/generate/base.py
index 6488717429..1f91ef364e 100644
--- a/litgpt/generate/base.py
+++ b/litgpt/generate/base.py
@@ -11,7 +11,10 @@
 import torch._inductor.config
 from lightning.fabric.plugins import BitsandbytesPrecision
 
-from litgpt import GPT, Config, PromptStyle, Tokenizer
+from litgpt.model import GPT
+from litgpt.config import Config
+from litgpt.prompts import PromptStyle
+from litgpt.tokenizer import Tokenizer
 from litgpt.prompts import has_prompt_style, load_prompt_style
 from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision, load_checkpoint