add deploy example
rasbt committed Apr 11, 2024
1 parent 88f6574 commit 876b1b7
Showing 3 changed files with 68 additions and 2 deletions.
README.md: 60 additions, 0 deletions
@@ -127,6 +127,66 @@ litgpt chat \

 

### Deploy an LLM

Deploy a LitGPT model using [LitServe](https://github.com/Lightning-AI/litserve):

```python
# pip install litserve

import litserve as ls
from litgpt.generate.base import main
from functools import partial
from pathlib import Path


# STEP 1: DEFINE YOUR MODEL API
class SimpleAPIForLitGPT(ls.LitAPI):

    def setup(self, device):
        # Set up the model so it can be called in `predict`.
        self.generate = partial(
            main,
            top_k=200,
            temperature=0.8,
            checkpoint_dir=Path("litgpt/checkpoints/microsoft/phi-2"),
            precision="bf16-true",
            quantize=None,
            compile=False
        )

    def decode_request(self, request):
        # Convert the request payload to the model input.
        return request["prompt"]

    def predict(self, x):
        # Run the model on the input and return the output.
        # (Depending on the litgpt version, `main` may also print
        # the generated text to stdout.)
        return self.generate(prompt=x)

    def encode_response(self, output):
        # Convert the model output to a response payload.
        return {"output": output}


# STEP 2: START THE SERVER
api = SimpleAPIForLitGPT()
server = ls.LitServer(api, accelerator="gpu")
server.run(port=8000)
```
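
Save the server code above as a Python script (for example `server.py`; the filename is only illustrative) and start it with `python server.py`. Once running, the server serves the model on port 8000 under the `/predict` route used by the client below.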

In a new Python session:

```python
# STEP 3: USE THE SERVER
import requests

response = requests.post(
"http://127.0.0.1:8000/predict",
json={"prompt": "Fix typos in the following sentence: Exampel input"}
)
print(response.content)
```
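
Since `encode_response` wraps the generated text in an `{"output": ...}` payload, the response body can also be decoded as JSON. A minimal sketch, assuming the server from step 2 is still running:

```python
# Decode the JSON payload produced by `encode_response`
# and extract the generated text.
result = response.json()
print(result["output"])
```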

 

> [!NOTE]
> **[Read the full docs](tutorials/0_to_litgpt.md)**.
litgpt/chat/base.py: 4 additions, 1 deletion
@@ -9,7 +9,10 @@
 import torch
 from lightning.fabric.plugins import BitsandbytesPrecision

-from litgpt import GPT, Config, PromptStyle, Tokenizer
+from litgpt.model import GPT
+from litgpt.config import Config
+from litgpt.prompts import PromptStyle
+from litgpt.tokenizer import Tokenizer
 from litgpt.generate.base import next_token
 from litgpt.prompts import has_prompt_style, load_prompt_style
 from litgpt.scripts.merge_lora import merge_lora
litgpt/generate/base.py: 4 additions, 1 deletion
@@ -11,7 +11,10 @@
 import torch._inductor.config
 from lightning.fabric.plugins import BitsandbytesPrecision

-from litgpt import GPT, Config, PromptStyle, Tokenizer
+from litgpt.model import GPT
+from litgpt.config import Config
+from litgpt.prompts import PromptStyle
+from litgpt.tokenizer import Tokenizer
 from litgpt.prompts import has_prompt_style, load_prompt_style
 from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision, load_checkpoint

