chore: update jupyter notebooks with new API (#623)
Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
aarnphm authored Nov 13, 2023
1 parent de04de7 commit e77a7fb
Showing 2 changed files with 29 additions and 37 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -14,9 +14,11 @@ repos:
        alias: r
        verbose: true
        args: [--exit-non-zero-on-fix, --show-fixes]
+       types_or: [python, pyi, jupyter]
      - id: ruff-format
        alias: rf
        verbose: true
+       types_or: [python, pyi, jupyter]
  - repo: local
    hooks:
      - id: mypy
64 changes: 27 additions & 37 deletions examples/openllm-llama2-demo/openllm_llama2_demo.ipynb
@@ -96,6 +96,7 @@
"import psutil\n",
"import torch\n",
"\n",
"\n",
"ram = psutil.virtual_memory()\n",
"ram_total = ram.total / (1024**3)\n",
"print('MemTotal: %.2f GB' % ram_total)\n",
@@ -344,31 +345,8 @@
"source": [
"import openllm\n",
"\n",
"llm = openllm.LLM('meta-llama/Llama-2-7b-chat-hf', backend='vllm') # NOTE: You don't have to pass 'backend' if vllm is available in the environment"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6slh7cFpwohd"
},
"source": [
"You can manually trigger the model download if it is not downloaded automatically."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ukY2Cugr8tBD",
"outputId": "bba81848-865b-4286-b052-542e556d3c4b"
},
"outputs": [],
"source": [
"llm.save_pretrained()"
"\n",
"llm = openllm.LLM('meta-llama/Llama-2-7b-chat-hf', backend='vllm')"
]
},
{
@@ -393,8 +371,10 @@
"outputs": [],
"source": [
"import asyncio\n",
"\n",
"import nest_asyncio\n",
"\n",
"\n",
"nest_asyncio.apply()\n",
"\n",
"\n",
@@ -457,8 +437,10 @@
"outputs": [],
"source": [
"import gc\n",
"\n",
"import torch\n",
"\n",
"\n",
"torch.cuda.empty_cache()\n",
"gc.collect()"
]
@@ -498,7 +480,7 @@
},
"outputs": [],
"source": [
"!nohup openllm start llama --model-id NousResearch/llama-2-7b-chat-hf --port 8001 --backend vllm > openllm.log 2>&1 &"
"!nohup openllm start meta-llama/Llama-2-7b-chat-hf --port 8001 --backend vllm > openllm.log 2>&1 &"
]
},
{
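
Because the server is backgrounded with `nohup`, it is worth polling until it is up before sending requests. A small sketch, assuming the `/readyz` readiness endpoint that BentoML-based servers such as OpenLLM's expose; if it never turns ready, inspect `openllm.log`:

```python
import time

import requests

SERVER = 'http://127.0.0.1:8001'

for _ in range(60):  # poll for up to ~5 minutes
    try:
        if requests.get(f'{SERVER}/readyz', timeout=5).status_code == 200:
            print('server is ready')
            break
    except requests.exceptions.RequestException:
        pass  # not accepting connections yet
    time.sleep(5)
else:
    print('server did not become ready; check openllm.log')
```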
@@ -596,6 +578,7 @@
"source": [
"import sys\n",
"\n",
"\n",
"if 'google.colab' in sys.modules:\n",
" # using colab proxy URL\n",
" from google.colab.output import eval_js\n",
@@ -622,13 +605,14 @@
"source": [
"import openllm\n",
"\n",
"\n",
"# sync API\n",
"client = openllm.client.HTTPClient('http://127.0.0.1:8001', timeout=120)\n",
"client = openllm.HTTPClient('http://127.0.0.1:8001', timeout=120)\n",
"res = client.generate('What is the weight of the earth?', max_new_tokens=8192)\n",
"\n",
"# Async API\n",
"# async_client = openllm.client.AsyncHTTPClient(\"http://127.0.0.1:8001\", timeout=120)\n",
"# res = await async_client.query(\"what is the weight of the earth?\", max_new_tokens=8192)\n",
"# async_client = openllm.AsyncHTTPClient(\"http://127.0.0.1:8001\", timeout=120)\n",
"# res = await async_client.generate(\"what is the weight of the earth?\", max_new_tokens=8192)\n",
"print(res.outputs[0].text)"
]
},
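
The updated client also streams tokens; a hedged sketch, assuming `HTTPClient.generate_stream` in this release yields chunks carrying a `.text` field:

```python
import openllm

client = openllm.HTTPClient('http://127.0.0.1:8001', timeout=120)

# print tokens as they arrive instead of waiting for the full completion
for chunk in client.generate_stream('What is the weight of the earth?', max_new_tokens=256):
    print(chunk.text, end='', flush=True)
print()
```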
@@ -665,7 +649,8 @@
" 'http://127.0.0.1:8001/v1/generate_stream' \\\n",
" -H 'accept: text/event-stream' \\\n",
" -H 'Content-Type: application/json' \\\n",
" -d '{\"prompt\":\"write a tagline for an ice cream shop\\n\", \"llm_config\": {\"max_new_tokens\": 256}}'"
" -d '{\"prompt\":\"write a tagline for an ice cream shop\n",
"\", \"llm_config\": {\"max_new_tokens\": 256}}'"
]
},
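
The same endpoint can be consumed from Python; a sketch that assumes the standard `data: ` server-sent-events framing implied by the `text/event-stream` header above:

```python
import requests

resp = requests.post(
    'http://127.0.0.1:8001/v1/generate_stream',
    headers={'accept': 'text/event-stream', 'Content-Type': 'application/json'},
    json={'prompt': 'write a tagline for an ice cream shop\n', 'llm_config': {'max_new_tokens': 256}},
    stream=True,
)
for line in resp.iter_lines(decode_unicode=True):
    if line and line.startswith('data: '):
        print(line[len('data: '):])  # each event carries one chunk of the generation
```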
{
@@ -687,13 +672,14 @@
"source": [
"import openai\n",
"\n",
"\n",
"openai.api_base = 'http://localhost:8001/v1'\n",
"openai.api_key = 'na'\n",
"\n",
"response = openai.Completion.create(model='llama2', prompt='Say this is a test')\n",
"response = openai.Completion.create(model='meta-llama--Llama-2-7b-chat-hf', prompt='Say this is a test')\n",
"print(response)\n",
"\n",
"chatCompletion = openai.ChatCompletion.create(model='llama2', messages=[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Hello!'}])\n",
"chatCompletion = openai.ChatCompletion.create(model='meta-llama--Llama-2-7b-chat-hf', messages=[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Hello!'}])\n",
"print(chatCompletion)"
]
},
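
The model id here must match what the server registered (the Hugging Face repo name with `/` flattened to `--`). When in doubt, list the served models first, using the same pre-1.0 `openai` module-level API as the cell above:

```python
import openai

openai.api_base = 'http://localhost:8001/v1'
openai.api_key = 'na'  # the local server does not validate the key

# print the ids the OpenAI-compatible endpoint actually serves
for model in openai.Model.list()['data']:
    print(model['id'])
```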
@@ -718,6 +704,7 @@
"source": [
"from langchain.llms import OpenLLM\n",
"\n",
"\n",
"llm = OpenLLM(server_url='http://localhost:8001')"
]
},
@@ -729,8 +716,9 @@
},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"from langchain.chains import LLMChain\n",
"from langchain.prompts import PromptTemplate\n",
"\n",
"\n",
"template = 'What is a good name for a company that makes {product}?'\n",
"\n",
@@ -815,7 +803,7 @@
"Use OpenLLM to build the model into a standardized distribution unit in BentoML, also known as a Bento. Command:\n",
"\n",
"```\n",
-openllm build llama --model-id {model-id} --backend [pt|vllm]
+openllm build {model-id} --backend [pt|vllm]
"```"
]
},
@@ -831,7 +819,7 @@
},
"outputs": [],
"source": [
"!openllm build llama --model-id NousResearch/llama-2-7b-chat-hf --backend vllm"
"!openllm build NousResearch/llama-2-7b-chat-hf --backend vllm"
]
},
{
@@ -924,9 +912,11 @@
"outputs": [],
"source": [
"###@title Alternatively, use the BentoML client to create a Deployment.\n",
"import bentoml\n",
"import json\n",
"\n",
"import bentoml\n",
"\n",
"\n",
"return_code = !bentoml cloud list-context\n",
"if 'colab-user' not in ''.join(return_code):\n",
" print('please login first!')\n",
@@ -943,7 +933,7 @@
"\n",
" res = client.deployment.create(\n",
" deployment_name='test-llama2',\n",
" bento='nousresearch--llama-2-7b-chat-hf-service:37892f30c23786c0d5367d80481fa0d9fba93cf8',\n",
" bento='meta-llama--llama-2-7b-chat-hf-service:37892f30c23786c0d5367d80481fa0d9fba93cf8',\n",
" context='colab-user',\n",
" cluster_name='default',\n",
" # mode=\"deployment\",\n",
