diff --git a/.coveragerc b/.coveragerc index 1041c77..0029af9 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,3 +1,4 @@ [run] omit = + lanarky/clients.py lanarky/adapters/*/__init__.py diff --git a/docs/getting-started.md b/docs/getting-started.md index 2aa094f..1d3348b 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -3,22 +3,18 @@ hide: - navigation --- -This is a quick tutorial on getting started with Lanarky. +Let's build our first LLM microservice with Lanarky! -We will use LangChain as the LLM tooling framework and OpenAI as the LLM provider to -build our first LLM microservice. - -## Install Dependencies +We need to first install some extra dependencies as we will use OpenAI as the LLM +provider. <!-- termynal --> ``` -$ pip install lanarky[langchain,openai] +$ pip install lanarky[openai] ``` -## Example - -We will use the `ConversationChain` from LangChain library to build our first LLM microservice. +## Application !!! info @@ -28,27 +24,26 @@ We will use the `ConversationChain` from LangChain library to build our first LL ```python import os -from langchain.chains import ConversationChain -from langchain.chat_models import ChatOpenAI - from lanarky import Lanarky -from lanarky.adapters.langchain.routing import LangchainAPIRouter +from lanarky.adapters.openai.resources import ChatCompletionResource +from lanarky.adapters.openai.routing import OpenAIAPIRouter os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here" app = Lanarky() -langchain_router = LangchainAPIRouter() +router = OpenAIAPIRouter() -@langchain_router.post("/chat") -def chat(streaming: bool = True) -> ConversationChain: - return ConversationChain(llm=ChatOpenAI(streaming=streaming)) +@router.post("/chat") +def chat(stream: bool = True) -> ChatCompletionResource: + system = "You are a sassy assistant" + return ChatCompletionResource(stream=stream, system=system) -app.include_router(langchain_router) +app.include_router(router) ``` -Run the application: +Run application: <!-- termynal --> @@ -57,56 +52,81 @@ $ pip install uvicorn $ uvicorn app:app --reload ``` -View the Swagger docs at [http://localhost:8000/docs](http://localhost:8000/docs). +!!! tip -## Testing + Swagger docs will be available at [http://localhost:8000/docs](http://localhost:8000/docs). -<!-- termynal --> +## Client -``` -$ pip install httpx-sse -``` +Now that the application script is running, we will setup a client script for testing. -Create `client.py` script: +Create `client.py`: ```python import click -import httpx -from httpx_sse import connect_sse + +from lanarky.clients import StreamingClient @click.command() @click.option("--input", required=True) -@click.option("--streaming", is_flag=True) -def main(input: str, streaming: bool): - url = f"http://localhost:8000/chat?streaming={str(streaming).lower()}" - with httpx.Client() as client: - with connect_sse( - client, - "POST", - url, - json={"input": input}, - ) as event_source: - for sse in event_source.iter_sse(): - print(sse.event, sse.data) +@click.option("--stream", is_flag=True) +def main(input: str, stream: bool): + client = StreamingClient() + for event in client.stream_response( + "POST", + "/chat", + params={"stream": str(stream).lower()}, + json={"messages": [dict(role="user", content=input)]}, + ): + print(f"{event.event}: {event.data}") if __name__ == "__main__": main() ``` -Stream output: +Since we have exposed only `stream` as the query parameter, we can test 2 scenarios: + +1. 
Receive output as it is generated:

 <!-- termynal -->

 ```
-$ python client.py --input hi --streaming
+$ python client.py --input "hi" --stream
+completion:
+completion: Well
+completion: ,
+completion: hello
+completion: there
+completion: !
+completion: How
+completion: can
+completion: I
+completion: sass
+completion: ...
+completion: I
+completion: mean
+completion: assist
+completion: you
+completion: today
+completion: ?
 ```

-Recieve all output at once:
+2. Receive all output at once:

 <!-- termynal -->

 ```
-$ python client.py --input hi
+$ python client.py --input "hi"
+completion: Oh, hello there! What can I sass...I mean assist you with today?
 ```
+
+## Next Steps
+
+Congrats on building your first LLM microservice with Lanarky!
+
+Now that you have a basic understanding of how Lanarky works, let's learn more about
+the core concepts of Lanarky.
+
+[Let's Learn!](./learn/index.md){ .md-button .md-button--primary }
diff --git a/docs/learn/adapters/index.md b/docs/learn/adapters/index.md
new file mode 100644
index 0000000..519a7c6
--- /dev/null
+++ b/docs/learn/adapters/index.md
@@ -0,0 +1,14 @@
+# Adapters
+
+The **Adapters API** allows Lanarky users to build microservices using popular LLM frameworks.
+
+We will cover the following adapters in depth:
+
+- [OpenAI](./openai/index.md): build microservices using the
+  [OpenAI Python SDK](https://platform.openai.com/docs/api-reference?lang=python)
+- [LangChain](./langchain/index.md): build microservices using the
+  [LangChain](https://www.langchain.com/) framework
+
+!!! note "Note from Author"
+
+    The **Adapters API** is still in active development. I will add more adapters in the future.
diff --git a/docs/learn/adapters/langchain/callbacks.md b/docs/learn/adapters/langchain/callbacks.md
new file mode 100644
index 0000000..87924a9
--- /dev/null
+++ b/docs/learn/adapters/langchain/callbacks.md
@@ -0,0 +1,48 @@
+Lanarky offers a collection of callback handlers for LangChain. These callback
+handlers are useful for executing intermediate callback events related to your LangChain
+microservice.
+
+Lanarky offers callback handlers for both streaming and WebSockets. We will take a look at
+both of them in this guide.
+
+!!! note
+
+    All callback handlers can be imported from the `lanarky.adapters.langchain.callbacks`
+    module.
+
+## Tokens
+
+- `TokenStreamingCallbackHandler`: handles streaming of the intermediate tokens over HTTP
+- `TokenWebSocketCallbackHandler`: handles streaming of the intermediate tokens over WebSockets
+
+Both callback handlers offer token streaming in two modes: `text` and `json`. In `text` mode,
+the callback handlers will use the raw token string as event data. In `json` mode, the callback
+handlers will use a JSON object containing the token string as event data.
+
+These callback handlers are useful for all chains where the `llm` component supports streaming.
+
+## Source Documents
+
+- `SourceDocumentStreamingCallbackHandler`: handles streaming of the source documents
+  over HTTP
+- `SourceDocumentWebSocketCallbackHandler`: handles streaming of the source documents
+  over WebSockets
+
+The source documents are sent at the end of a chain execution as a `source_documents` event.
+
+These callback handlers are useful for retrieval-based chains like `RetrievalQA`.
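+
+For example, a retrieval endpoint might pair the token handler with the source-document handler in a
+single `StreamingResponse`. The snippet below is a minimal sketch, not a canonical recipe: it assumes a
+`RetrievalQA` chain backed by a small FAISS vector store (requires `faiss-cpu`), and that the handlers
+accept the arguments shown.
+
+```python
+from fastapi import Depends
+from langchain.chains import RetrievalQA
+from langchain.chat_models import ChatOpenAI
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from pydantic import BaseModel
+
+from lanarky import Lanarky
+from lanarky.adapters.langchain.callbacks import (
+    SourceDocumentStreamingCallbackHandler,
+    TokenStreamingCallbackHandler,
+)
+from lanarky.adapters.langchain.responses import StreamingResponse
+
+app = Lanarky()
+
+
+class QueryInput(BaseModel):
+    query: str
+
+
+def chain_factory() -> RetrievalQA:
+    # toy in-memory vector store; replace with your own documents and retriever
+    vectorstore = FAISS.from_texts(
+        ["Lanarky is a web framework for LLM microservices."],
+        embedding=OpenAIEmbeddings(),
+    )
+    return RetrievalQA.from_chain_type(
+        llm=ChatOpenAI(streaming=True),
+        retriever=vectorstore.as_retriever(),
+        return_source_documents=True,
+    )
+
+
+@app.post("/qa")
+async def qa(request: QueryInput, chain: RetrievalQA = Depends(chain_factory)):
+    # tokens arrive as `completion` events; documents arrive as a `source_documents` event
+    return StreamingResponse(
+        chain=chain,
+        config={
+            "inputs": request.model_dump(),
+            "callbacks": [
+                TokenStreamingCallbackHandler(output_key=chain.output_key),
+                SourceDocumentStreamingCallbackHandler(),
+            ],
+        },
+    )
+```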
+ +## Agents + +- `FinalTokenStreamingCallbackHandler`: handles streaming of the final answer tokens over HTTP +- `FinalTokenWebSocketCallbackHandler`: handles streaming of the final answer tokens over WebSockets + +Both callback handlers are extension of the token streaming callback handlers where the tokens are +streamed only when the LLM agent has reached the final step of its execution. + +These callback handlers are useful for all agent types like `ZeroShotAgent`. + +!!! note + + The callback handlers also inherit some functionality of the `FinalStreamingStdOutCallbackHandler` + callback handler. Check out [LangChain Docs](https://api.python.langchain.com/en/latest/callbacks/langchain.callbacks.streaming_stdout_final_only.FinalStreamingStdOutCallbackHandler.html) to know more. diff --git a/docs/learn/adapters/langchain/dependency.md b/docs/learn/adapters/langchain/dependency.md new file mode 100644 index 0000000..ac878ba --- /dev/null +++ b/docs/learn/adapters/langchain/dependency.md @@ -0,0 +1,103 @@ +--- +hide: + - toc +--- + +FastAPI offers a powerful [Dependency Injection](https://fastapi.tiangolo.com/tutorial/dependencies/) +system that allows you to inject dependencies into your API endpoints. Lanarky extends this functionality +by offering LangChain as a dependency. + +!!! example "Experimental" + + LLM-based dependency injection is an experimental feature. We will add more functionality + based on community feedback and viable use cases. If you have ideas or suggestions, we + would love to hear from you. Feel free to open an issue on + [GitHub](https://github.com/ajndkr/lanarky/issues/new/choose). + +Let's take a look at how we can use LangChain as a dependency. + +```python +import os + +from langchain.chains import LLMChain +from langchain.chat_models import ChatOpenAI +from langchain.prompts import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + PromptTemplate, +) + +from lanarky import Lanarky +from lanarky.adapters.langchain.dependencies import Depends + +os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here" + + +app = Lanarky() + + +def chain_factory(temperature: float = 0.0, verbose: bool = False) -> LLMChain: + return LLMChain( + llm=ChatOpenAI(temperature=temperature), + prompt=ChatPromptTemplate.from_messages( + [ + HumanMessagePromptTemplate( + prompt=PromptTemplate.from_template("Respond in JSON: {input}") + ), + ] + ), + verbose=verbose, + ) + + +@app.post("/") +async def endpoint(outputs: dict = Depends(chain_factory)): + return outputs["text"] +``` + +In the above example, we pass `chain_factory` as a dependency to the endpoint. The endpoint +exposes the dependency function arguments as query parameters. This allows us to configure +the dependency at runtime. + +To test the above endpoint, let's create a client script: + +```python +import click +import httpx + + +@click.command() +@click.option("--input", required=True) +def main(input: str): + url = "http://localhost:8000/" + + with httpx.Client() as client: + response = client.post(url, json={"input": input}) + if response.status_code == 200: + data = response.json() + print(f"Received: {data}") + else: + print(response.text) + + +if __name__ == "__main__": + main() +``` + +First, start the server: + +```bash +uvicorn app:app +``` + +Then, run the client script: + +<!-- termynal --> + +``` +$ python client.py --input "Who won the world series in 2020?" 
+Received: {
+  "team": "Los Angeles Dodgers",
+  "year": 2020
+}
+```
diff --git a/docs/learn/adapters/langchain/fastapi.md b/docs/learn/adapters/langchain/fastapi.md
new file mode 100644
index 0000000..f50c003
--- /dev/null
+++ b/docs/learn/adapters/langchain/fastapi.md
@@ -0,0 +1,133 @@
+Lanarky is built on top of FastAPI and offers backwards compatibility with all FastAPI features.
+Nonetheless, if your project uses FastAPI and Lanarky is not a drop-in replacement, you can still use
+the low-level Lanarky modules to build your microservice.
+
+We will use the examples from the [LangChain API Router](./router.md) guide to demonstrate how to
+use the low-level modules as well as understand how the router works under the hood.
+
+## Streaming
+
+The LangChain adapter extends the `StreamingResponse` class to support streaming for LangChain microservices.
+
+!!! note
+
+    Before you start, make sure you have read the [Streaming](../../streaming.md) and
+    [LangChain API Router](./router.md) guides.
+
+```python
+import os
+
+from fastapi import Depends
+from langchain.chains import ConversationChain
+from langchain.chat_models import ChatOpenAI
+from pydantic import BaseModel
+
+from lanarky import Lanarky
+from lanarky.adapters.langchain.callbacks import TokenStreamingCallbackHandler
+from lanarky.adapters.langchain.responses import StreamingResponse
+
+os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here"
+
+
+app = Lanarky()
+
+
+class ChatInput(BaseModel):
+    input: str
+
+
+def chain_factory(
+    temperature: float = 0.0, verbose: bool = False, streaming: bool = True
+) -> ConversationChain:
+    return ConversationChain(
+        llm=ChatOpenAI(temperature=temperature, streaming=streaming),
+        verbose=verbose,
+    )
+
+
+@app.post("/chat")
+async def chat(
+    request: ChatInput,
+    chain: ConversationChain = Depends(chain_factory)
+):
+    return StreamingResponse(
+        chain=chain,
+        config={
+            "inputs": request.model_dump(),
+            "callbacks": [
+                TokenStreamingCallbackHandler(output_key=chain.output_key),
+            ],
+        },
+    )
+```
+
+The `/chat` endpoint is similar to the one we created using `LangChainAPIRouter` in the
+[LangChain API Router](./router.md) guide. Besides the `StreamingResponse` class, we also use
+the `TokenStreamingCallbackHandler` callback handler to stream the intermediate tokens back to
+the client. Check out [Callbacks](./callbacks.md) to learn more about the Lanarky callback
+handlers.
+
+!!! tip
+
+    You can use the same client script from the [LangChain API Router](./router.md) guide to test
+    the above example.
+
+## Websockets
+
+In addition to streaming, the LangChain adapter also supports websockets. Let's take a look at how we can
+build a LangChain microservice using websockets.
+ +```python + +import os + +from fastapi import Depends +from langchain.chains import ConversationChain +from langchain.chat_models import ChatOpenAI +from pydantic import BaseModel + +from lanarky import Lanarky +from lanarky.adapters.langchain.callbacks import TokenWebSocketCallbackHandler +from lanarky.events import Events +from lanarky.websockets import WebSocket, WebsocketSession + +os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here" + + +app = Lanarky() + + +class ChatInput(BaseModel): + input: str + + +def chain_factory() -> ConversationChain: + return ConversationChain(llm=ChatOpenAI(streaming=True)) + + +@app.websocket("/ws") +async def ws( + websocket: WebSocket, + chain: ConversationChain = Depends(chain_factory) +): + async with WebsocketSession().connect(websocket) as session: + async for data in session: + await chain.acall( + inputs=ChatInput(**data).model_dump(), + callbacks=[ + TokenWebSocketCallbackHandler( + websocket=websocket, output_key=chain.output_key + ) + ], + ) + await websocket.send_json(dict(data="", event=Events.END)) +``` + +In this example, we use the `WebsocketSession` context manager to connect to the websocket +and communicate with the client. We pass the client data to the `ConversationChain` and stream +the response back to the client. + +!!! tip + + Similar to the streaming example, you can use the same client script from the + [LangChain API Router](./router.md) guide to test the websocket example. diff --git a/docs/learn/adapters/langchain/index.md b/docs/learn/adapters/langchain/index.md new file mode 100644 index 0000000..624cba2 --- /dev/null +++ b/docs/learn/adapters/langchain/index.md @@ -0,0 +1,28 @@ +# LangChain Adapter + +The **LangChain Adapter** allows Lanarky users to build microservices using the +[LangChain](https://www.langchain.com/) framework. + +To enable this adapter, install lanarky with extra dependencies: + +<!-- termynal --> + +``` +$ pip install lanarky[langchain] +``` + +!!! tip + + LangChain is an LLM tooling framework to construct LLM chains and agents using + LLM providers such OpenAI, Anthropic, etc. Visit their [Python SDK](https://python.langchain.com/docs/) + documentation for more information. + +Here's an overview of the supported features: + +- [Langchain API Router](./router.md): Lanarky router for LangChain +- [Callbacks](./callbacks.md): collection of Lanarky callbacks for LangChain + +Additionally, we will cover some advanced topics: + +- [Dependency Injection](./dependency.md): use LangChain as a dependency in your microservice +- [FastAPI Backport](./fastapi.md): low-level modules for FastAPI users diff --git a/docs/learn/adapters/langchain/router.md b/docs/learn/adapters/langchain/router.md new file mode 100644 index 0000000..5c29cd2 --- /dev/null +++ b/docs/learn/adapters/langchain/router.md @@ -0,0 +1,164 @@ +# LangChain API Router + +The `LangChainAPIRouter` class is an abstraction layer which provides a quick and easy +way to build microservices using LangChain. + +Let's understand how to use `LangChainAPIRouter` to build streaming and websocket +endpoints. 
+ +## Streaming + +```python +import os + +from langchain.chains import ConversationChain +from langchain.chat_models import ChatOpenAI + +from lanarky import Lanarky +from lanarky.adapters.langchain.routing import LangchainAPIRouter + +os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here" + + +app = Lanarky() +router = LangchainAPIRouter() + + +@router.post("/chat") +def chat( + temperature: float = 0.0, verbose: bool = False, streaming: bool = True +) -> ConversationChain: + return ConversationChain( + llm=ChatOpenAI(temperature=temperature, streaming=streaming), + verbose=verbose, + ) + + +app.include_router(router) +``` + +In this example, we use `chat` as a `ConversationChain` factory function and send it +to the router to build a streaming endpoint. The additional parameters such as +`temperature`, `verbose`, and `streaming` are exposed as query parameters. + +To receive the events, we will use the following client script: + +```python +import click + +from lanarky.clients import StreamingClient + + +@click.command() +@click.option("--input", required=True) +@click.option("--stream", is_flag=True) +def main(input: str, stream: bool): + client = StreamingClient() + for event in client.stream_response( + "POST", + "/chat", + params={"streaming": str(stream).lower()}, + json={"input": input}, + ): + print(f"{event.event}: {event.data}") + + +if __name__ == "__main__": + main() +``` + +First run the application server: + +<!-- termynal --> + +``` +$ uvicorn app:app +``` + +Then run the client script: + +<!-- termynal --> + +``` +$ python client.py --input "hi" +completion: {'token': 'Hello! How can I assist you today?'} +``` + +## Websocket + +```python +import os + +from langchain.chains import ConversationChain +from langchain.chat_models import ChatOpenAI + +from lanarky import Lanarky +from lanarky.adapters.langchain.routing import LangchainAPIRouter + +os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here" + + +app = Lanarky() +router = LangchainAPIRouter() + + +@router.websocket("/ws") +def chat() -> ConversationChain: + return ConversationChain(llm=ChatOpenAI(streaming=True), verbose=True) + + +app.include_router(router) +``` + +Similar to the streaming example, we use `chat` as a `ConversationChain` factory +function and send it to the router to build a websocket endpoint. + +To communicate with the server, we will use the following client script: + +```python +from lanarky.clients import WebSocketClient + + +def main(): + client = WebSocketClient() + with client.connect() as session: + while True: + user_input = input("\nEnter a message: ") + session.send(dict(input=user_input)) + print("Received: ", end="") + for chunk in session.stream_response(): + print(chunk["data"]["token"], end="", flush=True) + + +if __name__ == "__main__": + main() +``` + +First run the application server: + +<!-- termynal --> + +``` +$ uvicorn app:app +``` + +Then run the client script: + +<!-- termynal --> + +``` +$ python client.py +Enter a message: hi +Received: Hello! How can I assist you today? +Enter a message: i am lanarky +Received: Hello Lanarky! It's nice to meet you. How can I assist +you today? +Enter a message: who am i? +Received: You are Lanarky, as you mentioned earlier. Is there anything +specific you would like to know about yourself? +``` + +!!! 
note "Note from Author" + + If you want to build more complex logic, I recommend using the low-level modules + to define the endpoint from scratch: [Learn more](./fastapi.md) diff --git a/docs/learn/adapters/openai/dependency.md b/docs/learn/adapters/openai/dependency.md new file mode 100644 index 0000000..0420ea2 --- /dev/null +++ b/docs/learn/adapters/openai/dependency.md @@ -0,0 +1,99 @@ +--- +hide: + - toc +--- + +FastAPI offers a powerful [Dependency Injection](https://fastapi.tiangolo.com/tutorial/dependencies/) +system that allows you to inject dependencies into your API endpoints. Lanarky extends this functionality +by offering OpenAI as a dependency. + +!!! example "Experimental" + + LLM-based dependency injection is an experimental feature. We will add more functionality + based on community feedback and viable use cases. If you have ideas or suggestions, we + would love to hear from you. Feel free to open an issue on + [GitHub](https://github.com/ajndkr/lanarky/issues/new/choose). + +Let's take a look at how we can use OpenAI as a dependency. + +```python +import os + +from lanarky import Lanarky +from lanarky.adapters.openai.dependencies import Depends +from lanarky.adapters.openai.resources import ChatCompletion, ChatCompletionResource + +os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here" + +app = Lanarky() + + +def chat_completion_factory() -> ChatCompletionResource: + return ChatCompletionResource( + system="You are a helpful assistant designed to output JSON.", + model="gpt-3.5-turbo-1106", + response_format={"type": "json_object"}, + ) + + +@app.post("/") +async def endpoint(outputs: ChatCompletion = Depends(chat_completion_factory)): + return outputs.choices[0].message.content +``` + +In the above example, we pass `chat_completion_factory` as a dependency to the `POST /` endpoint. +Similar to how FasAPI handles dependencies, you can expose additional parameters by defining arguments +in the `chat_completion_factory` function. For example, if you want to expose the `temperature` parameter, +you can do so by adding a `temperature` argument to the `chat_completion_factory` function. + +```python +def chat_completion_factory(temperature: float = 0.5) -> ChatCompletionResource: + return ChatCompletionResource( + system="You are a helpful assistant designed to output JSON.", + model="gpt-3.5-turbo-1106", + response_format={"type": "json_object"}, + temperature=temperature, + ) +``` + +To test the above endpoint, let's create a client script: + +```python +import click +import httpx + + +@click.command() +@click.option("--input", required=True) +def main(input: str): + url = "http://localhost:8000/" + + with httpx.Client() as client: + response = client.post( + url, json={"messages": [dict(role="user", content=input)]} + ) + if response.status_code == 200: + data = response.json() + print(f"Received: {data}") + + +if __name__ == "__main__": + main() +``` + +First, start the server: + +```bash +uvicorn app:app +``` + +Then, run the client script: + +<!-- termynal --> + +``` +$ python client.py --input "Who won the world series in 2020?" +Received: { + "result": "The Los Angeles Dodgers won the World Series in 2020." +} +``` diff --git a/docs/learn/adapters/openai/fastapi.md b/docs/learn/adapters/openai/fastapi.md new file mode 100644 index 0000000..594c4b9 --- /dev/null +++ b/docs/learn/adapters/openai/fastapi.md @@ -0,0 +1,110 @@ +Lanarky is built on top of FastAPI and offers backwards compatibility with all FastAPI features. 
+Nonetheless, if your project uses FastAPI and Lanarky is not a drop-in replacement, you can still +the low-level Lanarky modules to build your microservice. + +We will use the examples from the [OpenAI API Router](./router.md) guide to demonstrate how to +use the low-level modules as well as understand how the router works under the hood. + +## Streaming + +OpenAI adapter extends the `StreamingResponse` class to support streaming for OpenAI microservices. + +!!! note + + Before you start, make sure you have read the [Streaming](../../streaming.md) and + [OpenAI API Router](./router.md) guides. + +```python +import os + +from fastapi import Depends +from pydantic import BaseModel + +from lanarky import Lanarky +from lanarky.adapters.openai.resources import ChatCompletionResource, Message +from lanarky.adapters.openai.responses import StreamingResponse + +os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here" + +app = Lanarky() + + +class ChatInput(BaseModel): + messages: list[Message] + + +def chat_completion_factory(stream: bool = True) -> ChatCompletionResource: + system = "You are a sassy assistant" + return ChatCompletionResource(system=system, stream=stream) + + +@app.post("/chat") +async def chat( + request: ChatInput, + resource: ChatCompletionResource = Depends(chat_completion_factory), +): + return StreamingResponse(resource=resource, **request.model_dump()) +``` + +The `/chat` endpoint is similar to the one we created using `OpenAIAPIRouter` in the +[OpenAI API Router](./router.md) guide. + +!!! tip + + You can use the same client script from the [OpenAI API Router](./router.md) guide to test + the above example. + +## Websockets + +In addition to streaming, OpenAI adapter also supports websockets. Let's take a look at how we can +build an OpenAI microservice using websockets. + +```python +import os + +from fastapi import Depends +from pydantic import BaseModel + +from lanarky import Lanarky +from lanarky.adapters.openai.resources import ChatCompletionResource, Message +from lanarky.events import Events +from lanarky.websockets import WebSocket, WebsocketSession + +os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here" + +app = Lanarky() + + +class ChatInput(BaseModel): + messages: list[Message] + + +def chat_completion_factory() -> ChatCompletionResource: + system = "You are a sassy assistant" + return ChatCompletionResource(system=system, stream=True) + + +@app.websocket("/ws") +async def chat( + websocket: WebSocket, + resource: ChatCompletionResource = Depends(chat_completion_factory), +): + async with WebsocketSession().connect(websocket) as session: + async for data in session: + async for chunk in resource.stream_response( + **ChatInput(**data).model_dump() + ): + await websocket.send_json( + dict(data=chunk, event=Events.COMPLETION) + ) + await websocket.send_json(dict(data="", event=Events.END)) +``` + +In this example, we use the `WebsocketSession` context manager to connect to the websocket +and communicate with the client. We pass the client data to the OpenAI resource and stream +the response back to the client. + +!!! tip + + Similar to the streaming example, you can use the same client script from the + [OpenAI API Router](./router.md) guide to test the websocket example. 
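+
+If you prefer a quick single-turn test over the interactive chat loop, a stripped-down client built on
+`lanarky.clients.WebSocketClient` could look like this (assuming the server above is running locally on
+the default port):
+
+```python
+from lanarky.clients import WebSocketClient
+
+
+def main():
+    # defaults to ws://localhost:8000/ws, matching the /ws endpoint above
+    client = WebSocketClient()
+    with client.connect() as session:
+        session.send(dict(messages=[dict(role="user", content="hi")]))
+        # stream_response() yields completion events until the server sends the `end` event
+        for chunk in session.stream_response():
+            print(chunk["data"], end="", flush=True)
+
+
+if __name__ == "__main__":
+    main()
+```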
diff --git a/docs/learn/adapters/openai/index.md b/docs/learn/adapters/openai/index.md new file mode 100644 index 0000000..04b0193 --- /dev/null +++ b/docs/learn/adapters/openai/index.md @@ -0,0 +1,29 @@ +# OpenAI Adapter + +The **OpenAI Adapter** allows Lanarky users to build microservices using the +[OpenAI Python SDK](https://platform.openai.com/docs/api-reference?lang=python). + +To enable this adapter, install lanarky with extra dependencies: + +<!-- termynal --> + +``` +$ pip install lanarky[openai] +``` + +!!! tip + + To use OpenAI, you need to create an openai account and generate an API key. + Visit [openai.com](https://openai.com) for more information. + + To use the generated API key, you need to set the `OPENAI_API_KEY` environment + variable. + +Here's an overview of the supported features: + +- [OpenAI API Router](./router.md): Lanarky router for OpenAI + +Additionally, we will cover some advanced topics: + +- [Dependency Injection](./dependency.md): use OpenAI as a dependency in your microservice +- [FastAPI Backport](./fastapi.md): low-level modules for FastAPI users diff --git a/docs/learn/adapters/openai/router.md b/docs/learn/adapters/openai/router.md new file mode 100644 index 0000000..e9b7755 --- /dev/null +++ b/docs/learn/adapters/openai/router.md @@ -0,0 +1,177 @@ +# OpenAI API Router + +The `OpenAIAPIRouter` class is an abstraction layer which provides a quick and easy +way to build microservices using supported OpenAI models. + +!!! warning + + OpenAI SDK support is currently limited to chat models only (i.e. + GPT-3.5-Turbo, GPT-4 and GPT-4-Turbo). Other models/services will be + added in the future. + +Let's understand how to use `OpenAIAPIRouter` to build streaming and websocket +endpoints. + +## Streaming + +!!! note + + We are using the example from the [Getting Started](../../../getting-started.md) guide. + +```python +import os + +from lanarky import Lanarky +from lanarky.adapters.openai.resources import ChatCompletionResource +from lanarky.adapters.openai.routing import OpenAIAPIRouter + +os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here" + + +app = Lanarky() +router = OpenAIAPIRouter() + + +@router.post("/chat") +def chat(stream: bool = True) -> ChatCompletionResource: + system = "You are a sassy assistant" + return ChatCompletionResource(stream=stream, system=system) + + +app.include_router(router) +``` + +In this example, `ChatCompletionResource` is a wrapper class to use the OpenAI +Python SDK. `chat` acts as a factory function where we define the parameters of +`ChatCompletionResource` and send it to the router to build the endpoint for us. +The factory function arguments can be used to define query or header parameters +which are exposed to the client. + +To receive the events, we will use the following client script: + +```python +import click + +from lanarky.clients import StreamingClient + + +@click.command() +@click.option("--input", required=True) +@click.option("--stream", is_flag=True) +def main(input: str, stream: bool): + client = StreamingClient() + for event in client.stream_response( + "POST", + "/chat", + params={"stream": str(stream).lower()}, + json={"messages": [dict(role="user", content=input)]}, + ): + print(f"{event.event}: {event.data}") + + +if __name__ == "__main__": + main() +``` + +First run the application server: + +<!-- termynal --> + +``` +$ uvicorn app:app +``` + +Then run the client script: + +<!-- termynal --> + +``` +$ python client.py --input "hi" +completion: Oh, hello there! 
What can I sass...I mean assist +you with today? +``` + +## Websocket + +```python +import os + +from lanarky import Lanarky +from lanarky.adapters.openai.resources import ChatCompletionResource +from lanarky.adapters.openai.routing import OpenAIAPIRouter + +os.environ["OPENAI_API_KEY"] = "add-your-openai-api-key-here" + + +app = Lanarky() +router = OpenAIAPIRouter() + + +@router.websocket("/ws") +def chat() -> ChatCompletionResource: + system = "You are a sassy assistant" + return ChatCompletionResource(stream=True, system=system) + + +app.include_router(router) +``` + +Similar to the streaming example, we use `chat` as a `ChatCompletionResource` +factory function and send it to the router to build a websocket endpoint. + +To communicate with the server, we will use the following client script: + +```python +from lanarky.clients import WebSocketClient + + +def main(): + client = WebSocketClient() + with client.connect() as session: + messages = [] + while True: + user_input = input("\nEnter a message: ") + messages.append(dict(role="user", content=user_input)) + session.send(dict(messages=messages)) + print("Received: ", end="") + assistant_message = dict(role="assistant", content="") + for chunk in session.stream_response(): + print(chunk["data"], end="", flush=True) + assistant_message["content"] += chunk["data"] + messages.append(assistant_message) + + +if __name__ == "__main__": + main() +``` + +First run the application server: + +<!-- termynal --> + +``` +$ uvicorn app:app +``` + +Then run the client script: + +<!-- termynal --> + +``` +$ python client.py +Enter a message: hi +Received: Well, hello there! How can I assist you today? +Enter a message: i am lanarky +Received: Oh, aren't you just full of mischief, Lanarky? +What trouble can I help you stir up today? +Enter a message: who am i? +Received: Well, that's a question only you can answer, Lanarky. +But if I had to guess, based on your sassy spirit, I would say you're +someone who loves to dance to the beat of your own drum and has a +mischievous sense of humor. Am I close? +``` + +!!! note "Note from Author" + + If you want to build more complex logic, I recommend using the low-level modules + to define the endpoint from scratch: [Learn more](./fastapi.md) diff --git a/docs/learn/index.md b/docs/learn/index.md new file mode 100644 index 0000000..f898d19 --- /dev/null +++ b/docs/learn/index.md @@ -0,0 +1,14 @@ +--- +hide: + - toc +--- + +# Learn + +Hello user! Here we will learn about the core concepts of Lanarky. + +Here's a quick overview of what we will cover: + +- [Streaming](./streaming.md): build streaming microservices using FastAPI routers +- [Websockets](./websockets.md): build websocket microservices using FastAPI routers +- [Adapters](./adapters/index.md): build microservices using popular LLM frameworks diff --git a/docs/learn/streaming.md b/docs/learn/streaming.md new file mode 100644 index 0000000..bdd9910 --- /dev/null +++ b/docs/learn/streaming.md @@ -0,0 +1,75 @@ +--- +hide: + - toc +--- + +Lanarky uses [Server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events) +to implement streaming support over HTTP. + +## `StreamingResponse` + +The `StreamingResponse` class follows the [`EventSource`](https://developer.mozilla.org/en-US/docs/Web/API/EventSource) +protocol to send events to the client. + +### Example + +To understand how to use `StreamingResponse`, let's look at an example. 
+ +```python +from lanarky import Lanarky +from lanarky.responses import StreamingResponse + +app = Lanarky() + + +@app.get("/") +def index(): + def stream(): + for word in ["Hello", "World!"]: + yield word + + return StreamingResponse(content=stream()) +``` + +Here, we have a simple endpoint streams the message "Hello World!" to the client. +`StreamingResponse` takes a generator function as its content, iterates over it and +sends each item as an event to the client. + +To receive the events, let's build a simple client script. + +```python +from lanarky.clients import StreamingClient + + +def main(): + client = StreamingClient() + for event in client.stream_response("GET", "/"): + print(f"{event.event}: {event.data}") + + +if __name__ == "__main__": + main() +``` + +First run the application server. + +<!-- termynal --> + +``` +$ uvicorn app:app +``` + +Then run the client script. + +<!-- termynal --> + +``` +$ python client.py +message: Hello +message: World! +``` + +!!! warning + + The `StreamingResponse` classes inside the **Adapters API** behave differently from the + above example. To learn more, see [Adapters API](./adapters/index.md) documentation. diff --git a/docs/learn/websockets.md b/docs/learn/websockets.md new file mode 100644 index 0000000..1097da1 --- /dev/null +++ b/docs/learn/websockets.md @@ -0,0 +1,79 @@ +--- +hide: + - toc +--- + +[WebSockets](https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API) are useful for +LLM microservices which require a bi-directional connection between the client and server. +For example, a chat application would require a WebSocket to hold a persistent connection +between the client and server during an active user session. + +Lanarky builds on top of FastAPI to support LLM microservices over WebSockets. + +## `WebsocketSession` + +The `WebsocketSession` class establishes a WebSocket session inside an endpoint logic to define +the interaction between the client and server. This is particularly useful for building chatbot +applications. + +### Example + +To understand how to use `WebsocketSession`, let's look at an example. + +```python +from lanarky import Lanarky +from lanarky.websockets import WebSocket, WebsocketSession + +app = Lanarky() + + +@app.websocket("/ws") +async def endpoint(websocket: WebSocket): + async with WebsocketSession().connect(websocket) as session: + async for message in session: + await websocket.send_json({"data": message["data"].capitalize()}) +``` + +Here, we have a simple websocket endpoint which capitalizes the message sent by the client. +We use the `WebsocketSession` class to establish a session with the client. The session +allows us to send and receive messages from the client. + +To receive the events, let's build a simple client script. + +```python +from lanarky.clients import WebSocketClient + + +def main(): + client = WebSocketClient(uri="ws://localhost:8001/ws") + with client.connect() as session: + while True: + user_input = input("Enter a message: ") + session.send(dict(data=user_input)) + response = session.receive() + print(f"Received: {response}") + + +if __name__ == "__main__": + main() +``` + +First run the application server. + +<!-- termynal --> + +``` +$ uvicorn app:app +``` + +Then run the client script. 
+ +<!-- termynal --> + +``` +$ python client.py +Enter a message: hi +Received: {'data': 'Hi'} +Enter a message: hola +Received: {'data': 'Hola'} +``` diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css index 5409314..db82495 100644 --- a/docs/stylesheets/extra.css +++ b/docs/stylesheets/extra.css @@ -47,3 +47,11 @@ [data-md-color-scheme="slate"] img[src$="#gh-light-mode-only"] { display: none; } + +.md-typeset .md-button--primary { + color: #303841; +} + +.md-nav--primary .md-nav__item--active > .md-nav__link { + color: #f25353; +} diff --git a/lanarky/adapters/langchain/utils.py b/lanarky/adapters/langchain/utils.py index 70c951b..1347373 100644 --- a/lanarky/adapters/langchain/utils.py +++ b/lanarky/adapters/langchain/utils.py @@ -16,7 +16,8 @@ TokenStreamingCallbackHandler, TokenWebSocketCallbackHandler, ) -from lanarky.adapters.langchain.responses import StreamingResponse +from lanarky.adapters.langchain.responses import HTTPStatusDetail, StreamingResponse +from lanarky.events import Events from lanarky.logging import logger from lanarky.websockets import WebSocket, WebsocketSession @@ -55,10 +56,26 @@ async def factory_endpoint(websocket: WebSocket, chain: Chain = Depends(endpoint callbacks = get_websocket_callbacks(chain, websocket) async with WebsocketSession().connect(websocket) as session: async for data in session: - await chain.acall( - inputs=request_model(**data).model_dump(), - callbacks=callbacks, - ) + try: + await chain.acall( + inputs=request_model(**data).model_dump(), + callbacks=callbacks, + ) + except Exception as e: + logger.error(f"langchain error: {e}") + await websocket.send_json( + dict( + data=dict( + status=500, + detail=HTTPStatusDetail( + code=500, + message="Internal Server Error", + ), + ), + event=Events.ERROR, + ) + ) + await websocket.send_json(dict(data="", event=Events.END)) return factory_endpoint diff --git a/lanarky/adapters/openai/resources.py b/lanarky/adapters/openai/resources.py index 3085f10..313eefb 100644 --- a/lanarky/adapters/openai/resources.py +++ b/lanarky/adapters/openai/resources.py @@ -11,6 +11,11 @@ class Message(BaseModel): content: str +class SystemMessage(BaseModel): + role: str = "system" + content: str + + class OpenAIResource: def __init__(self, client: AsyncOpenAI = None): self._client = client or AsyncOpenAI() @@ -27,17 +32,18 @@ def __init__( client: AsyncOpenAI = None, model: str = "gpt-3.5-turbo", stream: bool = False, + system: str = None, **create_kwargs, ): super().__init__(client=client) self.model = model self.stream = stream + self.system = SystemMessage(content=system) if system else None self.create_kwargs = create_kwargs - async def stream_response( - self, messages: list[Message] - ) -> Generator[str, None, None]: + async def stream_response(self, messages: list[dict]) -> Generator[str, None, None]: + messages = self._prepare_messages(messages) data = await self._client.chat.completions.create( messages=messages, model=self.model, @@ -56,9 +62,15 @@ async def stream_response( raise TypeError(f"Unexpected data type: {type(data)}") yield data.choices[0].message.content - async def __call__(self, messages: list[Message]) -> ChatCompletion: + async def __call__(self, messages: list[dict]) -> ChatCompletion: + messages = self._prepare_messages(messages) return await self._client.chat.completions.create( messages=messages, model=self.model, **self.create_kwargs, ) + + def _prepare_messages(self, messages: list[dict]) -> list[dict]: + if self.system is not None: + messages = [self.system.model_dump()] 
+ messages + return messages diff --git a/lanarky/adapters/openai/responses.py b/lanarky/adapters/openai/responses.py index 4fada05..5ddd647 100644 --- a/lanarky/adapters/openai/responses.py +++ b/lanarky/adapters/openai/responses.py @@ -33,8 +33,7 @@ async def stream_response(self, send: Send) -> None: ) try: - data = await self.resource.stream_response(self.messages) - async for chunk in data: + async for chunk in self.resource.stream_response(self.messages): event_body = ServerSentEvent( data=chunk, event=Events.COMPLETION, diff --git a/lanarky/adapters/openai/utils.py b/lanarky/adapters/openai/utils.py index 3892b0f..162d34d 100644 --- a/lanarky/adapters/openai/utils.py +++ b/lanarky/adapters/openai/utils.py @@ -48,21 +48,24 @@ async def factory_endpoint( async for chunk in resource.stream_response( **request_model(**data).model_dump() ): - chunk_message = dict( - data=chunk, - event=Events.COMPLETION, + await websocket.send_json( + dict( + data=chunk, + event=Events.COMPLETION, + ) ) - await websocket.send_json(chunk_message) except Exception as e: logger.error(f"openai error: {e}") - error_message = dict( - data=dict( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=HTTPStatusDetail.INTERNAL_SERVER_ERROR, - ), - event=Events.ERROR, + await websocket.send_json( + dict( + data=dict( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=HTTPStatusDetail.INTERNAL_SERVER_ERROR, + ), + event=Events.ERROR, + ) ) - await websocket.send_json(error_message) + await websocket.send_json(dict(data="", event=Events.END)) return factory_endpoint diff --git a/lanarky/applications.py b/lanarky/applications.py index 28c9872..ac6d17e 100644 --- a/lanarky/applications.py +++ b/lanarky/applications.py @@ -1,6 +1,13 @@ from typing import Any from fastapi.applications import AppType, FastAPI +from fastapi.openapi.docs import ( + get_redoc_html, + get_swagger_ui_html, + get_swagger_ui_oauth2_redirect_html, +) +from fastapi.requests import Request +from fastapi.responses import HTMLResponse, JSONResponse class Lanarky(FastAPI): @@ -12,3 +19,59 @@ class Lanarky(FastAPI): def __init__(self: AppType, *, title: str = "Lanarky", **kwargs: Any) -> None: super().__init__(title=title, **kwargs) + + def setup(self) -> None: # pragma: no cover + if self.openapi_url: + urls = (server_data.get("url") for server_data in self.servers) + server_urls = {url for url in urls if url} + + async def openapi(req: Request) -> JSONResponse: + root_path = req.scope.get("root_path", "").rstrip("/") + if root_path not in server_urls: + if root_path and self.root_path_in_servers: + self.servers.insert(0, {"url": root_path}) + server_urls.add(root_path) + return JSONResponse(self.openapi()) + + self.add_route(self.openapi_url, openapi, include_in_schema=False) + if self.openapi_url and self.docs_url: + + async def swagger_ui_html(req: Request) -> HTMLResponse: + root_path = req.scope.get("root_path", "").rstrip("/") + openapi_url = root_path + self.openapi_url + oauth2_redirect_url = self.swagger_ui_oauth2_redirect_url + if oauth2_redirect_url: + oauth2_redirect_url = root_path + oauth2_redirect_url + return get_swagger_ui_html( + openapi_url=openapi_url, + title=self.title + " - Swagger UI", + oauth2_redirect_url=oauth2_redirect_url, + init_oauth=self.swagger_ui_init_oauth, + swagger_ui_parameters=self.swagger_ui_parameters, + swagger_favicon_url="https://lanarky.ajndkr.com/assets/icon.svg", + ) + + self.add_route(self.docs_url, swagger_ui_html, include_in_schema=False) + + if self.swagger_ui_oauth2_redirect_url: + 
+ async def swagger_ui_redirect(req: Request) -> HTMLResponse: + return get_swagger_ui_oauth2_redirect_html() + + self.add_route( + self.swagger_ui_oauth2_redirect_url, + swagger_ui_redirect, + include_in_schema=False, + ) + if self.openapi_url and self.redoc_url: + + async def redoc_html(req: Request) -> HTMLResponse: + root_path = req.scope.get("root_path", "").rstrip("/") + openapi_url = root_path + self.openapi_url + return get_redoc_html( + openapi_url=openapi_url, + title=self.title + " - ReDoc", + redoc_favicon_url="https://lanarky.ajndkr.com/assets/icon.svg", + ) + + self.add_route(self.redoc_url, redoc_html, include_in_schema=False) diff --git a/lanarky/clients.py b/lanarky/clients.py new file mode 100644 index 0000000..e6b1d72 --- /dev/null +++ b/lanarky/clients.py @@ -0,0 +1,71 @@ +import json +from contextlib import contextmanager +from typing import Any, Generator, Optional + +import httpx +from httpx_sse import connect_sse +from websockets.sync.client import connect as websocket_connect + +from lanarky.websockets import DataMode + + +class StreamingClient: + def __init__( + self, + base_url: str = "http://localhost:8000", + client: Optional[httpx.Client] = None, + ): + self.base_url = base_url + self.client = client or httpx.Client() + + def stream_response(self, method: str, path: str, **kwargs) -> Generator: + url = self.base_url + path + with connect_sse(self.client, method, url, **kwargs) as event_source: + for sse in event_source.iter_sse(): + yield sse + + +class WebSocketClient: + def __init__( + self, uri: str = "ws://localhost:8000/ws", mode: DataMode = DataMode.JSON + ): + self.uri = uri + self.mode = mode + self.websocket = None + + @contextmanager + def connect(self): + with websocket_connect(self.uri) as websocket: + self.websocket = websocket + yield self + self.websocket = None + + def send(self, message: Any): + if self.websocket: + if self.mode == DataMode.JSON: + message = json.dumps(message) + elif self.mode == DataMode.TEXT: + message = str(message) + elif self.mode == DataMode.BYTES: + message = message.encode("utf-8") + self.websocket.send(message) + + def receive(self, mode: DataMode = None): + mode = mode or self.mode + if self.websocket: + response = self.websocket.recv() + if mode == DataMode.JSON: + response = json.loads(response) + elif mode == DataMode.TEXT: + response = str(response) + elif mode == DataMode.BYTES: + response = response.decode("utf-8") + return response + + def stream_response(self): + if self.websocket: + while True: + response = self.receive(mode=DataMode.JSON) + if response["event"] == "end": + break + yield response diff --git a/lanarky/events.py b/lanarky/events.py index 9b41b9a..315fbdb 100644 --- a/lanarky/events.py +++ b/lanarky/events.py @@ -7,3 +7,4 @@ class Events(str, Enum): COMPLETION = "completion" ERROR = "error" + END = "end" diff --git a/mkdocs.yml b/mkdocs.yml index 62d4c60..667e19c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -24,14 +24,18 @@ theme: features: - search.suggest - search.highlight - - content.tabs.link + - navigation.path + - navigation.sections + - navigation.tabs + - navigation.top + - navigation.footer - navigation.indexes + - navigation.tracking + - content.tabs.link - content.tooltips - - navigation.path - content.code.annotate - content.code.copy - content.code.select - - navigation.tabs icon: repo: fontawesome/brands/github-alt logo: assets/icon.svg @@ -44,6 +48,25 @@ theme: nav: - Lanarky: index.md - Getting Started: getting-started.md + - Learn: + - learn/index.md + - Streaming: 
learn/streaming.md + - WebSockets: learn/websockets.md + - Adapters: + - learn/adapters/index.md + - OpenAI: + - learn/adapters/openai/index.md + - Router: learn/adapters/openai/router.md + - Advanced: + - Dependency Injection: learn/adapters/openai/dependency.md + - FastAPI Backport: learn/adapters/openai/fastapi.md + - LangChain: + - learn/adapters/langchain/index.md + - Router: learn/adapters/langchain/router.md + - learn/adapters/langchain/callbacks.md + - Advanced: + - Dependency Injection: learn/adapters/langchain/dependency.md + - FastAPI Backport: learn/adapters/langchain/fastapi.md markdown_extensions: - attr_list diff --git a/poetry.lock b/poetry.lock index ea42a5b..0cfe9b8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -973,13 +973,13 @@ license = ["ukkonen"] [[package]] name = "idna" -version = "3.4" +version = "3.6" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" files = [ - {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, - {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, + {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, + {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, ] [[package]] @@ -1440,13 +1440,13 @@ min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-imp [[package]] name = "mkdocs-material" -version = "9.4.12" +version = "9.4.14" description = "Documentation that simply works" optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.4.12-py3-none-any.whl", hash = "sha256:1adf4e49a6ae59b333d82acedef176f381c27be808bba82074f88dfefc4a5b01"}, - {file = "mkdocs_material-9.4.12.tar.gz", hash = "sha256:378fae65dbd47e422cabb912f417b3a67db087d150a601dc7b94dbcc26981cbe"}, + {file = "mkdocs_material-9.4.14-py3-none-any.whl", hash = "sha256:dbc78a4fea97b74319a6aa9a2f0be575a6028be6958f813ba367188f7b8428f6"}, + {file = "mkdocs_material-9.4.14.tar.gz", hash = "sha256:a511d3ff48fa8718b033e7e37d17abd9cc1de0fdf0244a625ca2ae2387e2416d"}, ] [package.dependencies] @@ -1713,13 +1713,13 @@ files = [ [[package]] name = "pexpect" -version = "4.8.0" +version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." optional = false python-versions = "*" files = [ - {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, - {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, + {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, + {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"}, ] [package.dependencies] @@ -2015,17 +2015,17 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pymdown-extensions" -version = "10.4" +version = "10.5" description = "Extension pack for Python Markdown." 
optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.4-py3-none-any.whl", hash = "sha256:cfc28d6a09d19448bcbf8eee3ce098c7d17ff99f7bd3069db4819af181212037"}, - {file = "pymdown_extensions-10.4.tar.gz", hash = "sha256:bc46f11749ecd4d6b71cf62396104b4a200bad3498cb0f5dad1b8502fe461a35"}, + {file = "pymdown_extensions-10.5-py3-none-any.whl", hash = "sha256:1f0ca8bb5beff091315f793ee17683bc1390731f6ac4c5eb01e27464b80fe879"}, + {file = "pymdown_extensions-10.5.tar.gz", hash = "sha256:1b60f1e462adbec5a1ed79dac91f666c9c0d241fa294de1989f29d20096cfd0b"}, ] [package.dependencies] -markdown = ">=3.2" +markdown = ">=3.5" pyyaml = "*" [package.extras] @@ -2574,13 +2574,13 @@ sqlcipher = ["sqlcipher3-binary"] [[package]] name = "sse-starlette" -version = "1.8.1" +version = "1.8.2" description = "SSE plugin for Starlette" optional = false python-versions = ">=3.8" files = [ - {file = "sse_starlette-1.8.1-py3-none-any.whl", hash = "sha256:c5ff4e7f0dadd8a221b5739dab0a064679d450386dfd8f7a7934d5494fbac39a"}, - {file = "sse_starlette-1.8.1.tar.gz", hash = "sha256:b1598e0bfcb1dbb9f7d0b51a88a4c9f0e30f9f90233a8267abbba48ba6fac39c"}, + {file = "sse_starlette-1.8.2-py3-none-any.whl", hash = "sha256:70cc7ef5aca4abe8a25dec1284cce4fe644dd7bf0c406d3e852e516092b7f849"}, + {file = "sse_starlette-1.8.2.tar.gz", hash = "sha256:e0f9b8dec41adc092a0a6e0694334bd3cfd3084c44c497a6ebc1fb4bdd919acd"}, ] [package.dependencies] @@ -3257,4 +3257,4 @@ openai = ["openai", "tiktoken"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "66532764f3cdfc6a4f1ca46ff3c06e1d2ffdb9488625bfa1ab403dfdc8ddca7d" +content-hash = "b33d9c711d6c6ac0e4769b01877492a1ec48f0c8dbb4890a5e6b4a06c641d0d5" diff --git a/pyproject.toml b/pyproject.toml index e05b7bc..93c2db2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,8 @@ fastapi = ">=0.97.0" pydantic = ">=1,<3" sse-starlette = "^1.6.5" loguru = "^0.7.2" +httpx-sse = "^0.3.1" +websockets = "^12.0" openai = {version = "^1", optional = true} tiktoken = {version = "^0.4.0", optional = true} langchain = {version = "^0.0.337", optional = true} @@ -26,7 +28,6 @@ pyclean = "^2.7.5" uvicorn = {extras = ["standard"], version = "<1"} [tool.poetry.group.tests.dependencies] -httpx-sse = "^0.3.1" pytest = "^7.3.2" pytest-cov = "^4.1.0" pytest-asyncio = "^0.21.0" diff --git a/tests/adapters/openai/test_openai_responses.py b/tests/adapters/openai/test_openai_responses.py index b658812..b80bfac 100644 --- a/tests/adapters/openai/test_openai_responses.py +++ b/tests/adapters/openai/test_openai_responses.py @@ -14,9 +14,11 @@ @pytest.mark.asyncio async def test_stream_response_successful(send: Send): + async def async_generator(): + yield "" + resource = MagicMock(spec=ChatCompletionResource) - resource.stream_response = AsyncMock() - resource.stream_response.__aiter__ = AsyncMock(return_value="") + resource.stream_response.__aiter__ = MagicMock(return_value=async_generator()) response = StreamingResponse( resource=resource,