diff --git a/tutorials/evals/local_llm.ipynb b/tutorials/evals/local_llm.ipynb new file mode 100644 index 0000000000..c01a735754 --- /dev/null +++ b/tutorials/evals/local_llm.ipynb @@ -0,0 +1,410 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "

\n", + " \"phoenix\n", + "
\n", + " Docs\n", + " |\n", + " GitHub\n", + " |\n", + " Community\n", + "

\n", + "
\n", + "

Using a Local LLM

\n", + "\n", + "Below is an example of using a local LLM to perform evals. In this example we will be using [Ollama](https://ollama.com/) to serve the `llama3` model. Before running the cells below, make sure the Ollama server is running locally (the notebook expects it at `http://localhost:11434`) and that the model has been pulled, e.g. with `ollama pull llama3`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qq \"arize-phoenix-evals>=0.0.5\" \"litellm\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from phoenix.evals import LiteLLMModel\n", + "\n", + "os.environ[\"OLLAMA_API_BASE\"] = \"http://localhost:11434\"\n", + "\n", + "model = LiteLLMModel(model=\"ollama/llama3\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"Hello there! It's great to meet you. I'm your friendly AI assistant, here to help with any questions or topics you'd like to discuss. What brings you to this corner of the internet today?\"" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model(\"Hello, world!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
query_idquery_textdocument_titledocument_textdocument_text_with_emphasisrelevant
0Q1how are glacier caves formed?Glacier caveA partly submerged glacier cave on Perito More...A partly submerged glacier cave on Perito More...True
1Q10how an outdoor wood boiler worksOutdoor wood-fired boilerThe outdoor wood boiler is a variant of the cl...The outdoor wood boiler is a variant of the cl...False
2Q100what happens to the light independent reactio...Light-independent reactionsThe simplified internal structure of a chlorop...The simplified internal structure of a chlorop...True
3Q1000where in the bible that palestine have no land...PhilistinesThe Philistine cities of Gaza, Ashdod, Ashkelo...The Philistine cities of Gaza, Ashdod, Ashkelo...False
4Q1001what are the test scores on asvabArmed Services Vocational Aptitude BatteryThe Armed Services Vocational Aptitude Battery...The Armed Services Vocational Aptitude Battery...False
\n", + "
" + ], + "text/plain": [ + " query_id ... relevant\n", + "0 Q1 ... True\n", + "1 Q10 ... False\n", + "2 Q100 ... True\n", + "3 Q1000 ... False\n", + "4 Q1001 ... False\n", + "\n", + "[5 rows x 6 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from phoenix.evals import download_benchmark_dataset\n", + "\n", + "df = download_benchmark_dataset(\n", + " task=\"binary-relevance-classification\", dataset_name=\"wiki_qa-train\"\n", + ")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f986a0040c5a41138ad525c91964ee61", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "llm_classify | | 0/100 (0.0%) | ⏳ 00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
label
87relevant
1632relevant
2069relevant
1974relevant
39relevant
1379relevant
1947relevant
1862relevant
147relevant
973relevant
1658unrelated
1901relevant
1103relevant
2006unrelated
1308relevant
1846relevant
290relevant
797relevant
1599relevant
1755relevant
\n", + "" + ], + "text/plain": [ + " label\n", + "87 relevant\n", + "1632 relevant\n", + "2069 relevant\n", + "1974 relevant\n", + "39 relevant\n", + "1379 relevant\n", + "1947 relevant\n", + "1862 relevant\n", + "147 relevant\n", + "973 relevant\n", + "1658 unrelated\n", + "1901 relevant\n", + "1103 relevant\n", + "2006 unrelated\n", + "1308 relevant\n", + "1846 relevant\n", + "290 relevant\n", + "797 relevant\n", + "1599 relevant\n", + "1755 relevant" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "relevance_df.head(20)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "phoenix", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}