From 65513c601667fd532a920d7a0b63074eca7ccae5 Mon Sep 17 00:00:00 2001 From: Ryan Mullins Date: Mon, 24 Jun 2024 14:23:06 +0000 Subject: [PATCH] LLM Comparator: Docs and licensing for OSS release. PiperOrigin-RevId: 646085859 --- python/README.md | 176 +++++++++++- python/notebooks/basic_demo.ipynb | 264 ++++++++++++++++++ .../run_scripts_for_llm_comparator.ipynb | 157 ----------- python/pyproject.toml | 5 +- python/src/llm_comparator/_colab.py | 14 + python/src/llm_comparator/_logging.py | 14 + python/src/llm_comparator/comparison.py | 16 +- python/src/llm_comparator/llm_judge_runner.py | 14 + python/src/llm_comparator/model_helper.py | 16 +- python/src/llm_comparator/prompt_templates.py | 14 + .../rationale_bullet_generator.py | 14 + .../rationale_cluster_generator.py | 30 +- python/src/llm_comparator/types.py | 43 +-- python/src/llm_comparator/utils.py | 16 +- python/src/llm_comparator/utils_test.py | 16 +- 15 files changed, 623 insertions(+), 186 deletions(-) create mode 100644 python/notebooks/basic_demo.ipynb delete mode 100644 python/notebooks/run_scripts_for_llm_comparator.ipynb diff --git a/python/README.md b/python/README.md index c4565e6..6a595ba 100644 --- a/python/README.md +++ b/python/README.md @@ -1 +1,175 @@ -# LLM Comparator Python Scripts +# LLM Comparator: Python Library + +The LLM Comparator Python Library provides a simple API for configuring and +running **comparative evaluations** of models, and generating the JSON files +required to analyze these models side-by-side in the +[LLM Comparator app][llm-comparator-app]. + +## Installation + +```sh +# [OPTIONAL] We recommend using a Python virtual environemnt. +python3 -m venv ~/.venv/llm_comparator +source ~/.venv/llm_comparator/bin/activate +``` + +You can install this library from the PyPI registry. This workflow is +recommended for most usage. + +```sh +pip install llm_comparator +``` + +Or by cloning the repo from GitHub and installing from source. 
This workflow is
+recommended for contributors fixing bugs or adding new features to the library.
+
+```sh
+git clone https://github.com/PAIR-code/llm-comparator.git
+cd llm-comparator/python
+pip install -e .
+```
+
+## Core Concepts
+
+The primary entrypoint to the LLM Comparator Python Library is the
+`llm_comparator.comparison` module. This module provides a `run()` function that
+coordinates the three phases of comparative evaluation: judging, bulletizing,
+and clustering. The library provides modules for each of these phases, as well
+as wrappers for interacting with LLMs.
+
+### Model Helpers
+
+The `llm_comparator.model_helper` module is used to initialize API wrappers to
+interface with LLMs. Broadly, we support two kinds of models: generation models,
+which should be text-to-text language models, and embedding models.
+
+We provide two concrete implementations of wrappers that interact with
+the [Google Vertex AI API][vertex-api]:
+
+* `VertexGenerationModelHelper` supports any
+  [generative language model][model-garden-gen] available in the Model Garden.
+  By default we use `gemini-pro`, but alternatives can be configured with the
+  `model_name=` constructor parameter.
+* `VertexEmbeddingModelHelper` supports any
+  [text embedding model][model-garden-emb] available in the Model Garden.
+  By default we use `textembedding-gecko@003`, but alternatives can be
+  configured with the `model_name=` constructor parameter.
+
+Additional wrapper classes can be implemented by subclassing
+`GenerationModelHelper` and `EmbeddingModelHelper` as necessary. We welcome
+contributions of these classes to the library via
+[Pull Requests][llm-comparator-prs].
+
+### Judges
+
+The "judge" is the model responsible for actually doing the comparison between
+the two model responses.
+
+This functionality is encapsulated in the `LLMJudgeRunner` from the
+`llm_comparator.llm_judge_runner` module. 
It requires a generator model that
+conforms to the `GenerationModelHelper` protocol in order to call an LLM to
+generate their judgements of which of the two model responses is better for
+every prompt.
+
+We provide a default judge prompt in
+`llm_comparator.llm_judge_runner.DEFAULT_LLM_JUDGE_PROMPT_TEMPLATE`, and you can
+use the `llm_judge_prompt_template=` parameter to provide a custom prompt that
+may better suit your needs at initialization time. Prompts should require the
+judge to phrase its responses in a simple XML format that includes the `verdict`
+and an `explanation`, to enable downstream processing by the bulletizer and
+clusterer. Note that if you do provide a custom prompt, you can use the
+`rating_to_score_map=` parameter to ensure judgement `verdict`s can be mapped to
+numeric values.
+
+```xml
+<result>
+  <explanation>YOUR EXPLANATION GOES HERE.</explanation>
+  <verdict>A is slightly better</verdict>
+</result>
+```
+The same judge is run multiple times during each comparative analysis to get a
+diversity of ratings. This can be configured via the `num_repeats=` key of the
+optional `judge_opts=` dictionary passed to `llm_comparator.comparison.run()`.
+By default, we call the judge 6 times.
+
+### Bulletizers
+
+A "bulletizer" condenses the results provided by the judge into a set of bullets
+to make them easier to understand and consume in the UI.
+
+This functionality is encapsulated in the `RationaleBulletGenerator` from the
+`llm_comparator.rationale_bullet_generator` module. It requires a generator
+model that conforms to the `GenerationModelHelper` protocol in order to call an
+LLM to generate the bulleted summaries.
+
+The primary configuration is the `win_rate_threshold=` parameter, which can be
+configured per-comparative evaluation run via the `bulletizer_opts=` dictionary
+passed to `llm_comparator.comparison.run()`. By default, the threshold is set to
+`0.25` based on the default scoring range of [-1.5, 1.5]. 
+ +### Clusterers + +A "clusterer" takes the bullets, embeds them, groups them into clusters based on +embedding similarity, and generates a label for those clusters. + +This functionality is encapsulated in the `RationaleClusterGenerator` from the +`llm_comparator.rationale_cluster_generator` module. It requires a generator +model that conforms to the `GenerationModelHelper` protocol and an embedder that +conforms to the `EmbeddingModelHelper` protocol in order to call LLMs to +generate the clustered summary of rationale bullets. + +The primary configuration is the `num_clusters=` parameter, which can be +configured per-comparative evaluation run via the `clusterer_opts=` dictionary +passed to `llm_comparator.comparison.run()`. By default, a clusterer creates 10 +clusters from the bullets. + +## Basic Usage + +We have prepared a Python notebook that you can +[open in Google Colab][llm-comparator-colab] to follow along interactively. It +uses Google's Vertex AI APIs to call models, and requires you to +[authenticate][vertex-auth] in order to follow along. + +The following pseudo-script is the minimal distillation of the notebook linked +above, without any AI platform-specific guidance. + +```python +from llm_comparator import comparison +from llm_comparator import model_helper +from llm_comparator import llm_judge_runner +from llm_comparator import rationale_bullet_generator +from llm_comparator import rationale_cluster_generator + +inputs = [ + # Provide your inputs here. + # They must conform to llm_comparator.types.LLMJudgeInput +] + +# Initialize the models-calling classes. +generator = # Initialize a model_helper.GenerationModelHelper() subclass +embedder = # Initialize a model_helper.EmbeddingModelHelper() subclass + +# Initialize the instances that run work on the models. 
+judge = llm_judge_runner.LLMJudgeRunner(generator) +bulletizer = rationale_bullet_generator.RationaleBulletGenerator(generator) +clusterer = rationale_cluster_generator.RationaleClusterGenerator( + generator, embedder +) + +# Configure and run the comparative evaluation. +comparison_result = comparison.run(inputs, judge, bulletizer, clusterer) + +# Write the results to a JSON file that can be loaded in +# https://pair-code.github.io/llm-comparator +file_path = "path/to/file.json" +comparison.write(comparison_result, file_path) +``` + + +[llm-comparator-app]: https://pair-code.github.io/llm-comparator +[llm-comparator-colab]: https://colab.research.google.com/github/PAIR-code/llm-comparator/blob/main/python/notebooks/basic_demo.ipynb +[llm-comparator-prs]: https://github.com/PAIR-code/llm-comparator/pulls +[model-garden-emb]: https://console.cloud.google.com/vertex-ai/model-garden?pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%22supportedTasks%22,%22inputTypes%22%5D,%22o%22:%5B%22EMBEDDING%22,%22LANGUAGE%22%5D),%22s%22:%22%22)) +[model-garden-gen]: https://console.cloud.google.com/vertex-ai/model-garden?pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%22supportedTasks%22,%22inputTypes%22%5D,%22o%22:%5B%22GENERATION%22,%22LANGUAGE%22%5D),%22s%22:%22%22)) +[vertex-api]: https://cloud.google.com/vertex-ai/docs/reference +[vertex-auth]: https://cloud.google.com/vertex-ai/docs/authentication diff --git a/python/notebooks/basic_demo.ipynb b/python/notebooks/basic_demo.ipynb new file mode 100644 index 0000000..a9060d2 --- /dev/null +++ b/python/notebooks/basic_demo.ipynb @@ -0,0 +1,264 @@ +{ + "cells": [ + { + "metadata": { + "id": "TMG3IgcdtzdI" + }, + "cell_type": "markdown", + "source": [ + "# LLM Comparator: Running Comparative Evaluations with Google Vertex AI" + ] + }, + { + "metadata": { + "id": "Lu90H8Os4UFZ" + }, + "cell_type": "code", + "source": [ + "#@title Licensed under the Apache License, Version 2.0\n", + "# Copyright 2024 Google LLC\n", + 
"#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kunEkAgFB9Yt" + }, + "outputs": [], + "source": [ + "#@title Install the LLM Comparator package\n", + "! pip install llm_comparator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QZlVpN83nJBv" + }, + "outputs": [], + "source": [ + "#@title Import relevant packages\n", + "import vertexai\n", + "from google.colab import auth\n", + "\n", + "# The comparison library provides the primary API for running Comparative\n", + "# Evaluations and generating the JSON files required by the LLM Comparator web\n", + "# app.\n", + "from llm_comparator import comparison\n", + "\n", + "# The model_helper library is used to initialize API wrapper to interface with\n", + "# models. For this demo we focus on models served by Google Vertex AI, but you\n", + "# can extend the llm_comparator.model_helper.GenerationModelHelper and\n", + "# llm_comparator.model_helper.EmbeddingModelHelper classes to work with other\n", + "# providers or models you host yourself.\n", + "from llm_comparator import model_helper\n", + "\n", + "# The following libraries contain wrappers that implement the core functionality\n", + "# of the Comparative Evaluation workflow. 
More on these below.\n", + "from llm_comparator import llm_judge_runner\n", + "from llm_comparator import rationale_bullet_generator\n", + "from llm_comparator import rationale_cluster_generator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3ygUJPVxDMB7" + }, + "outputs": [], + "source": [ + "#@title Setup and authenticate with Google Vertex AI.\n", + "PROJECT_ID = 'your_project_id' #@param {type: \"string\"}\n", + "REGION = 'us-central1' #@param {type: \"string\"}\n", + "\n", + "auth.authenticate_user()\n", + "! gcloud config set project {PROJECT_ID}\n", + "vertexai.init(project=PROJECT_ID, location=REGION)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JBGp4LiVCO00" + }, + "outputs": [], + "source": [ + "#@title Prepare Your Inputs\n", + "\n", + "# See llm_comparator.llm_judge_runner.LLMJudgeInput for the required input type.\n", + "llm_judge_inputs = [\n", + " {'prompt': 'how are you?', 'response_a': 'good', 'response_b': 'bad'},\n", + " {'prompt': 'hello?', 'response_a': 'hello', 'response_b': 'hi'},\n", + " {'prompt': 'what is the capital of korea?', 'response_a': 'Seoul', 'response_b': 'Vancouver'}\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xy6GxkgJCD-M" + }, + "outputs": [], + "source": [ + "#@title Initialize models used in the LLM Comparator evaluation.\n", + "\n", + "# The generator model can be any Text-to-Text LLM provided by Vertex AI. This\n", + "# model will be asked to do a series of tasks---judge, bulletize, and cluster---\n", + "# and it is often beneficial to use a larger model for this reason.\n", + "#\n", + "# We default to 'gemini-pro' but you can change this with the `model_name=`\n", + "# param. 
For a full list of models available via the Model Garden, check out\n", + "# https://console.cloud.google.com/vertex-ai/model-garden?pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%22supportedTasks%22,%22inputTypes%22%5D,%22o%22:%5B%22GENERATION%22,%22LANGUAGE%22%5D),%22s%22:%22%22)).\n", + "#\n", + "# Since we're using Gemini Pro, a very competent and flexible foundation model,\n", + "# we are sharing the same generator across all downstream tasks. However, you\n", + "# could use different models for each task if desired.\n", + "generator = model_helper.VertexGenerationModelHelper()\n", + "\n", + "# The embedding model can be any text embedder provided by Vertex AI. We default\n", + "# to 'textembedding-gecko@003' but you can change this with the `model_name=`\n", + "# param. For a full list of models available via the Model Garden, check out\n", + "# https://console.cloud.google.com/vertex-ai/model-garden?pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%22supportedTasks%22,%22inputTypes%22%5D,%22o%22:%5B%22EMBEDDING%22,%22LANGUAGE%22%5D),%22s%22:%22%22))\n", + "embedder = model_helper.VertexEmbeddingModelHelper()\n", + "\n", + "# The following models do the core work of a Comparative Evaluation: judge,\n", + "# bulletize, and cluster. Each class provides a `.run()` function, and the\n", + "# `llm_comparator.comparison.run()` API orchestrates configuring and calling\n", + "# these APIs on the instances you pass in. More on how to configure these below.\n", + "\n", + "# The `judge` is the model responsible for actually doing the comparison between\n", + "# the two models. 
The same judge is run multiple times to get a diversity of\n", + "# perspectives, more on how to configure this below.\n", + "#\n", + "# A judge must phrase its responses in a simple XML format that includes the\n", + "# verdict and an explanation of the results, to enable downstream processing by\n", + "# the bulletizer and clusterer.\n", + "#\n", + "# \u003cresult\u003e\n", + "# \u003cexplanation\u003eYOUR EXPLANATION GOES HERE.\u003c/explanation\u003e\n", + "# \u003cverdict\u003eA is slightly better\u003c/verdict\u003e\n", + "# \u003c/result\u003e\n", + "#\n", + "# We provide a default \"judge\" prompt in\n", + "# llm_comparator.llm_judge_runner.DEFAULT_LLM_JUDGE_PROMPT_TEMPLATE, and you can\n", + "# use the `llm_judge_prompt_template=` parameter to provide a custom prompt that\n", + "# may better suit your needs.\n", + "judge = llm_judge_runner.LLMJudgeRunner(generator)\n", + "\n", + "# The `bulletizer` condenses the results provided by the judge into a set of\n", + "# bullets to make them easier to understand and consume in the UI.\n", + "bulletizer = rationale_bullet_generator.RationaleBulletGenerator(generator)\n", + "\n", + "# The `clusterer` takes the bullets, embeds them, groups them into clusters\n", + "# based on embedding similarity, and generates a label for those clusters.\n", + "clusterer = rationale_cluster_generator.RationaleClusterGenerator(\n", + " generator, embedder\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QUU3V63vVbvS" + }, + "outputs": [], + "source": [ + "#@title Run the Comparative Evaluation.\n", + "\n", + "# The comparison.run() function is the primary interface for running a\n", + "# Comparative Evaluation. It take your prepared inputs, a judge, a buletizer,\n", + "# and a clusterer and returns a Python dictioary in the required format for use\n", + "# in the LLM Comparator web app. 
You can inspect this dictionary in Python if\n", + "# you like, but it's more useful once written to a file.\n", + "#\n", + "# The example below is basic, but you can use the judge_opts=, bulletizer_opts=,\n", + "# and/or clusterer_opts= parameters (all of which are optional dictionaries that\n", + "# are converted to keyword options) to further customize the behaviors. See the\n", + "# Docsrtrings for more.\n", + "comparison_result = comparison.run(\n", + " llm_judge_inputs,\n", + " judge,\n", + " bulletizer,\n", + " clusterer,\n", + ")" + ] + }, + { + "metadata": { + "id": "hViMkxUGhnTA" + }, + "cell_type": "code", + "source": [ + "#@title [Optional] Save the results to a file.\n", + "file_path = 'json_for_llm_comparator.json' # @param {type: \"string\"}\n", + "comparison.write(comparison_result, file_path)" + ], + "outputs": [], + "execution_count": null + }, + { + "metadata": { + "id": "7W4B1uzqvJZ-" + }, + "cell_type": "code", + "source": [ + "#@title [Optional] View the results in the app in Colab.\n", + "comparison.show_in_colab(file_path)" + ], + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "colab": { + "last_runtime": { + "build_target": "", + "kind": "local" + }, + "private_outputs": true, + "provenance": [ + { + "file_id": "/piper/depot/google3/third_party/javascript/llm_comparator/python/notebooks/run_scripts_for_llm_comparator.ipynb?workspaceId=kahng:comparator12-2::citc", + "timestamp": 1718335330333 + }, + { + "file_id": "/piper/depot/google3/third_party/javascript/llm_comparator/python/notebooks/run_scripts_for_llm_comparator.ipynb?workspaceId=kahng:fig-export-comparator12-2-3818-change-2::citc", + "timestamp": 1718334507750 + }, + { + "file_id": "/piper/depot/google3/third_party/javascript/llm_comparator/python/notebooks/run_scripts_for_llm_comparator.ipynb?workspaceId=kahng:fig-export-comparator12-2-3818-change-2::citc", + "timestamp": 1718215678229 + } + ] + }, + "kernelspec": { + "display_name": "Python 3", + "name": 
"python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/python/notebooks/run_scripts_for_llm_comparator.ipynb b/python/notebooks/run_scripts_for_llm_comparator.ipynb deleted file mode 100644 index 6f53b90..0000000 --- a/python/notebooks/run_scripts_for_llm_comparator.ipynb +++ /dev/null @@ -1,157 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kunEkAgFB9Yt" - }, - "outputs": [], - "source": [ - "! pip install /content/llm_comparator-0.1-py3-none-any.whl" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QZlVpN83nJBv" - }, - "outputs": [], - "source": [ - "import vertexai\n", - "from google.colab import auth\n", - "\n", - "from llm_comparator import comparison\n", - "from llm_comparator import model_helper\n", - "from llm_comparator import llm_judge_runner\n", - "from llm_comparator import rationale_bullet_generator\n", - "from llm_comparator import rationale_cluster_generator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3ygUJPVxDMB7" - }, - "outputs": [], - "source": [ - "#@title Setup for using Vertex AI.\n", - "auth.authenticate_user()\n", - "\n", - "PROJECT_ID = 'pair-experimental' #@param {type: \"string\"}\n", - "REGION = 'us-central1' #@param {type: \"string\"}\n", - "\n", - "! 
gcloud config set project {PROJECT_ID}\n", - "\n", - "vertexai.init(project=PROJECT_ID, location=REGION)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JBGp4LiVCO00" - }, - "outputs": [], - "source": [ - "llm_judge_inputs = [\n", - " {'prompt': 'how are you?', 'response_a': 'good', 'response_b': 'bad'},\n", - " {'prompt': 'hello?', 'response_a': 'hello', 'response_b': 'hi'},\n", - " {'prompt': 'what is the capital of korea?', 'response_a': 'Seoul', 'response_b': 'Vancouver'}\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Xy6GxkgJCD-M" - }, - "outputs": [], - "source": [ - "#@title Initialize models used in the LLM Comparator evaluation.\n", - "generator = model_helper.VertexGenerationModelHelper()\n", - "embedder = model_helper.VertexEmbeddingModelHelper()\n", - "judge = llm_judge_runner.LLMJudgeRunner(generator)\n", - "bulletizer = rationale_bullet_generator.RationaleBulletGenerator(generator)\n", - "clusterer = rationale_cluster_generator.RationaleClusterGenerator(\n", - " generator, embedder\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QUU3V63vVbvS" - }, - "outputs": [], - "source": [ - "#@title Run the LLM Comparator evauation.\n", - "comparison_result = comparison.run(\n", - " llm_judge_inputs,\n", - " judge,\n", - " bulletizer,\n", - " clusterer,\n", - ")" - ] - }, - { - "metadata": { - "id": "hViMkxUGhnTA" - }, - "cell_type": "code", - "source": [ - "file_path = 'json_for_llm_comparator.json' # @param {type: \"string\"}\n", - "comparison.write(comparison_result, file_path)" - ], - "outputs": [], - "execution_count": null - }, - { - "metadata": { - "id": "7W4B1uzqvJZ-" - }, - "cell_type": "code", - "source": [ - "comparison.show_in_colab(file_path)" - ], - "outputs": [], - "execution_count": null - } - ], - "metadata": { - "colab": { - "last_runtime": { - "build_target": "", - "kind": "local" - }, - 
"private_outputs": true, - "provenance": [ - { - "file_id": "/piper/depot/google3/third_party/javascript/llm_comparator/python/notebooks/run_scripts_for_llm_comparator.ipynb?workspaceId=kahng:comparator12-2::citc", - "timestamp": 1718335330333 - }, - { - "file_id": "/piper/depot/google3/third_party/javascript/llm_comparator/python/notebooks/run_scripts_for_llm_comparator.ipynb?workspaceId=kahng:fig-export-comparator12-2-3818-change-2::citc", - "timestamp": 1718334507750 - }, - { - "file_id": "/piper/depot/google3/third_party/javascript/llm_comparator/python/notebooks/run_scripts_for_llm_comparator.ipynb?workspaceId=kahng:fig-export-comparator12-2-3818-change-2::citc", - "timestamp": 1718215678229 - } - ] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/python/pyproject.toml b/python/pyproject.toml index 7d2894b..f798ffc 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -13,7 +13,10 @@ readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9" dependencies = [ - "tqdm" + "absl-py", + "numpy", + "tqdm", + "vertexai", ] classifiers = [ "Development Status :: 4 - Beta", diff --git a/python/src/llm_comparator/_colab.py b/python/src/llm_comparator/_colab.py index d203521..381542f 100644 --- a/python/src/llm_comparator/_colab.py +++ b/python/src/llm_comparator/_colab.py @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== """Provides a constant that incidates whether we are running in Google Colab.""" try: diff --git a/python/src/llm_comparator/_logging.py b/python/src/llm_comparator/_logging.py index 71426e7..25e8d6f 100644 --- a/python/src/llm_comparator/_logging.py +++ b/python/src/llm_comparator/_logging.py @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== """Logging utilities.""" import logging diff --git a/python/src/llm_comparator/comparison.py b/python/src/llm_comparator/comparison.py index 392e3d8..6b05ed2 100644 --- a/python/src/llm_comparator/comparison.py +++ b/python/src/llm_comparator/comparison.py @@ -1,4 +1,18 @@ -"""Primary entry point for running evaluations with LLM Comparator.""" +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Entrypoint for running comparative evaluations with LLM Comparator.""" from collections.abc import Sequence import json diff --git a/python/src/llm_comparator/llm_judge_runner.py b/python/src/llm_comparator/llm_judge_runner.py index 3333db6..463901b 100644 --- a/python/src/llm_comparator/llm_judge_runner.py +++ b/python/src/llm_comparator/llm_judge_runner.py @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== """Runner for LLM Judge.""" from collections.abc import Sequence diff --git a/python/src/llm_comparator/model_helper.py b/python/src/llm_comparator/model_helper.py index b928615..36b6089 100644 --- a/python/src/llm_comparator/model_helper.py +++ b/python/src/llm_comparator/model_helper.py @@ -1,4 +1,18 @@ -"""Helper classes for calling LLMs.""" +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Classes for calling generating LLMs and embedding models.""" import abc from collections.abc import Iterable, Sequence diff --git a/python/src/llm_comparator/prompt_templates.py b/python/src/llm_comparator/prompt_templates.py index 9570ace..4120ee0 100644 --- a/python/src/llm_comparator/prompt_templates.py +++ b/python/src/llm_comparator/prompt_templates.py @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== """Prompt templates for the rationale summary generation script.""" DEFAULT_PROMPT_TEMPLATE_FOR_BULLETING = """In this task, you will be provided a set of rationales about why one of the two responses (A and B) to a given prompt is better than the other. 
diff --git a/python/src/llm_comparator/rationale_bullet_generator.py b/python/src/llm_comparator/rationale_bullet_generator.py index 90d4d21..6d05e03 100644 --- a/python/src/llm_comparator/rationale_bullet_generator.py +++ b/python/src/llm_comparator/rationale_bullet_generator.py @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== """Runner for generating rationale bullets for LLM Comparator.""" from collections.abc import Sequence diff --git a/python/src/llm_comparator/rationale_cluster_generator.py b/python/src/llm_comparator/rationale_cluster_generator.py index 1e7a7f8..375f302 100644 --- a/python/src/llm_comparator/rationale_cluster_generator.py +++ b/python/src/llm_comparator/rationale_cluster_generator.py @@ -1,7 +1,21 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== """Runner for creating rationale clusters for LLM Comparator.""" +from collections.abc import Mapping, Sequence import random -from typing import Mapping, Sequence import numpy as np import tqdm.auto @@ -23,7 +37,7 @@ class RationaleClusterGenerator: - """Runner for rationale cluster generation.""" + """Runner for generating labeled clusters using embedding similarity.""" def __init__( self, @@ -216,15 +230,17 @@ def run( Sequence[_RationaleCluster], Sequence[Sequence[_RationaleBulletWithClusterSimilarity]], ]: - """Generating rationale clusters. + """Generate clusters of similar rationale bullets. Args: - rationale_bullets_for_examples: Rationale bullets. - num_clusters: Number of clusters. + rationale_bullets_for_examples: The rationale bullets from a + llm_comparator.rationale_bullet_generator.RationaleBulletGenerator that + will be clustered. + num_clusters: The number of clusters to generate. Returns: - List of clusters and rationale_bullets augmented with similarities to - clusters for dynamic assignments from the client. + A tuple of index-aligned lists of clusters and rationale_bullets augmented + with similarities to clusters for dynamic assignments from the client. """ # Flatten rationales. flattened_rationales = self._flatten_rationales( diff --git a/python/src/llm_comparator/types.py b/python/src/llm_comparator/types.py index 82333c1..ccff075 100644 --- a/python/src/llm_comparator/types.py +++ b/python/src/llm_comparator/types.py @@ -1,4 +1,18 @@ -"""Type classes for LLM Comparator.""" +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Types for LLM Comparator.""" from collections.abc import Mapping from typing import Any, Optional, TypedDict @@ -7,11 +21,11 @@ class IndividualRating(TypedDict): - score: float | None - rating_label: str | None - is_flipped: bool | None - rationale: str | None - custom_fields: JsonDict = {} + score: Optional[float] + rating_label: Optional[str] + is_flipped: Optional[bool] + rationale: Optional[str] + custom_fields: JsonDict class LLMJudgeInput(TypedDict): @@ -22,29 +36,26 @@ class LLMJudgeInput(TypedDict): class LLMJudgeOutput(TypedDict): score: float - individual_rater_scores: list[IndividualRating] = [] + individual_rater_scores: list[IndividualRating] class RationaleBullet(TypedDict): rationale: str -class RationaleBulletWithClusterSimilarity(TypedDict): - rationale: str +class RationaleBulletWithClusterSimilarity(RationaleBullet): similarities: list[float] class Example(TypedDict): input_text: str - tags: list[str] = [] + tags: list[str] output_text_a: str output_text_b: str - score: Optional[float] = None - individual_rater_scores: list[IndividualRating] = [] - rationale_list: list[ - RationaleBullet | RationaleBulletWithClusterSimilarity - ] = [] - custom_fields: JsonDict = {} + score: Optional[float] + individual_rater_scores: list[IndividualRating] + rationale_list: list[RationaleBullet] + custom_fields: JsonDict class RationaleCluster(TypedDict): diff --git a/python/src/llm_comparator/utils.py b/python/src/llm_comparator/utils.py index 8696462..526602f 100644 
--- a/python/src/llm_comparator/utils.py
+++ b/python/src/llm_comparator/utils.py
@@ -1,3 +1,17 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
 """Utils for LLM Comparator scripts."""
 
 from collections.abc import Sequence
@@ -13,7 +27,7 @@
 
 
 def extract_xml_part(raw_output: str, tag_name: str) -> Optional[ET.Element]:
-  """Find parts where is in the XML-formatted output."""
+  """Find parts where <{tag_name}> is in the XML-formatted output."""
   xml_output = re.search(
       rf'<{tag_name}>(.*?)</{tag_name}>', raw_output, flags=re.DOTALL
   )
diff --git a/python/src/llm_comparator/utils_test.py b/python/src/llm_comparator/utils_test.py
index 108c24c..9606fef 100644
--- a/python/src/llm_comparator/utils_test.py
+++ b/python/src/llm_comparator/utils_test.py
@@ -1,4 +1,18 @@
-"""Tests for utils for LLM Comparator scripts."""
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for llm_comparator.utils.""" import unittest import xml.etree.ElementTree as ET