From fd14d3a97c54f5de1c77c9bf64b019a8393ef7e5 Mon Sep 17 00:00:00 2001 From: Giom-V Date: Thu, 12 Sep 2024 17:27:01 +0200 Subject: [PATCH] Code analysis using Gemini, LangChain and DeepLake example --- ..._using_Gemini_LangChain_and_DeepLake.ipynb | 593 ++++++++++++++++++ 1 file changed, 593 insertions(+) create mode 100644 examples/langchain/Code_analysis_using_Gemini_LangChain_and_DeepLake.ipynb diff --git a/examples/langchain/Code_analysis_using_Gemini_LangChain_and_DeepLake.ipynb b/examples/langchain/Code_analysis_using_Gemini_LangChain_and_DeepLake.ipynb new file mode 100644 index 00000000..d3bc70ae --- /dev/null +++ b/examples/langchain/Code_analysis_using_Gemini_LangChain_and_DeepLake.ipynb @@ -0,0 +1,593 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "KLHiTPXNTf2a" + }, + "source": [ + "##### Copyright 2024 Google LLC." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "oTuT5CsaTigz" + }, + "outputs": [], + "source": [ + "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZNM-D0pLXZeR" + }, + "source": [ + "# Gemini API: Code analysis using LangChain and DeepLake" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PRZo8H09Bs6u" + }, + "source": [ + "\n", + " \n", + "
\n", + " Run in Google Colab\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mOGNjAZMwMIk" + }, + "source": [ + "This notebook shows how to use Gemini API with [Langchain](https://python.langchain.com/v0.2/docs/introduction/) and [DeepLake](https://www.deeplake.ai/) for code analysis. The notebook will teach you:\n", + "- loading and splitting files\n", + "- creating a Deeplake database with embedding information\n", + "- setting up a retrieval QA chain" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jlzRUaWguYiE" + }, + "source": [ + "### Load dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6BiMHjZuRkQM" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/615.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m317.4/615.9 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m615.9/615.9 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m78.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m60.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m395.9/395.9 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.2/139.2 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m150.6/150.6 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m76.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m47.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.1/82.1 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m80.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m845.4 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.8/56.8 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.7/82.7 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for deeplake (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ], + "source": [ + "!pip install -q -U langchain-google-genai deeplake langchain langchain-text-splitters langchain-community" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FAsv4ybKOiUK" + }, + "outputs": [], + "source": [ + "from glob import glob\n", + "from IPython.display import Markdown, display\n", + "\n", + "from langchain.vectorstores import DeepLake\n", + "from langchain.document_loaders import TextLoader\n", + "from langchain_text_splitters import (\n", + " Language,\n", + " RecursiveCharacterTextSplitter,\n", + ")\n", + "from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings\n", + "from langchain.chains import RetrievalQA" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FQOGMejVu-6D" + }, + "source": [ + "### Configure your API key\n", + "\n", + "To run the following cell, your API key must be stored in a Colab Secret named `GOOGLE_API_KEY`. If you don't already have an API key, or you're not sure how to create a Colab Secret, see [Authentication](https://github.com/google-gemini/cookbook/blob/main/quickstarts/Authentication.ipynb) for an example.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ysayz8skEfBW" + }, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')\n", + "\n", + "os.environ[\"GOOGLE_API_KEY\"] = GOOGLE_API_KEY" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vUwX1PxWg31O" + }, + "source": [ + "## Prepare the files" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-ye873pizjeR" + }, + "source": [ + "First, download a [langchain-google](https://github.com/langchain-ai/langchain-google) repository. 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "xa5Om2YJZMs1"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cloning into 'langchain-google'...\n",
+      "remote: Enumerating objects: 3322, done.\u001b[K\n",
+      "remote: Counting objects: 100% (1028/1028), done.\u001b[K\n",
+      "remote: Compressing objects: 100% (353/353), done.\u001b[K\n",
+      "remote: Total 3322 (delta 849), reused 736 (delta 675), pack-reused 2294 (from 1)\u001b[K\n",
+      "Receiving objects: 100% (3322/3322), 1.78 MiB | 8.74 MiB/s, done.\n",
+      "Resolving deltas: 100% (2266/2266), done.\n"
+     ]
+    }
+   ],
+   "source": [
+    "!git clone https://github.com/langchain-ai/langchain-google"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "w4M2xVb9zlbp"
+   },
+   "source": [
+    "This example focuses only on the integration of the Gemini API with LangChain and ignores the rest of the codebase."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "nKXFtWDSz87m"
+   },
+   "outputs": [],
+   "source": [
+    "repo_match = \"langchain-google/libs/genai/langchain_google_genai**/*.py\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "w8ecqCn8z8l5"
+   },
+   "source": [
+    "Each file with a matching path will be loaded and split by `RecursiveCharacterTextSplitter`.\n",
+    "In this example, the splitter is told that the files are written in Python, which helps it split along natural code boundaries so that the resulting documents do not lack context."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "PYU4TJmXZrHF"
+   },
+   "outputs": [],
+   "source": [
+    "splitter = RecursiveCharacterTextSplitter.from_language(\n",
+    "    language=Language.PYTHON, chunk_size=2000, chunk_overlap=0\n",
+    ")\n",
+    "\n",
+    "docs = []\n",
+    "for file in glob(repo_match, recursive=True):\n",
+    "    loader = TextLoader(file, encoding='utf-8')\n",
+    "    docs.extend(loader.load_and_split(splitter))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "_nBTLdVD34Mg"
+   },
+   "source": [
+    "The `Language` enum provides the common separators used in the most popular programming languages, which lowers the chances of classes or functions being split in the middle."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Qtp7Vg0835LR"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['\\nclass ', '\\ndef ', '\\n\\tdef ', '\\n\\n', '\\n', ' ', '']"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# common separators used for Python files\n",
+    "RecursiveCharacterTextSplitter.get_separators_for_language(Language.PYTHON)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "80xuFyotsaZs"
+   },
+   "source": [
+    "## Create the database\n",
+    "The data will be loaded into memory, since in this case the database doesn't need to be persistent and is small enough to fit.\n",
+    "\n",
+    "The type of storage used is specified by a prefix in the dataset path, in this case `mem://`.\n",
+    "\n",
+    "Check out the other types of storage [here](https://docs.activeloop.ai/setup/storage-and-creds/storage-options); a persistent alternative is sketched below."
+   ]
+  },
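+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "storage-sketch-md"
+   },
+   "source": [
+    "For instance, if you wanted the database to survive the runtime, a minimal sketch is to swap the `mem://` prefix for a local folder path (the folder name here is just an illustration):\n",
+    "\n",
+    "```python\n",
+    "# persist the vectors to local disk instead of RAM\n",
+    "dataset_path = './my_deeplake/langchain_google'\n",
+    "```\n",
+    "\n",
+    "The rest of this notebook keeps the in-memory `mem://` path."
+   ]
+  },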
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "VKBZTFWUuYLA"
+   },
+   "outputs": [],
+   "source": [
+    "# define the path to the database\n",
+    "dataset_path = 'mem://deeplake/langchain_google'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "ak3qzDgRuY4S"
+   },
+   "outputs": [],
+   "source": [
+    "# define the embedding model\n",
+    "embeddings = GoogleGenerativeAIEmbeddings(model=\"models/text-embedding-004\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "27Cr4TkMualL"
+   },
+   "source": [
+    "Everything you need is ready, so now you can create the database. It should not take longer than a few seconds."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "1XCQBbW0g7M0"
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Creating 97 embeddings in 1 batches of size 97:: 100%|██████████| 1/1 [00:02<00:00,  2.41s/it]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset(path='mem://deeplake/langchain_google', tensors=['text', 'metadata', 'embedding', 'id'])\n",
+      "\n",
+      "  tensor      htype       shape      dtype  compression\n",
+      "  -------    -------     -------    -------  ------- \n",
+      "   text       text      (97, 1)      str     None   \n",
+      " metadata     json      (97, 1)      str     None   \n",
+      " embedding  embedding  (97, 768)  float32   None   \n",
+      "    id        text      (97, 1)      str     None   \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "db = DeepLake.from_documents(docs, embeddings, dataset_path=dataset_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "R594TesMl6Pl"
+   },
+   "source": [
+    "## Question Answering"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "F4Ih7Hveuzkj"
+   },
+   "source": [
+    "Set up the document retriever."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "DtEUchpAmBFQ"
+   },
+   "outputs": [],
+   "source": [
+    "retriever = db.as_retriever()\n",
+    "retriever.search_kwargs['distance_metric'] = 'cos'  # use cosine similarity\n",
+    "retriever.search_kwargs['k'] = 20  # number of documents to return"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "rvF5W6OvvAXB"
+   },
+   "outputs": [],
+   "source": [
+    "# define the chat model\n",
+    "llm = ChatGoogleGenerativeAI(model=\"gemini-1.5-flash-latest\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "fqM2_zAvwpgJ"
+   },
+   "source": [
+    "Now you can create a chain for question answering. In this case, the `RetrievalQA` chain will be used.\n",
+    "\n",
+    "If you want to use the chat option instead, use `ConversationalRetrievalChain`; a sketch follows the next cell."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "G__78Wv1nsNd"
+   },
+   "outputs": [],
+   "source": [
+    "qa = RetrievalQA.from_llm(llm, retriever=retriever)"
+   ]
+  },
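+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "chat-chain-sketch-md"
+   },
+   "source": [
+    "As a minimal sketch of the chat option mentioned above (reusing the `llm` and `retriever` defined earlier; the question string is only an illustration), `ConversationalRetrievalChain` additionally threads a `chat_history` through each call:\n",
+    "\n",
+    "```python\n",
+    "from langchain.chains import ConversationalRetrievalChain\n",
+    "\n",
+    "chat_qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)\n",
+    "\n",
+    "chat_history = []  # list of (question, answer) tuples\n",
+    "question = \"What does the GenAIAqa class do?\"\n",
+    "result = chat_qa.invoke({\"question\": question, \"chat_history\": chat_history})\n",
+    "chat_history.append((question, result[\"answer\"]))\n",
+    "```"
+   ]
+  },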
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "3SRcc8LGx6ki"
+   },
+   "source": [
+    "The chain is ready to answer your questions.\n",
+    "\n",
+    "NOTE: `Markdown` is used for improved formatting of the output."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "VE30oIsHFZJs"
+   },
+   "outputs": [],
+   "source": [
+    "# a helper function for calling the retrieval chain\n",
+    "def call_qa_chain(prompt):\n",
+    "    response = qa.invoke(prompt)\n",
+    "    display(Markdown(response[\"result\"]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "g4ttIFvmn392"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": "```\n_BaseGoogleGenerativeAI\n - GoogleGenerativeAI\n - ChatGoogleGenerativeAI\n```",
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "call_qa_chain(\"Show hierarchy for _BaseGoogleGenerativeAI. Do not show content of classes.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "VL8FXqQKgRB9"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": "The return type of embedding models is a list of lists of floats. \n\nHere's a breakdown:\n\n* **List[List[float]]**: This means the model returns a list of embedding vectors, where each vector is represented as a list of floats.\n\n* **Each embedding vector**: Represents a single piece of text (a document or a query) as a numerical representation. \n\n* **Floats**: Each element in the embedding vector is a floating-point number, capturing the semantic meaning of the text in a multi-dimensional space. \n",
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "call_qa_chain(\"What is the return type of embedding models?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "3VEO6-TwE0MN"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": "The following classes are related to Attributed Question and Answering (AQA) in the provided context:\n\n* **`GenAIAqa`**: This is the main class representing Google's AQA service. It takes a user's query and a list of passages as input and returns a grounded response, meaning the response is backed by the provided passages.\n* **`AqaInput`**: This class defines the input structure for the `GenAIAqa` class. It contains the user's `prompt` and a list of `source_passages` to be used by the AQA model.\n* **`AqaOutput`**: This class defines the output structure for the `GenAIAqa` class. It contains the `answer` to the user's query, the `attributed_passages` used to generate the answer, and the `answerable_probability`, which indicates the likelihood that the question can be answered from the provided passages.\n* **`_AqaModel`**: This is an internal wrapper class for Google's AQA model. It handles the communication with the Generative AI API and manages parameters like answer style, safety settings, and temperature.\n* **`GoogleVectorStore`**: This class provides a way to store and search documents in Google's vector database. It can be used to retrieve relevant passages for AQA, either from an entire corpus or a specific document.\n* **`Passage`**: This class represents a single passage of text. It includes the `text` itself and an optional `id`.\n* **`GroundedAnswer`**: This dataclass represents a grounded answer, containing the `answer`, the `attributed_passages`, and the `answerable_probability`.\n\nThese classes work together to provide a comprehensive AQA solution, allowing users to ask questions and get answers that are grounded in relevant text.\n",
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "call_qa_chain(\"What classes are related to Attributed Question and Answering?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "mNtpXjYKx9Ye"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": "The `GenAIAqa` class depends on the following:\n\n* **`google.ai.generativelanguage`:** This is the Google Generative AI Python package, which provides the underlying API for interacting with Google's Generative AI services.\n* **`langchain_core`:** This is the core LangChain library, which provides the framework for building and using language models and other components.\n* **`_genai_extension`:** This is an internal module within the `langchain-google-genai` package that provides utility functions for interacting with the Google Generative AI API.\n\nIn addition to these direct dependencies, the `GenAIAqa` class also indirectly depends on other libraries such as `typing`, `langchain_core.pydantic_v1`, and `langchain_core.runnables`. \n",
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "call_qa_chain(\"What are the dependencies of the GenAIAqa class?\")"
+   ]
+  },
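+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "inspect-retriever-md"
+   },
+   "source": [
+    "If you want to check which source files ground an answer, you can also query the retriever directly. A minimal sketch (the question string is only an illustration; the `source` metadata is filled in by `TextLoader`):\n",
+    "\n",
+    "```python\n",
+    "# list the files the retriever pulls chunks from for a given question\n",
+    "for doc in retriever.invoke(\"How are safety settings handled?\"):\n",
+    "    print(doc.metadata[\"source\"])\n",
+    "```"
+   ]
+  },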
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "wBtm3YM-7vxD"
+   },
+   "source": [
+    "## Summary\n",
+    "\n",
+    "The Gemini API works great with LangChain. The integration is seamless and provides an easy interface for:\n",
+    "- loading and splitting files\n",
+    "- creating a DeepLake database with embeddings\n",
+    "- answering questions based on context from the files"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "iwxEpyvx1jbU"
+   },
+   "source": [
+    "## What's next?\n",
+    "\n",
+    "This notebook showed only one possible use case for LangChain with the Gemini API. You can find many more [here](https://github.com/google-gemini/cookbook/blob/main/examples/langchain)."
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "name": "Code_analysis_using_Gemini_LangChain_and_DeepLake.ipynb",
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}