diff --git a/v2.4.x/Variables.json b/v2.4.x/Variables.json index ddaf855f7..15b522efb 100644 --- a/v2.4.x/Variables.json +++ b/v2.4.x/Variables.json @@ -1,16 +1,16 @@ { - "milvus_release_version": "2.4.19", - "milvus_release_tag": "2.4.19", + "milvus_release_version": "2.4.20", + "milvus_release_tag": "2.4.20", "milvus_deb_name": "milvus_2.2.0-1_amd64", "milvus_rpm_name": "milvus-2.2.0-1.el7.x86_64", "milvus_python_sdk_version": "2.4.x", "milvus_python_sdk_real_version": "2.4.13", "milvus_node_sdk_version": "2.4.x", - "milvus_node_sdk_real_version": "v2.4.9", + "milvus_node_sdk_real_version": "v2.4.10", "milvus_go_sdk_version": "2.3.x", "milvus_go_sdk_real_version": "2.4.0", "milvus_java_sdk_version": "2.4.x", - "milvus_java_sdk_real_version": "2.4.8", + "milvus_java_sdk_real_version": "2.4.10", "milvus_csharp_sdk_version": "2.2.x", "milvus_csharp_sdk_real_version": "2.2.14", "milvus_restful_sdk_version": "2.4.x", diff --git a/v2.4.x/assets/advanced_rag/adjust_order.png b/v2.4.x/assets/advanced_rag/adjust_order.png new file mode 100644 index 000000000..59887ee8e Binary files /dev/null and b/v2.4.x/assets/advanced_rag/adjust_order.png differ diff --git a/v2.4.x/assets/advanced_rag/compress_prompt.png b/v2.4.x/assets/advanced_rag/compress_prompt.png new file mode 100644 index 000000000..419ccb0c5 Binary files /dev/null and b/v2.4.x/assets/advanced_rag/compress_prompt.png differ diff --git a/v2.4.x/assets/advanced_rag/hierarchical_index.png b/v2.4.x/assets/advanced_rag/hierarchical_index.png new file mode 100644 index 000000000..684ac310f Binary files /dev/null and b/v2.4.x/assets/advanced_rag/hierarchical_index.png differ diff --git a/v2.4.x/assets/advanced_rag/hybrid_and_rerank.png b/v2.4.x/assets/advanced_rag/hybrid_and_rerank.png new file mode 100644 index 000000000..f2d90590e Binary files /dev/null and b/v2.4.x/assets/advanced_rag/hybrid_and_rerank.png differ diff --git a/v2.4.x/assets/advanced_rag/hyde.png b/v2.4.x/assets/advanced_rag/hyde.png new file mode 
100644 index 000000000..572266e47 Binary files /dev/null and b/v2.4.x/assets/advanced_rag/hyde.png differ diff --git a/v2.4.x/assets/advanced_rag/hypothetical_question.png b/v2.4.x/assets/advanced_rag/hypothetical_question.png new file mode 100644 index 000000000..121362a8c Binary files /dev/null and b/v2.4.x/assets/advanced_rag/hypothetical_question.png differ diff --git a/v2.4.x/assets/advanced_rag/merge_chunks.png b/v2.4.x/assets/advanced_rag/merge_chunks.png new file mode 100644 index 000000000..a23328539 Binary files /dev/null and b/v2.4.x/assets/advanced_rag/merge_chunks.png differ diff --git a/v2.4.x/assets/advanced_rag/metadata_filtering.png b/v2.4.x/assets/advanced_rag/metadata_filtering.png new file mode 100644 index 000000000..76d13e026 Binary files /dev/null and b/v2.4.x/assets/advanced_rag/metadata_filtering.png differ diff --git a/v2.4.x/assets/advanced_rag/query_routing.png b/v2.4.x/assets/advanced_rag/query_routing.png new file mode 100644 index 000000000..ab355e0bd Binary files /dev/null and b/v2.4.x/assets/advanced_rag/query_routing.png differ diff --git a/v2.4.x/assets/advanced_rag/query_routing_with_sub_query.png b/v2.4.x/assets/advanced_rag/query_routing_with_sub_query.png new file mode 100644 index 000000000..9491f334b Binary files /dev/null and b/v2.4.x/assets/advanced_rag/query_routing_with_sub_query.png differ diff --git a/v2.4.x/assets/advanced_rag/self_reflection.png b/v2.4.x/assets/advanced_rag/self_reflection.png new file mode 100644 index 000000000..c7e8ddb18 Binary files /dev/null and b/v2.4.x/assets/advanced_rag/self_reflection.png differ diff --git a/v2.4.x/assets/advanced_rag/sentence_window.png b/v2.4.x/assets/advanced_rag/sentence_window.png new file mode 100644 index 000000000..1b6a7cac0 Binary files /dev/null and b/v2.4.x/assets/advanced_rag/sentence_window.png differ diff --git a/v2.4.x/assets/advanced_rag/stepback.png b/v2.4.x/assets/advanced_rag/stepback.png new file mode 100644 index 000000000..6c9f1f309 Binary files 
/dev/null and b/v2.4.x/assets/advanced_rag/stepback.png differ diff --git a/v2.4.x/assets/advanced_rag/sub_query.png b/v2.4.x/assets/advanced_rag/sub_query.png new file mode 100644 index 000000000..db4be5f5e Binary files /dev/null and b/v2.4.x/assets/advanced_rag/sub_query.png differ diff --git a/v2.4.x/assets/advanced_rag/vanilla_rag.png b/v2.4.x/assets/advanced_rag/vanilla_rag.png new file mode 100644 index 000000000..f2a45bafe Binary files /dev/null and b/v2.4.x/assets/advanced_rag/vanilla_rag.png differ diff --git a/v2.4.x/site/en/adminGuide/configure-docker.md b/v2.4.x/site/en/adminGuide/configure-docker.md index c5020aef1..21183c883 100644 --- a/v2.4.x/site/en/adminGuide/configure-docker.md +++ b/v2.4.x/site/en/adminGuide/configure-docker.md @@ -16,10 +16,10 @@ In current release, all parameters take effect only after Milvus restarts. ## Download a configuration file -[Download](https://raw.githubusercontent.com/milvus-io/milvus/v2.4.19/configs/milvus.yaml) `milvus.yaml` directly or with the following command. +[Download](https://raw.githubusercontent.com/milvus-io/milvus/v2.4.20/configs/milvus.yaml) `milvus.yaml` directly or with the following command. ``` -$ wget https://raw.githubusercontent.com/milvus-io/milvus/v2.4.19/configs/milvus.yaml +$ wget https://raw.githubusercontent.com/milvus-io/milvus/v2.4.20/configs/milvus.yaml ``` ## Modify the configuration file @@ -178,13 +178,13 @@ Sorted by: ## Download an installation file -Download the installation file for Milvus [standalone](https://github.com/milvus-io/milvus/releases/download/v2.4.19/milvus-standalone-docker-compose.yml), and save it as `docker-compose.yml`. +Download the installation file for Milvus [standalone](https://github.com/milvus-io/milvus/releases/download/v2.4.20/milvus-standalone-docker-compose.yml), and save it as `docker-compose.yml`. You can also simply run the following command. 
``` # For Milvus standalone -$ wget https://github.com/milvus-io/milvus/releases/download/v2.4.19/milvus-standalone-docker-compose.yml -O docker-compose.yml +$ wget https://github.com/milvus-io/milvus/releases/download/v2.4.20/milvus-standalone-docker-compose.yml -O docker-compose.yml ``` ## Modify the installation file diff --git a/v2.4.x/site/en/adminGuide/operator.md b/v2.4.x/site/en/adminGuide/operator.md index 414ba74b4..4002e46d4 100644 --- a/v2.4.x/site/en/adminGuide/operator.md +++ b/v2.4.x/site/en/adminGuide/operator.md @@ -12,7 +12,7 @@ Milvus cluster depends on components including object storage, etcd, and Pulsar. This topic assumes that you have deployed Milvus Operator. -
See Deploy Milvus Operator for more information.
+
See Deploy Milvus Operator for more information.
You need to specify a configuration file for using Milvus Operator to start a Milvus cluster. diff --git a/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-docker.md b/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-docker.md index ae3d7a447..a019b6a0c 100644 --- a/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-docker.md +++ b/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-docker.md @@ -10,7 +10,7 @@ title: Upgrade Milvus Cluster with Docker Compose This topic describes how to upgrade your Milvus using Docker Compose. -In normal cases, you can [upgrade Milvus by changing its image](#Upgrade-Milvus-by-changing-its-image). However, you need to [migrate the metadata](#Migrate-the-metadata) before any upgrade from v2.1.x to v2.4.19. +In normal cases, you can [upgrade Milvus by changing its image](#Upgrade-Milvus-by-changing-its-image). However, you need to [migrate the metadata](#Migrate-the-metadata) before any upgrade from v2.1.x to v2.4.20. ## Upgrade Milvus by changing its image @@ -24,35 +24,35 @@ In normal cases, you can upgrade Milvus as follows: ... rootcoord: container_name: milvus-rootcoord - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ... proxy: container_name: milvus-proxy - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ... querycoord: container_name: milvus-querycoord - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ... querynode: container_name: milvus-querynode - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ... indexcoord: container_name: milvus-indexcoord - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ... indexnode: container_name: milvus-indexnode - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ... datacoord: container_name: milvus-datacoord - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ... datanode: container_name: milvus-datanode - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ``` 2. 
Run the following commands to perform the upgrade. @@ -80,7 +80,7 @@ In normal cases, you can upgrade Milvus as follows: runWithBackup: true config: sourceVersion: 2.1.4 # Specify your milvus version - targetVersion: 2.4.19 + targetVersion: 2.4.20 backupFilePath: /tmp/migration.bak metastore: type: etcd diff --git a/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-helm.md b/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-helm.md index c3b367aa4..5a65ff867 100644 --- a/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-helm.md +++ b/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-helm.md @@ -85,11 +85,11 @@ zilliztech/milvus 4.1.0 2.3.0 Milvus is an ope You can choose the upgrade path for your Milvus as follows: -
- [Conduct a rolling upgrade](#conduct-a-rolling-upgrade) from Milvus v2.2.3 and later releases to v2.4.19.
+
- [Conduct a rolling upgrade](#conduct-a-rolling-upgrade) from Milvus v2.2.3 and later releases to v2.4.20.
-- [Upgrade Milvus using Helm](#Upgrade-Milvus-using-Helm) for an upgrade from a minor release before v2.2.3 to v2.4.19. +- [Upgrade Milvus using Helm](#Upgrade-Milvus-using-Helm) for an upgrade from a minor release before v2.2.3 to v2.4.20. -- [Migrate the metadata](#Migrate-the-metadata) before the upgrade from Milvus v2.1.x to v2.4.19. +- [Migrate the metadata](#Migrate-the-metadata) before the upgrade from Milvus v2.1.x to v2.4.20.
@@ -111,10 +111,10 @@ The script applies only to the upgrade of Milvus installed with Helm. The follow | `w` | New Milvus image tag | `milvusdb/milvus:v2.2.3` | True | | `o` | Operation | `update` | False | -Once you have ensured that all deployments in your Milvus instance are in their normal status. You can run the following command to upgrade the Milvus instance to 2.4.19. +Once you have ensured that all deployments in your Milvus instance are in their normal status, you can run the following command to upgrade the Milvus instance to 2.4.20. ```shell -sh rollingUpdate.sh -n default -i my-release -o update -t 2.4.19 -w 'milvusdb/milvus:v2.4.19' +sh rollingUpdate.sh -n default -i my-release -o update -t 2.4.20 -w 'milvusdb/milvus:v2.4.20' ```
diff --git a/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-operator.md b/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-operator.md index 4224bc7e6..99e5c0617 100644 --- a/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-operator.md +++ b/v2.4.x/site/en/adminGuide/upgrade_milvus_cluster-operator.md @@ -26,9 +26,9 @@ helm -n milvus-operator upgrade milvus-operator zilliztech-milvus-operator/milvu Once you have upgraded your Milvus operator to the latest version, you have the following choices: -- To upgrade Milvus from v2.2.3 or later releases to 2.4.19, you can [conduct a rolling upgrade](#Conduct-a-rolling-upgrade). -- To upgrade Milvus from a minor release before v2.2.3 to 2.4.19, you are advised to [upgrade Milvus by changing its image version](#Upgrade-Milvus-by-changing-its-image). -- To upgrade Milvus from v2.1.x to 2.4.19, you need to [migrate the metadata](#Migrate-the-metadata) before the actual upgrade. +- To upgrade Milvus from v2.2.3 or later releases to 2.4.20, you can [conduct a rolling upgrade](#Conduct-a-rolling-upgrade). +- To upgrade Milvus from a minor release before v2.2.3 to 2.4.20, you are advised to [upgrade Milvus by changing its image version](#Upgrade-Milvus-by-changing-its-image). +- To upgrade Milvus from v2.1.x to 2.4.20, you need to [migrate the metadata](#Migrate-the-metadata) before the actual upgrade. ## Conduct a rolling upgrade @@ -47,7 +47,7 @@ spec: components: enableRollingUpdate: true imageUpdateMode: rollingUpgrade # Default value, can be omitted - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ``` In this above configuration file, set `spec.components.enableRollingUpdate` to `true` and set `spec.components.image` to the desired Milvus version. 
@@ -63,7 +63,7 @@ spec: components: enableRollingUpdate: true imageUpdateMode: all - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ``` You can set `spec.components.imageUpdateMode` to `rollingDowngrade` to have Milvus replace coordinator pod images with a lower version. @@ -101,7 +101,7 @@ metadata: spec: # Omit other fields ... components: - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ``` Then run the following to perform the upgrade: @@ -112,11 +112,11 @@ kubectl patch -f milvusupgrade.yaml ## Migrate the metadata -Since Milvus 2.2.0, the metadata is incompatible with that in previous releases. The following example snippets assume an upgrade from Milvus 2.1.4 to Milvus 2.4.19. +Since Milvus 2.2.0, the metadata is incompatible with that in previous releases. The following example snippets assume an upgrade from Milvus 2.1.4 to Milvus 2.4.20. ### 1. Create a `.yaml` file for metadata migration -Create a metadata migration file. The following is an example. You need to specify the `name`, `sourceVersion`, and `targetVersion` in the configuration file. The following example sets the `name` to `my-release-upgrade`, `sourceVersion` to `v2.1.4`, and `targetVersion` to `v2.4.19`. This means that your Milvus cluster will be upgraded from v2.1.4 to v2.4.19. +Create a metadata migration file. The following is an example. You need to specify the `name`, `sourceVersion`, and `targetVersion` in the configuration file. The following example sets the `name` to `my-release-upgrade`, `sourceVersion` to `v2.1.4`, and `targetVersion` to `v2.4.20`. This means that your Milvus cluster will be upgraded from v2.1.4 to v2.4.20. 
``` apiVersion: milvus.io/v1beta1 @@ -128,9 +128,9 @@ spec: namespace: default name: my-release sourceVersion: "v2.1.4" - targetVersion: "v2.4.19" + targetVersion: "v2.4.20" # below are some omit default values: - # targetImage: "milvusdb/milvus:v2.4.19" + # targetImage: "milvusdb/milvus:v2.4.20" # toolImage: "milvusdb/meta-migration:v2.2.0" # operation: upgrade # rollbackIfFailed: true diff --git a/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-docker.md b/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-docker.md index bfa9a4b02..fa8b264d6 100644 --- a/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-docker.md +++ b/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-docker.md @@ -14,7 +14,7 @@ title: Upgrade Milvus Standalone with Docker Compose This topic describes how to upgrade your Milvus using Docker Compose. -In normal cases, you can [upgrade Milvus by changing its image](#Upgrade-Milvus-by-changing-its-image). However, you need to [migrate the metadata](#Migrate-the-metadata) before any upgrade from v2.1.x to v2.4.19. +In normal cases, you can [upgrade Milvus by changing its image](#Upgrade-Milvus-by-changing-its-image). However, you need to [migrate the metadata](#Migrate-the-metadata) before any upgrade from v2.1.x to v2.4.20.
@@ -32,7 +32,7 @@ In normal cases, you can upgrade Milvus as follows: ... standalone: container_name: milvus-standalone - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ``` 2. Run the following commands to perform the upgrade. @@ -60,7 +60,7 @@ In normal cases, you can upgrade Milvus as follows: runWithBackup: true config: sourceVersion: 2.1.4 # Specify your milvus version - targetVersion: 2.4.19 + targetVersion: 2.4.20 backupFilePath: /tmp/migration.bak metastore: type: etcd diff --git a/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-helm.md b/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-helm.md index 0d58f5f0c..7f14c168b 100644 --- a/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-helm.md +++ b/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-helm.md @@ -86,11 +86,11 @@ zilliztech/milvus 4.1.0 2.3.0 Milvus is an ope You can choose the upgrade path for your Milvus as follows: -
- [Conduct a rolling upgrade](#conduct-a-rolling-upgrade) from Milvus v2.2.3 and later releases to v2.4.19.
+
- [Conduct a rolling upgrade](#conduct-a-rolling-upgrade) from Milvus v2.2.3 and later releases to v2.4.20.
-- [Upgrade Milvus using Helm](#Upgrade-Milvus-using-Helm) for an upgrade from a minor release before v2.2.3 to v2.4.19. +- [Upgrade Milvus using Helm](#Upgrade-Milvus-using-Helm) for an upgrade from a minor release before v2.2.3 to v2.4.20. -- [Migrate the metadata](#Migrate-the-metadata) before the upgrade from Milvus v2.1.x to v2.4.19. +- [Migrate the metadata](#Migrate-the-metadata) before the upgrade from Milvus v2.1.x to v2.4.20.
@@ -112,10 +112,10 @@ The script applies only to the upgrade of Milvus installed with Helm. The follow | `w` | New Milvus image tag | `milvusdb/milvus:v2.2.3` | True | | `o` | Operation | `update` | False | -Once you have ensured that all deployments in your Milvus instance are in their normal status. You can run the following command to upgrade the Milvus instance to 2.4.19. +Once you have ensured that all deployments in your Milvus instance are in their normal status, you can run the following command to upgrade the Milvus instance to 2.4.20. ```shell -sh rollingUpdate.sh -n default -i my-release -o update -t 2.4.19 -w 'milvusdb/milvus:v2.4.19' +sh rollingUpdate.sh -n default -i my-release -o update -t 2.4.20 -w 'milvusdb/milvus:v2.4.20' ```
@@ -203,42 +203,42 @@ The following table lists the operations you can do for meta migration. 4. Migrate the Milvus metadata. 5. Start Milvus components with a new image. -#### 2. Upgrade Milvus from v2.1.x to 2.4.19 +#### 2. Upgrade Milvus from v2.1.x to 2.4.20 -The following commands assume that you upgrade Milvus from v2.1.4 to 2.4.19. Change them to the versions that fit your needs. +The following commands assume that you upgrade Milvus from v2.1.4 to 2.4.20. Change them to the versions that fit your needs. 1. Specify Milvus instance name, source Milvus version, and target Milvus version. ``` - ./migrate.sh -i my-release -s 2.1.4 -t 2.4.19 + ./migrate.sh -i my-release -s 2.1.4 -t 2.4.20 ``` 2. Specify the namespace with `-n` if your Milvus is not installed in the default K8s namespace. ``` - ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.19 + ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.20 ``` 3. Specify the root path with `-r` if your Milvus is installed with the custom `rootpath`. ``` - ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.19 -r by-dev + ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.20 -r by-dev ``` 4. Specify the image tag with `-w` if your Milvus is installed with a custom `image`. ``` - ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.19 -r by-dev -w milvusdb/milvus:v2.4.19 + ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.20 -r by-dev -w milvusdb/milvus:v2.4.20 ``` 5. Set `-d true` if you want to automatically remove the migration pod after the migration is completed. ``` - ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.19 -w milvusdb/milvus:v2.4.19 -d true + ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.20 -w milvusdb/milvus:v2.4.20 -d true ``` 6. Rollback and migrate again if the migration fails. 
``` - ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.19 -r by-dev -o rollback -w milvusdb/milvus:v2.1.1 - ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.19 -r by-dev -o migrate -w milvusdb/milvus:v2.4.19 + ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.20 -r by-dev -o rollback -w milvusdb/milvus:v2.1.1 + ./migrate.sh -i my-release -n milvus -s 2.1.4 -t 2.4.20 -r by-dev -o migrate -w milvusdb/milvus:v2.4.20 diff --git a/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-operator.md b/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-operator.md index 0f39dda80..eb90966b6 100644 --- a/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-operator.md +++ b/v2.4.x/site/en/adminGuide/upgrade_milvus_standalone-operator.md @@ -26,9 +26,9 @@ helm -n milvus-operator upgrade milvus-operator zilliztech-milvus-operator/milvu Once you have upgraded your Milvus operator to the latest version, you have the following choices: -- To upgrade Milvus from v2.2.3 or later releases to 2.4.19, you can [conduct a rolling upgrade](#Conduct-a-rolling-upgrade). -- To upgrade Milvus from a minor release before v2.2.3 to 2.4.19, you are advised to [upgrade Milvus by changing its image version](#Upgrade-Milvus-by-changing-its-image). -- To upgrade Milvus from v2.1.x to 2.4.19, you need to [migrate the metadata](#Migrate-the-metadata) before the actual upgrade. +- To upgrade Milvus from v2.2.3 or later releases to 2.4.20, you can [conduct a rolling upgrade](#Conduct-a-rolling-upgrade). +- To upgrade Milvus from a minor release before v2.2.3 to 2.4.20, you are advised to [upgrade Milvus by changing its image version](#Upgrade-Milvus-by-changing-its-image). +- To upgrade Milvus from v2.1.x to 2.4.20, you need to [migrate the metadata](#Migrate-the-metadata) before the actual upgrade. 
## Conduct a rolling upgrade @@ -47,7 +47,7 @@ spec: components: enableRollingUpdate: true imageUpdateMode: rollingUpgrade # Default value, can be omitted - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ``` In this above configuration file, set `spec.components.enableRollingUpdate` to `true` and set `spec.components.image` to the desired Milvus version. @@ -63,7 +63,7 @@ spec: components: enableRollingUpdate: true imageUpdateMode: all - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ``` You can set `spec.components.imageUpdateMode` to `rollingDowngrade` to have Milvus replace coordinator pod images with a lower version. @@ -102,7 +102,7 @@ labels: spec: # Omit other fields ... components: - image: milvusdb/milvus:v2.4.19 + image: milvusdb/milvus:v2.4.20 ``` Then run the following to perform the upgrade: @@ -113,11 +113,11 @@ kubectl patch -f milvusupgrade.yaml ## Migrate the metadata -Since Milvus 2.2.0, the metadata is incompatible with that in previous releases. The following example snippets assume an upgrade from Milvus 2.1.4 to Milvus v2.4.19. +Since Milvus 2.2.0, the metadata is incompatible with that in previous releases. The following example snippets assume an upgrade from Milvus 2.1.4 to Milvus v2.4.20. ### 1. Create a `.yaml` file for metadata migration -Create a metadata migration file. The following is an example. You need to specify the `name`, `sourceVersion`, and `targetVersion` in the configuration file. The following example sets the `name` to `my-release-upgrade`, `sourceVersion` to `v2.1.4`, and `targetVersion` to `v2.4.19`. This means that your Milvus instance will be upgraded from v2.1.4 to v2.4.19. +Create a metadata migration file. The following is an example. You need to specify the `name`, `sourceVersion`, and `targetVersion` in the configuration file. The following example sets the `name` to `my-release-upgrade`, `sourceVersion` to `v2.1.4`, and `targetVersion` to `v2.4.20`. 
This means that your Milvus instance will be upgraded from v2.1.4 to v2.4.20. ``` apiVersion: milvus.io/v1beta1 @@ -129,9 +129,9 @@ spec: namespace: default name: my-release sourceVersion: "v2.1.4" - targetVersion: "v2.4.19" + targetVersion: "v2.4.20" # below are some omit default values: - # targetImage: "milvusdb/milvus:v2.4.19" + # targetImage: "milvusdb/milvus:v2.4.20" # toolImage: "milvusdb/meta-migration:v2.2.0" # operation: upgrade # rollbackIfFailed: true diff --git a/v2.4.x/site/en/getstarted/install_SDKs/install-java.md b/v2.4.x/site/en/getstarted/install_SDKs/install-java.md index aba504355..90f88e3fc 100644 --- a/v2.4.x/site/en/getstarted/install_SDKs/install-java.md +++ b/v2.4.x/site/en/getstarted/install_SDKs/install-java.md @@ -27,14 +27,14 @@ Run the following command to install Milvus Java SDK. io.milvus milvus-sdk-java - 2.4.8 + 2.4.10 ``` - Gradle/Grails ``` -implementation 'io.milvus:milvus-sdk-java:2.4.8' +implementation 'io.milvus:milvus-sdk-java:2.4.10' ``` ## What's next diff --git a/v2.4.x/site/en/getstarted/run-milvus-docker/install_standalone-docker-compose.md b/v2.4.x/site/en/getstarted/run-milvus-docker/install_standalone-docker-compose.md index c5018a45d..2a76a8151 100644 --- a/v2.4.x/site/en/getstarted/run-milvus-docker/install_standalone-docker-compose.md +++ b/v2.4.x/site/en/getstarted/run-milvus-docker/install_standalone-docker-compose.md @@ -21,7 +21,7 @@ Milvus provides a Docker Compose configuration file in the Milvus repository. 
To ```shell # Download the configuration file -$ wget https://github.com/milvus-io/milvus/releases/download/v2.4.19/milvus-standalone-docker-compose.yml -O docker-compose.yml +$ wget https://github.com/milvus-io/milvus/releases/download/v2.4.20/milvus-standalone-docker-compose.yml -O docker-compose.yml # Start Milvus $ sudo docker compose up -d diff --git a/v2.4.x/site/en/getstarted/run-milvus-gpu/install_standalone-docker-compose-gpu.md b/v2.4.x/site/en/getstarted/run-milvus-gpu/install_standalone-docker-compose-gpu.md index 3d48ecf63..8f88b6b3b 100644 --- a/v2.4.x/site/en/getstarted/run-milvus-gpu/install_standalone-docker-compose-gpu.md +++ b/v2.4.x/site/en/getstarted/run-milvus-gpu/install_standalone-docker-compose-gpu.md @@ -27,10 +27,10 @@ To install Milvus with GPU support using Docker Compose, follow these steps. ### 1. Download and configure the YAML file -Download [`milvus-standalone-docker-compose-gpu.yml`](https://github.com/milvus-io/milvus/releases/download/v2.4.19/milvus-standalone-docker-compose-gpu.yml) and save it as docker-compose.yml manually, or with the following command. +Download [`milvus-standalone-docker-compose-gpu.yml`](https://github.com/milvus-io/milvus/releases/download/v2.4.20/milvus-standalone-docker-compose-gpu.yml) and save it as docker-compose.yml manually, or with the following command. 
```shell -$ wget https://github.com/milvus-io/milvus/releases/download/v2.4.19/milvus-standalone-docker-compose-gpu.yml -O docker-compose.yml +$ wget https://github.com/milvus-io/milvus/releases/download/v2.4.20/milvus-standalone-docker-compose-gpu.yml -O docker-compose.yml ``` You need to make some changes to the environment variables of the standalone service in the YAML file as follows: diff --git a/v2.4.x/site/en/menuStructure/en.json b/v2.4.x/site/en/menuStructure/en.json index 62d84e618..124f63907 100644 --- a/v2.4.x/site/en/menuStructure/en.json +++ b/v2.4.x/site/en/menuStructure/en.json @@ -1634,63 +1634,75 @@ "children": [] }, { - "label": "Image Search with Milvus", - "id": "image_similarity_search.md", + "label": "Advanced RAG", + "id": "how_to_enhance_your_rag.md", "order": 2, + "children": [] + }, + { + "label": "Full-Text Search with Milvus", + "id": "full_text_search_with_milvus.md", + "order": 3, "children": [] }, { "label": "Hybrid Search with Milvus", "id": "hybrid_search_with_milvus.md", - "order": 3, + "order": 4, + "children": [] + }, + { + "label": "Image Search with Milvus", + "id": "image_similarity_search.md", + "order": 5, "children": [] }, { "label": "Multimodal RAG", "id": "multimodal_rag_with_milvus.md", - "order": 4, + "order": 6, "children": [] }, { "label": "Graph RAG with Milvus", "id": "graph_rag_with_milvus.md", - "order": 6, + "order": 7, "children": [] }, { "label": "Contextual Retrieval", "id": "contextual_retrieval_with_milvus.md", - "order": 5, + "order": 8, "children": [] }, { "label": "HDBSCAN Clustering", "id": "hdbscan_clustering_with_milvus.md", - "order": 6, + "order": 9, "children": [] }, { "label": "Use ColPali for Multi-Modal Retrieval", "id": "use_ColPali_with_milvus.md", - "order": 7, + "order": 10, "children": [] }, { "label": "Vector Visualization", "id": "vector_visualization.md", - "order": 8, + "order": 11, "children": [] }, { "label": "Movie Recommendation", "id": "movie_recommendation_with_milvus.md", - 
"order": 9, + "order": 12, "children": [] }, { "label": "Funnel Search with Matryoshka Embeddings", "id": "funnel_search_with_matryoshka.md", - "order": 10, + "order": 13, "children": [] }, { diff --git a/v2.4.x/site/en/release_notes.md b/v2.4.x/site/en/release_notes.md index ecc41d0ed..5abd5869f 100644 --- a/v2.4.x/site/en/release_notes.md +++ b/v2.4.x/site/en/release_notes.md @@ -7,6 +7,27 @@ title: Release Notes Find out what’s new in Milvus! This page summarizes new features, improvements, known issues, and bug fixes in each release. You can find the release notes for each released version after v2.4.0 in this section. We suggest that you regularly visit this page to learn about updates. +## v2.4.20 + +Release Date: January 2, 2025 + +| Milvus version | Python SDK version | Java SDK version | Node.js SDK version | +|----------------|--------------------|------------------|---------------------| +| 2.4.20 | 2.4.13 | 2.4.10 | 2.4.10 | + +Milvus 2.4.20 addresses several critical issues, including fixing mmap for sparse index, resolving the failure to parse the correct database name when altering collection fields, and preventing deadlocks in multiple compaction tasks within the scheduler. Additionally, this version introduces an enhancement that allows adjustment of the system limit for maximum varchar length through a configuration item in the YAML file. We highly recommend upgrading to this version for improved stability and reliability. 
+ +### Improvements + +- Added param for tuning max varchar length ([#38890](https://github.com/milvus-io/milvus/pull/38890)) + +### Bug fixes + +- Enabled mmap for sparse index ([#38849](https://github.com/milvus-io/milvus/pull/38849)) +- Fixed altercollectionfield interceptor dbname ([#38664](https://github.com/milvus-io/milvus/pull/38664)) +- Released the compaction task lock when the function returns ([#38857](https://github.com/milvus-io/milvus/pull/38857)) +- Fixed a use-after-free of the heap-allocated retrieve plan during reduce ([#38842](https://github.com/milvus-io/milvus/pull/38842)) + ## v2.4.19 Release Date: December 26, 2024 diff --git a/v2.4.x/site/en/tutorials/full_text_search_with_milvus.md b/v2.4.x/site/en/tutorials/full_text_search_with_milvus.md new file mode 100644 index 000000000..b870ee9b5 --- /dev/null +++ b/v2.4.x/site/en/tutorials/full_text_search_with_milvus.md @@ -0,0 +1,367 @@ +--- +id: full_text_search_with_milvus.md +summary: With the release of Milvus 2.5, Full Text Search enables users to efficiently search for text based on keywords or phrases, providing powerful text retrieval capabilities. This feature enhances search accuracy and can be seamlessly combined with embedding-based retrieval for hybrid search, allowing for both semantic and keyword-based results in a single query. In this notebook, we will show basic usage of full text search in Milvus. +title: Full Text Search with Milvus +--- + + + Open In Colab + + + GitHub Repository + + +# Full Text Search with Milvus + +With the release of Milvus 2.5, Full Text Search enables users to efficiently search for text based on keywords or phrases, providing powerful text retrieval capabilities. This feature enhances search accuracy and can be seamlessly combined with embedding-based retrieval for hybrid search, allowing for both semantic and keyword-based results in a single query. In this notebook, we will show basic usage of full text search in Milvus. 
+ +## Preparation + +### Download the dataset +The following command will download the example data used in the original Anthropic [demo](https://github.com/anthropics/anthropic-cookbook/blob/main/skills/contextual-embeddings/guide.ipynb). + + +```shell +$ wget https://raw.githubusercontent.com/anthropics/anthropic-cookbook/refs/heads/main/skills/contextual-embeddings/data/codebase_chunks.json +$ wget https://raw.githubusercontent.com/anthropics/anthropic-cookbook/refs/heads/main/skills/contextual-embeddings/data/evaluation_set.jsonl +``` + +### Install Milvus 2.5 +Check the [official installation guide](https://milvus.io/docs/install_standalone-docker-compose.md) for more details. + +### Install PyMilvus +Run the following command to install PyMilvus: + + +```shell +$ pip install "pymilvus[model]" -U +``` + +### Define the Retriever + + +```python +import json + +from pymilvus import ( + MilvusClient, + DataType, + Function, + FunctionType, + AnnSearchRequest, + RRFRanker, +) + +from pymilvus.model.hybrid import BGEM3EmbeddingFunction + + +class HybridRetriever: + def __init__(self, uri, collection_name="hybrid", dense_embedding_function=None): + self.uri = uri + self.collection_name = collection_name + self.embedding_function = dense_embedding_function + self.use_reranker = True + self.use_sparse = True + self.client = MilvusClient(uri=uri) + + def build_collection(self): + if isinstance(self.embedding_function.dim, dict): + dense_dim = self.embedding_function.dim["dense"] + else: + dense_dim = self.embedding_function.dim + + tokenizer_params = { + "tokenizer": "standard", + "filter": [ + "lowercase", + { + "type": "length", + "max": 200, + }, + {"type": "stemmer", "language": "english"}, + { + "type": "stop", + "stop_words": [ + "a", + "an", + "and", + "are", + "as", + "at", + "be", + "but", + "by", + "for", + "if", + "in", + "into", + "is", + "it", + "no", + "not", + "of", + "on", + "or", + "such", + "that", + "the", + "their", + "then", + "there", + "these", + 
"they", + "this", + "to", + "was", + "will", + "with", + ], + }, + ], + } + + schema = MilvusClient.create_schema() + schema.add_field( + field_name="pk", + datatype=DataType.VARCHAR, + is_primary=True, + auto_id=True, + max_length=100, + ) + schema.add_field( + field_name="content", + datatype=DataType.VARCHAR, + max_length=65535, + analyzer_params=tokenizer_params, + enable_match=True, + enable_analyzer=True, + ) + schema.add_field( + field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR + ) + schema.add_field( + field_name="dense_vector", datatype=DataType.FLOAT_VECTOR, dim=dense_dim + ) + schema.add_field( + field_name="original_uuid", datatype=DataType.VARCHAR, max_length=128 + ) + schema.add_field(field_name="doc_id", datatype=DataType.VARCHAR, max_length=64) + schema.add_field( + field_name="chunk_id", datatype=DataType.VARCHAR, max_length=64 + ), + schema.add_field(field_name="original_index", datatype=DataType.INT32) + + functions = Function( + name="bm25", + function_type=FunctionType.BM25, + input_field_names=["content"], + output_field_names="sparse_vector", + ) + + schema.add_function(functions) + + index_params = MilvusClient.prepare_index_params() + index_params.add_index( + field_name="sparse_vector", + index_type="SPARSE_INVERTED_INDEX", + metric_type="BM25", + ) + index_params.add_index( + field_name="dense_vector", index_type="FLAT", metric_type="IP" + ) + + self.client.create_collection( + collection_name=self.collection_name, + schema=schema, + index_params=index_params, + ) + + def insert_data(self, chunk, metadata): + embedding = self.embedding_function([chunk]) + if isinstance(embedding, dict) and "dense" in embedding: + dense_vec = embedding["dense"][0] + else: + dense_vec = embedding[0] + self.client.insert( + self.collection_name, {"dense_vector": dense_vec, **metadata} + ) + + def search(self, query: str, k: int = 20, mode="hybrid"): + + output_fields = [ + "content", + "original_uuid", + "doc_id", + "chunk_id", + 
"original_index", + ] + if mode in ["dense", "hybrid"]: + embedding = self.embedding_function([query]) + if isinstance(embedding, dict) and "dense" in embedding: + dense_vec = embedding["dense"][0] + else: + dense_vec = embedding[0] + + if mode == "sparse": + results = self.client.search( + collection_name=self.collection_name, + data=[query], + anns_field="sparse_vector", + limit=k, + output_fields=output_fields, + ) + elif mode == "dense": + results = self.client.search( + collection_name=self.collection_name, + data=[dense_vec], + anns_field="dense_vector", + limit=k, + output_fields=output_fields, + ) + elif mode == "hybrid": + full_text_search_params = {"metric_type": "BM25"} + full_text_search_req = AnnSearchRequest( + [query], "sparse_vector", full_text_search_params, limit=k + ) + + dense_search_params = {"metric_type": "IP"} + dense_req = AnnSearchRequest( + [dense_vec], "dense_vector", dense_search_params, limit=k + ) + + results = self.client.hybrid_search( + self.collection_name, + [full_text_search_req, dense_req], + ranker=RRFRanker(), + limit=k, + output_fields=output_fields, + ) + else: + raise ValueError("Invalid mode") + return [ + { + "doc_id": doc["entity"]["doc_id"], + "chunk_id": doc["entity"]["chunk_id"], + "content": doc["entity"]["content"], + "score": doc["distance"], + } + for doc in results[0] + ] +``` + + +```python +dense_ef = BGEM3EmbeddingFunction() +standard_retriever = HybridRetriever( + uri="http://localhost:19530", + collection_name="milvus_hybrid", + dense_embedding_function=dense_ef, +) +``` + + Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 108848.72it/s] + + +### Insert the data + + +```python +path = "codebase_chunks.json" +with open(path, "r") as f: + dataset = json.load(f) + +is_insert = True +if is_insert: + standard_retriever.build_collection() + for doc in dataset: + doc_content = doc["content"] + for chunk in doc["chunks"]: + metadata = { + "doc_id": doc["doc_id"], + "original_uuid": doc["original_uuid"], + 
"chunk_id": chunk["chunk_id"], + "original_index": chunk["original_index"], + "content": chunk["content"], + } + chunk_content = chunk["content"] + standard_retriever.insert_data(chunk_content, metadata) +``` + +### Test Sparse Search + + +```python +results = standard_retriever.search("create a logger?", mode="sparse", k=3) +print(results) +``` + + [{'doc_id': 'doc_10', 'chunk_id': 'doc_10_chunk_0', 'content': 'use {\n crate::args::LogArgs,\n anyhow::{anyhow, Result},\n simplelog::{Config, LevelFilter, WriteLogger},\n std::fs::File,\n};\n\npub struct Logger;\n\nimpl Logger {\n pub fn init(args: &impl LogArgs) -> Result<()> {\n let filter: LevelFilter = args.log_level().into();\n if filter != LevelFilter::Off {\n let logfile = File::create(args.log_file())\n .map_err(|e| anyhow!("Failed to open log file: {e:}"))?;\n WriteLogger::init(filter, Config::default(), logfile)\n .map_err(|e| anyhow!("Failed to initalize logger: {e:}"))?;\n }\n Ok(())\n }\n}\n', 'score': 9.12518310546875}, {'doc_id': 'doc_87', 'chunk_id': 'doc_87_chunk_3', 'content': '\t\tLoggerPtr INF = Logger::getLogger(LOG4CXX_TEST_STR("INF"));\n\t\tINF->setLevel(Level::getInfo());\n\n\t\tLoggerPtr INF_ERR = Logger::getLogger(LOG4CXX_TEST_STR("INF.ERR"));\n\t\tINF_ERR->setLevel(Level::getError());\n\n\t\tLoggerPtr DEB = Logger::getLogger(LOG4CXX_TEST_STR("DEB"));\n\t\tDEB->setLevel(Level::getDebug());\n\n\t\t// Note: categories with undefined level\n\t\tLoggerPtr INF_UNDEF = Logger::getLogger(LOG4CXX_TEST_STR("INF.UNDEF"));\n\t\tLoggerPtr INF_ERR_UNDEF = Logger::getLogger(LOG4CXX_TEST_STR("INF.ERR.UNDEF"));\n\t\tLoggerPtr UNDEF = Logger::getLogger(LOG4CXX_TEST_STR("UNDEF"));\n\n', 'score': 7.0077056884765625}, {'doc_id': 'doc_89', 'chunk_id': 'doc_89_chunk_3', 'content': 'using namespace log4cxx;\nusing namespace 
log4cxx::helpers;\n\nLOGUNIT_CLASS(FMTTestCase)\n{\n\tLOGUNIT_TEST_SUITE(FMTTestCase);\n\tLOGUNIT_TEST(test1);\n\tLOGUNIT_TEST(test1_expanded);\n\tLOGUNIT_TEST(test10);\n//\tLOGUNIT_TEST(test_date);\n\tLOGUNIT_TEST_SUITE_END();\n\n\tLoggerPtr root;\n\tLoggerPtr logger;\n\npublic:\n\tvoid setUp()\n\t{\n\t\troot = Logger::getRootLogger();\n\t\tMDC::clear();\n\t\tlogger = Logger::getLogger(LOG4CXX_TEST_STR("java.org.apache.log4j.PatternLayoutTest"));\n\t}\n\n', 'score': 6.750633716583252}] + + +## Evaluation +Now that we have inserted the dataset into Milvus, we can use dense, sparse, or hybrid search to retrieve the top 5 results. You can change the `mode` and evaluate each one. We present the Pass@5 metric, which involves retrieving the top 5 results for each query and calculating the Recall. + + +```python +def load_jsonl(file_path: str): + """Load JSONL file and return a list of dictionaries.""" + with open(file_path, "r") as file: + return [json.loads(line) for line in file] + + +dataset = load_jsonl("evaluation_set.jsonl") +k = 5 + +# mode can be "dense", "sparse" or "hybrid". 
+mode = "hybrid" + +total_query_score = 0 +num_queries = 0 + +for query_item in dataset: + + query = query_item["query"] + + golden_chunk_uuids = query_item["golden_chunk_uuids"] + + chunks_found = 0 + golden_contents = [] + for doc_uuid, chunk_index in golden_chunk_uuids: + golden_doc = next( + (doc for doc in query_item["golden_documents"] if doc["uuid"] == doc_uuid), + None, + ) + if golden_doc: + golden_chunk = next( + ( + chunk + for chunk in golden_doc["chunks"] + if chunk["index"] == chunk_index + ), + None, + ) + if golden_chunk: + golden_contents.append(golden_chunk["content"].strip()) + + results = standard_retriever.search(query, mode=mode, k=5) + + for golden_content in golden_contents: + for doc in results[:k]: + retrieved_content = doc["content"].strip() + if retrieved_content == golden_content: + chunks_found += 1 + break + + query_score = chunks_found / len(golden_contents) + + total_query_score += query_score + num_queries += 1 +``` + + +```python +print("Pass@5: ", total_query_score / num_queries) +``` + + Pass@5: 0.7911386328725037 + diff --git a/v2.4.x/site/en/tutorials/how_to_enhance_your_rag.md b/v2.4.x/site/en/tutorials/how_to_enhance_your_rag.md new file mode 100644 index 000000000..0e3b06e42 --- /dev/null +++ b/v2.4.x/site/en/tutorials/how_to_enhance_your_rag.md @@ -0,0 +1,188 @@ +--- +id: how_to_enhance_your_rag.md +summary: With the increasing popularity of Retrieval Augmented Generation RAG applications, there is a growing concern about improving their performance. This article presents all possible ways to optimize RAG pipelines and provides corresponding illustrations to help you quickly understand the mainstream RAG optimization strategies. 
+title: How to Enhance the Performance of Your RAG Pipeline +--- + +# How to Enhance the Performance of Your RAG Pipeline + + +With the increasing popularity of Retrieval Augmented Generation ([RAG](https://zilliz.com/learn/Retrieval-Augmented-Generation)) applications, there is a growing concern about improving their performance. This article presents all possible ways to optimize RAG pipelines and provides corresponding illustrations to help you quickly understand the mainstream RAG optimization strategies. + +It's important to note that we'll only provide a high-level exploration of these strategies and techniques, focusing on how they integrate into a RAG system. However, we won't delve into intricate details or guide you through step-by-step implementation. + +## A Standard RAG Pipeline + +The diagram below shows the most straightforward vanilla RAG pipeline. First, document chunks are loaded into a vector store (such as [Milvus](https://milvus.io/docs) or [Zilliz Cloud](https://zilliz.com/cloud)). Then, the vector store retrieves the Top-K most relevant chunks related to the query. These relevant chunks are then injected into the [LLM](https://zilliz.com/glossary/large-language-models-\(llms\))'s context prompt, and finally, the LLM returns the final answer. + +![](../../../assets/advanced_rag/vanilla_rag.png) + +## Various Types of RAG Enhancement Techniques + +We can classify different RAG enhancement approaches based on their roles in the RAG pipeline stages. + +* **Query Enhancement**: Modifying and manipulating the query process of the RAG input to better express or process the query intent. +* **Indexing Enhancement**: Optimizing the creation of chunking indexes using techniques such as multi-chunking, step-wise indexing, or multi-way indexing. +* **Retriever Enhancement**: Applying optimization techniques and strategies during the retrieval process. 
+* **Generator Enhancement**: Adjusting and optimizing prompts when assembling prompts for the LLM to provide better responses. +* **RAG Pipeline Enhancement**: Dynamically switching processes within the entire RAG pipeline, including using agents or tools to optimize key steps in the RAG pipeline. + +Next, we will introduce specific methods under each of these categories. + +## Query Enhancement + +Let's explore four effective methods to enhance your query experience: Hypothetical Questions, Hypothetical Document Embeddings, Sub-Queries, and Stepback Prompts. + +### Creating Hypothetical Questions + +Creating hypothetical questions involves utilizing an LLM to generate multiple questions that users might ask about the content within each document chunk. Before the user's actual query reaches the LLM, the vector store retrieves the most relevant hypothetical questions related to the real query, along with their corresponding document chunks, and forwards them to the LLM. + +![](../../../assets/advanced_rag/hypothetical_question.png) + +This methodology bypasses the cross-domain asymmetry problem in the vector search process by directly engaging in query-to-query searches, alleviating the burden on vector searches. However, it introduces additional overhead and uncertainty in generating hypothetical questions. + +### HyDE (Hypothetical Document Embeddings) + +HyDE stands for Hypothetical Document Embeddings. It leverages an LLM to craft a "***Hypothetical Document***" or a ***fake*** answer in response to a user query devoid of contextual information. This fake answer is then converted into vector embeddings and employed to query the most relevant document chunks within a vector database. Subsequently, the vector database retrieves the Top-K most relevant document chunks and transmits them to the LLM and the original user query to generate the final answer. 
+ +![](../../../assets/advanced_rag/hyde.png) + +This method is similar to the hypothetical question technique in addressing cross-domain asymmetry in vector searches. However, it also has drawbacks, such as the added computational costs and uncertainties of generating fake answers. + +For more information, refer to the [HyDE](https://arxiv.org/abs/2212.10496) paper. + +### Creating Sub-Queries + +When a user query is too complicated, we can use an LLM to break it down into simpler sub-queries before passing them on to the vector database and the LLM. Let's take a look at an example. + +Imagine a user asking: "***What are the differences in features between Milvus and Zilliz Cloud?***" This question is quite complex and might not have a straightforward answer in our knowledge base. To tackle this issue, we can split it into two simpler sub-queries: + +* Sub-query 1: "What are the features of Milvus?" +* Sub-query 2: "What are the features of Zilliz Cloud?" + +Once we have these sub-queries, we send them all to the vector database after converting them into vector embeddings. The vector database then finds the Top-K document chunks most relevant to each sub-query. Finally, the LLM uses this information to generate a better answer. + +![](../../../assets/advanced_rag/sub_query.png) + +By breaking down the user query into sub-queries, we make it easier for our system to find relevant information and provide accurate answers, even to complex questions. + +### Creating Stepback Prompts + +Another way to simplify complex user queries is by creating ***stepback prompts***. This technique involves abstracting complicated user queries into **"*stepback questions*"** using an LLM. Then, a vector database uses these stepback questions to retrieve the most relevant document chunks. Finally, the LLM generates a more accurate answer based on these retrieved document chunks. + +Let's illustrate this technique with an example. 
Consider the following query, which is quite complex and not straightforward to answer directly: + +***Original User Query: "I have a dataset with 10 billion records and want to store it in Milvus for querying. Is it possible?"*** + +To simplify this user query, we can use an LLM to generate a more straightforward stepback question: + +***Stepback Question: "What is the dataset size limit that Milvus can handle?"*** + +![](../../../assets/advanced_rag/stepback.png) + +This method can help us get better and more accurate answers to complex queries. It breaks down the original question into a simpler form, making it easier for our system to find relevant information and provide accurate responses. + +## Indexing Enhancement + +Enhancing indexing is another strategy for enhancing the performance of your RAG applications. Let’s explore three indexing enhancement techniques. + +### Merging Document Chunks Automatically + +When building an index, we can employ two granularity levels: child chunks and their corresponding parent chunks. Initially, we search for child chunks at a finer level of detail. Then, we apply a merging strategy: if a specific number, ***n***, of child chunks from the first ***k*** child chunks belong to the same parent chunk, we provide this parent chunk to the LLM as contextual information. + +![](../../../assets/advanced_rag/merge_chunks.png) + +This methodology has been implemented in [LlamaIndex](https://docs.llamaindex.ai/en/stable/examples/retrievers/recursive_retriever_nodes.html). + +### Constructing Hierarchical Indices + +When creating indices for documents, we can establish a two-level index: one for document summaries and another for document chunks. The vector search process comprises two stages: initially, we filter relevant documents based on the summary, and subsequently, we retrieve corresponding document chunks exclusively within these relevant documents. 
+ +![](../../../assets/advanced_rag/hierarchical_index.png) + +This approach proves beneficial in situations involving extensive data volumes or instances where data is hierarchical, such as content retrieval within a library collection. + +### Hybrid Retrieval and Reranking + +The Hybrid Retrieval and Reranking technique integrates one or more supplementary retrieval methods with [vector similarity retrieval](https://zilliz.com/learn/vector-similarity-search). Then, a [reranker](https://zilliz.com/learn/optimize-rag-with-rerankers-the-role-and-tradeoffs#What-is-a-Reranker) reranks the retrieved results based on their relevancy to the user query. + +Common supplementary retrieval algorithms include lexical frequency-based methods like [BM25](https://milvus.io/docs/embed-with-bm25.md) or big models utilizing sparse embeddings like [Splade](https://zilliz.com/learn/discover-splade-revolutionize-sparse-data-processing). Re-ranking algorithms include RRF or more sophisticated models such as [Cross-Encoder](https://www.sbert.net/examples/applications/cross-encoder/README.html), which resembles BERT-like architectures. + +![](../../../assets/advanced_rag/hybrid_and_rerank.png) + +This approach leverages diverse retrieval methods to improve retrieval quality and address potential gaps in vector recall. + +## Retriever Enhancement + +Refinement of the retriever component within the RAG system can also improve RAG applications. Let's explore some effective methods for enhancing the retriever. + +### Sentence Window Retrieval + +In a basic RAG system, the document chunk given to the LLM is a larger window encompassing the retrieved embedding chunk. This ensures that the information provided to the LLM includes a broader range of contextual details, minimizing information loss. The Sentence Window Retrieval technique decouples the document chunk used for embedding retrieval from the chunk provided to the LLM. 
+ +![](../../../assets/advanced_rag/sentence_window.png) + +However, expanding the window size may introduce additional interfering information. We can adjust the size of the window expansion based on the specific business needs. + +### Metadata Filtering + +To ensure more precise answers, we can refine the retrieved documents by filtering metadata like time and category before passing them to the LLM. For instance, if financial reports spanning multiple years are retrieved, filtering based on the desired year will refine the information to meet specific requirements. This method proves effective in situations with extensive data and detailed metadata, such as content retrieval in library collections. + +![](../../../assets/advanced_rag/metadata_filtering.png) + +## Generator Enhancement + +Let’s explore more RAG optimization techniques by improving the generator within a RAG system. + +### Compressing the LLM prompt + +The noise information within retrieved document chunks can significantly impact the accuracy of RAG's final answer. The limited prompt window in LLMs also presents a hurdle for more accurate answers. To address this challenge, we can compress irrelevant details, emphasize key paragraphs, and reduce the overall context length of retrieved document chunks. + +![](../../../assets/advanced_rag/compress_prompt.png) + +This approach is similar to the earlier discussed hybrid retrieval and reranking method, wherein a reranker is utilized to sift out irrelevant document chunks. + +### Adjusting the chunk order in the prompt + +In the paper "[Lost in the middle](https://arxiv.org/abs/2307.03172)," researchers observed that LLMs often overlook information in the middle of given documents during the reasoning process. Instead, they tend to rely more on the information presented at the beginning and end of the documents. 
+ +Based on this observation, we can adjust the order of retrieved chunks to improve the answer quality: when retrieving multiple knowledge chunks, chunks with relatively low confidence are placed in the middle, and chunks with relatively high confidence are positioned at both ends. + +![](../../../assets/advanced_rag/adjust_order.png) + +## RAG Pipeline Enhancement + +We can also improve the performance of your RAG applications by enhancing the whole RAG pipeline. + +### Self-reflection + +This approach incorporates the concept of self-reflection within AI agents. So, how does this technique work? + +Some initially retrieved Top-K document chunks are ambiguous and may not answer the user question directly. In such cases, we can conduct a second round of reflection to verify whether these chunks can genuinely address the query. + +We can conduct the reflection using efficient reflection methods such as Natural Language Inference (NLI) models or additional tools like internet searches for verification. + +![](../../../assets/advanced_rag/self_reflection.png) + +This concept of self-reflection has been explored in several papers and projects, including [Self-RAG](https://arxiv.org/pdf/2310.11511.pdf), [Corrective RAG](https://arxiv.org/pdf/2401.15884.pdf), [LangGraph](https://github.com/langchain-ai/langgraph/blob/main/examples/reflexion/reflexion.ipynb), etc. + +### Query Routing with an Agent + +Sometimes, we don’t have to use a RAG system to answer simple questions as it might result in more misunderstanding and inference from misleading information. In such cases, we can use an agent as a router at the querying stage. This agent assesses whether the query needs to go through the RAG pipeline. If it does, the subsequent RAG pipeline is initiated; otherwise, the LLM directly addresses the query. 
+ +![](../../../assets/advanced_rag/query_routing.png) +![](../../../assets/advanced_rag/query_routing_with_sub_query.png) + +The agent could take various forms, including an LLM, a small classification model, or even a set of rules. + +By routing queries based on user intent, you can redirect a portion of the queries, leading to a significant boost in response time and a noticeable reduction in unnecessary noise. + +We can extend the query routing technique to other processes within the RAG system, such as determining when to utilize tools like web searches, conducting sub-queries, or searching for images. This approach ensures that each step in the RAG system is optimized based on the specific requirements of the query, leading to more efficient and accurate information retrieval. + +## Summary + +While a vanilla RAG pipeline may appear simple, achieving optimal business performance often requires more sophisticated optimization techniques. + +This article summarizes various popular approaches to enhancing the performance of your RAG applications. We also provided clear illustrations to help you quickly understand these concepts and techniques and expedite their implementation and optimization. + +You can get the simple implementations of the major approaches listed in this article at this [GitHub link](https://github.com/milvus-io/bootcamp/tree/master/bootcamp/RAG/advanced_rag). diff --git a/v2.4.x/site/en/tutorials/tutorials-overview.md b/v2.4.x/site/en/tutorials/tutorials-overview.md index 520162020..4cd4828c0 100644 --- a/v2.4.x/site/en/tutorials/tutorials-overview.md +++ b/v2.4.x/site/en/tutorials/tutorials-overview.md @@ -11,10 +11,19 @@ This page provides a list of tutorials for you to interact with Milvus. 
| Tutorial | Use Case | Related Milvus Features | | -------- | -------- | --------- | | [Build RAG with Milvus](build-rag-with-milvus.md) | RAG | vector search | -| [Multimodal RAG with Milvus](multimodal_rag_with_milvus.md) | RAG | vector search, dynamic field | -| [Image Search with Milvus](image_similarity_search.md) | Semantic Search | vector search, dynamic field | +| [Advanced RAG](how_to_enhance_your_rag.md) | RAG | vector search | +| [Full Text Search with Milvus](full_text_search_with_milvus.md) | Quickstart | Full-Text Search | | [Hybrid Search with Milvus](hybrid_search_with_milvus.md) | Hybrid Search | hybrid search, multi vector, dense embedding, sparse embedding | +| [Image Search with Milvus](image_similarity_search.md) | Semantic Search | vector search, dynamic field | +| [Multimodal RAG with Milvus](multimodal_rag_with_milvus.md) | RAG | vector search, dynamic field | | [Multimodal Search using Multi Vectors](multimodal_rag_with_milvus.md) | Semantic Search | multi vector, hybrid search | +| [Graph RAG with Milvus](graph_rag_with_milvus.md) | RAG | graph search | +| [Contextual Retrieval with Milvus](contextual_retrieval_with_milvus.md) | Quickstart | vector search | +| [HDBSCAN Clustering with Milvus](hdbscan_clustering_with_milvus.md) | Quickstart | vector search | +| [Use ColPali for Multi-Modal Retrieval with Milvus](use_ColPali_with_milvus.md) | Quickstart | vector search | +| [Vector Visualization](vector_visualization.md) | Quickstart | vector search | +| [Movie Recommendation with Milvus](movie_recommendation_with_milvus.md) | Recommendation System | vector search | +| [Funnel Search with Matryoshka Embeddings](funnel_search_with_matryoshka.md) | Quickstart | vector search | | [Question Answering System](question_answering_system.md) | Question Answering | vector search | | [Recommender System](recommendation_system.md) | Recommendation System | vector search | | [Video Similarity Search](video_similarity_search.md) | Semantic Search | 
vector search | @@ -23,11 +32,4 @@ This page provides a list of tutorials for you to interact with Milvus. | [Text Search Engine](text_search_engine.md) | Semantic Search | vector search | | [Search Image by Text](text_image_search.md) | Semantic Search | vector search | | [Image Deduplication](image_deduplication_system.md) | Deduplication | vector search | -| [Graph RAG with Milvus](graph_rag_with_milvus.md) | RAG | graph search | -| [Contextual Retrieval with Milvus](contextual_retrieval_with_milvus.md) | Quickstart | vector search | -| [HDBSCAN Clustering with Milvus](hdbscan_clustering_with_milvus.md) | Quickstart | vector search | -| [Use ColPali for Multi-Modal Retrieval with Milvus](use_ColPali_with_milvus.md) | Quickstart | vector search | -| [Vector Visualization](vector_visualization.md) | Quickstart | vector search | -| [Movie Recommendation with Milvus](movie_recommendation_with_milvus.md) | Recommendation System | vector search | -| [Funnel Search with Matryoshka Embeddings](funnel_search_with_matryoshka.md) | Quickstart | vector search | diff --git a/version.json b/version.json index afcd7732d..8085d97f0 100644 --- a/version.json +++ b/version.json @@ -1,4 +1,4 @@ { - "version": "v2.5.x", + "version": "v2.4.x", "released": "yes" }