diff --git a/README.md b/README.md index 7ba82c31..e0444a37 100644 --- a/README.md +++ b/README.md @@ -205,7 +205,6 @@ index.search( This package guarantees compatibility with [version v1.x of Meilisearch](https://github.com/meilisearch/meilisearch/releases/latest), but some features may not be present. Please check the [issues](https://github.com/meilisearch/meilisearch-python/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+label%3Aenhancement) for more info. -⚠️ This package is not compatible with the [`vectoreStore` experimental feature](https://www.meilisearch.com/docs/learn/experimental/vector_search) of Meilisearch v1.6.0 and later. More information on this [issue](https://github.com/meilisearch/meilisearch-python/issues/901). ## 💡 Learn more diff --git a/meilisearch/config.py b/meilisearch/config.py index 0c862391..09cc5fad 100644 --- a/meilisearch/config.py +++ b/meilisearch/config.py @@ -38,6 +38,7 @@ class Paths: separator_tokens = "separator-tokens" non_separator_tokens = "non-separator-tokens" swap = "swap-indexes" + embedders = "embedders" def __init__( self, diff --git a/meilisearch/index.py b/meilisearch/index.py index 9446c8c6..ddb68ab4 100644 --- a/meilisearch/index.py +++ b/meilisearch/index.py @@ -10,7 +10,7 @@ from meilisearch.config import Config from meilisearch.errors import version_error_hint_message from meilisearch.models.document import Document, DocumentsResults -from meilisearch.models.index import Faceting, IndexStats, Pagination, TypoTolerance +from meilisearch.models.index import Embedders, Faceting, IndexStats, Pagination, TypoTolerance from meilisearch.models.task import Task, TaskInfo, TaskResults from meilisearch.task import TaskHandler @@ -1757,6 +1757,71 @@ def reset_non_separator_tokens(self) -> TaskInfo: return TaskInfo(**task) + # EMBEDDERS SUB-ROUTES + + def get_embedders(self) -> Embedders | None: + """Get embedders of the index. + + Returns + ------- + settings: + The embedders settings of the index. 
+ + Raises + ------ + MeilisearchApiError + An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + """ + response = self.http.get(self.__settings_url_for(self.config.paths.embedders)) + + if not response: + return None + + return Embedders(embedders=response) + + def update_embedders(self, body: Union[Mapping[str, Any], None]) -> TaskInfo: + """Update embedders of the index. + + Parameters + ---------- + body: dict + Dictionary containing the embedders. + + Returns + ------- + task_info: + TaskInfo instance containing information about a task to track the progress of an asynchronous process. + https://www.meilisearch.com/docs/reference/api/tasks#get-one-task + + Raises + ------ + MeilisearchApiError + An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + """ + task = self.http.patch(self.__settings_url_for(self.config.paths.embedders), body) + + return TaskInfo(**task) + + def reset_embedders(self) -> TaskInfo: + """Reset embedders of the index to default values. + + Returns + ------- + task_info: + TaskInfo instance containing information about a task to track the progress of an asynchronous process. + https://www.meilisearch.com/docs/reference/api/tasks#get-one-task + + Raises + ------ + MeilisearchApiError + An error containing details about why Meilisearch can't process your request. 
Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors + """ + task = self.http.delete( + self.__settings_url_for(self.config.paths.embedders), + ) + + return TaskInfo(**task) + @staticmethod def _batch( documents: Sequence[Mapping[str, Any]], batch_size: int diff --git a/meilisearch/models/index.py b/meilisearch/models/index.py index d9b97c3d..535d8e23 100644 --- a/meilisearch/models/index.py +++ b/meilisearch/models/index.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Dict, Iterator, List, Optional +from typing import Any, Dict, Iterator, List, Optional, Union from camel_converter import to_snake from camel_converter.pydantic_base import CamelBase @@ -46,3 +46,26 @@ class TypoTolerance(CamelBase): disable_on_attributes: Optional[List[str]] = None disable_on_words: Optional[List[str]] = None min_word_size_for_typos: Optional[MinWordSizeForTypos] = None + + +class OpenAiEmbedder(CamelBase): + source: str = "openAi" + model: Optional[str] = None # Defaults to text-embedding-ada-002 + api_key: Optional[str] = None # Can be provided through a CLI option or environment variable + document_template: Optional[str] = None + + +class HuggingFaceEmbedder(CamelBase): + source: str = "huggingFace" + model: Optional[str] = None # Defaults to BAAI/bge-base-en-v1.5 + revision: Optional[str] = None + document_template: Optional[str] = None + + +class UserProvidedEmbedder(CamelBase): + source: str = "userProvided" + dimensions: int + + +class Embedders(CamelBase): + embedders: Dict[str, Union[OpenAiEmbedder, HuggingFaceEmbedder, UserProvidedEmbedder]] diff --git a/tests/conftest.py b/tests/conftest.py index cd6be2cb..043bca1f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -129,12 +129,21 @@ def index_maker(index_uid=common.INDEX_UID, documents=small_movies): @fixture(scope="function") def index_with_documents_and_vectors(empty_index, small_movies): - 
small_movies[0]["_vectors"] = [0.1, 0.2] + small_movies[0]["_vectors"] = {"default": [0.1, 0.2]} def index_maker(index_uid=common.INDEX_UID, documents=small_movies): index = empty_index(index_uid) - task = index.add_documents(documents) - index.wait_for_task(task.task_uid) + settings_update_task = index.update_embedders( + { + "default": { + "source": "userProvided", + "dimensions": 2, + } + } + ) + index.wait_for_task(settings_update_task.task_uid) + document_addition_task = index.add_documents(documents) + index.wait_for_task(document_addition_task.task_uid) return index return index_maker @@ -216,3 +225,13 @@ def enable_vector_search(): json={"vectorStore": False}, timeout=10, ) + + +@fixture +def new_embedders(): + return { + "default": { + "source": "userProvided", + "dimensions": 1, + } + } diff --git a/tests/index/test_index_search_meilisearch.py b/tests/index/test_index_search_meilisearch.py index ad4443e4..e5b045cd 100644 --- a/tests/index/test_index_search_meilisearch.py +++ b/tests/index/test_index_search_meilisearch.py @@ -459,13 +459,9 @@ def test_attributes_to_search_on_search_no_match(index_with_documents): assert response["hits"] == [] -@pytest.mark.xfail( - strict=True, reason="https://github.com/meilisearch/meilisearch-python/issues/901" -) @pytest.mark.usefixtures("enable_vector_search") def test_vector_search(index_with_documents_and_vectors): response = index_with_documents_and_vectors().search( - "How to Train Your Dragon", opt_params={"vector": [0.1, 0.2]} + "", opt_params={"vector": [0.1, 0.2], "hybrid": {"semanticRatio": 1.0}} ) - assert response["hits"][0]["id"] == "287947" assert response["vector"] == [0.1, 0.2] diff --git a/tests/settings/test_settings_embedders.py b/tests/settings/test_settings_embedders.py new file mode 100644 index 00000000..4d27ffb1 --- /dev/null +++ b/tests/settings/test_settings_embedders.py @@ -0,0 +1,44 @@ +import pytest + +from meilisearch.models.index import Embedders + + 
+@pytest.mark.usefixtures("enable_vector_search") +def test_get_default_embedders(empty_index): + """Tests getting default embedders.""" + response = empty_index().get_embedders() + + assert response is None + + +@pytest.mark.usefixtures("enable_vector_search") +def test_update_embedders_with_user_provided_source(new_embedders, empty_index): + """Tests updating embedders.""" + index = empty_index() + response_update = index.update_embedders(new_embedders) + update = index.wait_for_task(response_update.task_uid) + response_get = index.get_embedders() + assert update.status == "succeeded" + assert response_get == Embedders(embedders=new_embedders) + + +@pytest.mark.usefixtures("enable_vector_search") +def test_reset_embedders(new_embedders, empty_index): + """Tests resetting the embedders setting to its default value.""" + index = empty_index() + + # Update the settings + response_update = index.update_embedders(new_embedders) + update1 = index.wait_for_task(response_update.task_uid) + # Get the setting after update + response_get = index.get_embedders() + # Reset the setting + response_reset = index.reset_embedders() + update2 = index.wait_for_task(response_reset.task_uid) + # Get the setting after reset + response_last = index.get_embedders() + + assert update1.status == "succeeded" + assert response_get == Embedders(embedders=new_embedders) + assert update2.status == "succeeded" + assert response_last is None