Skip to content

Commit

Permalink
Merge #924
Browse files Browse the repository at this point in the history
924: Implement vector search experimental feature v2 (v1.6) r=curquiza a=CaroFG

# Pull Request

## Related issue
Fixes #901 

## What does this PR do?
- Creates embedders classes
- Adds embedders to paths
- Introduces new routes:
  - Adds a method to get the settings by calling GET /indexes/:index_uid/settings/embedders
  - Adds a method to update the settings by calling PATCH /indexes/:index_uid/settings/embedders
  - Adds a method to reset the settings by calling DELETE /indexes/:index_uid/settings/embedders
- Adds embedders settings tests
- Updates vector search tests
 

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: CaroFG <carolina.ferreira131@gmail.com>
Co-authored-by: CaroFG <48251481+CaroFG@users.noreply.github.com>
  • Loading branch information
3 people authored Feb 8, 2024
2 parents 1e9a3a8 + cfb57f1 commit bf0aea5
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 11 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ index.search(

This package guarantees compatibility with [version v1.x of Meilisearch](https://github.com/meilisearch/meilisearch/releases/latest), but some features may not be present. Please check the [issues](https://github.com/meilisearch/meilisearch-python/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22+label%3Aenhancement) for more info.

⚠️ This package is not compatible with the [`vectoreStore` experimental feature](https://www.meilisearch.com/docs/learn/experimental/vector_search) of Meilisearch v1.6.0 and later. More information on this [issue](https://github.com/meilisearch/meilisearch-python/issues/901).

## 💡 Learn more

Expand Down
1 change: 1 addition & 0 deletions meilisearch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class Paths:
separator_tokens = "separator-tokens"
non_separator_tokens = "non-separator-tokens"
swap = "swap-indexes"
embedders = "embedders"

def __init__(
self,
Expand Down
67 changes: 66 additions & 1 deletion meilisearch/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from meilisearch.config import Config
from meilisearch.errors import version_error_hint_message
from meilisearch.models.document import Document, DocumentsResults
from meilisearch.models.index import Faceting, IndexStats, Pagination, TypoTolerance
from meilisearch.models.index import Embedders, Faceting, IndexStats, Pagination, TypoTolerance
from meilisearch.models.task import Task, TaskInfo, TaskResults
from meilisearch.task import TaskHandler

Expand Down Expand Up @@ -1757,6 +1757,71 @@ def reset_non_separator_tokens(self) -> TaskInfo:

return TaskInfo(**task)

# EMBEDDERS SUB-ROUTES

def get_embedders(self) -> Embedders | None:
    """Fetch the embedders setting of the index.

    Returns
    -------
    settings:
        An ``Embedders`` instance built from the response, or ``None`` when the
        response is empty (falsy).

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    embedders_url = self.__settings_url_for(self.config.paths.embedders)
    payload = self.http.get(embedders_url)

    # An empty payload is reported as "no embedders" rather than an empty model.
    return Embedders(embedders=payload) if payload else None

def update_embedders(self, body: Union[Mapping[str, Any], None]) -> TaskInfo:
    """Send new embedders settings for the index.

    Parameters
    ----------
    body:
        Mapping containing the embedders; it is passed unchanged as the PATCH body.

    Returns
    -------
    task_info:
        TaskInfo instance containing information about a task to track the progress of an asynchronous process.
        https://www.meilisearch.com/docs/reference/api/tasks#get-one-task

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    embedders_url = self.__settings_url_for(self.config.paths.embedders)
    enqueued = self.http.patch(embedders_url, body)

    return TaskInfo(**enqueued)

def reset_embedders(self) -> TaskInfo:
    """Reset the embedders setting of the index by issuing a DELETE on its route.

    Returns
    -------
    task_info:
        TaskInfo instance containing information about a task to track the progress of an asynchronous process.
        https://www.meilisearch.com/docs/reference/api/tasks#get-one-task

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    embedders_url = self.__settings_url_for(self.config.paths.embedders)
    enqueued = self.http.delete(embedders_url)

    return TaskInfo(**enqueued)

@staticmethod
def _batch(
documents: Sequence[Mapping[str, Any]], batch_size: int
Expand Down
25 changes: 24 additions & 1 deletion meilisearch/models/index.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, Dict, Iterator, List, Optional
from typing import Any, Dict, Iterator, List, Literal, Optional, Union

from camel_converter import to_snake
from camel_converter.pydantic_base import CamelBase
Expand Down Expand Up @@ -46,3 +46,26 @@ class TypoTolerance(CamelBase):
disable_on_attributes: Optional[List[str]] = None
disable_on_words: Optional[List[str]] = None
min_word_size_for_typos: Optional[MinWordSizeForTypos] = None


class OpenAiEmbedder(CamelBase):
    """Settings of an embedder whose ``source`` is ``"openAi"``.

    ``source`` is a ``Literal`` so that, when this model is one member of a
    ``Union``, only payloads tagged ``"openAi"`` validate against it.
    """

    source: Literal["openAi"] = "openAi"
    model: Optional[str] = None  # Defaults to text-embedding-ada-002
    api_key: Optional[str] = None  # Can be provided through a CLI option or environment variable
    document_template: Optional[str] = None


class HuggingFaceEmbedder(CamelBase):
    """Settings of an embedder whose ``source`` is ``"huggingFace"``.

    ``source`` is a ``Literal`` so that, when this model is one member of a
    ``Union``, only payloads tagged ``"huggingFace"`` validate against it.
    """

    source: Literal["huggingFace"] = "huggingFace"
    model: Optional[str] = None  # Defaults to BAAI/bge-base-en-v1.5
    revision: Optional[str] = None
    document_template: Optional[str] = None


class UserProvidedEmbedder(CamelBase):
    """Settings of an embedder whose ``source`` is ``"userProvided"``.

    ``dimensions`` has no default and must always be supplied.
    """

    source: Literal["userProvided"] = "userProvided"
    dimensions: int


class Embedders(CamelBase):
    """Embedders setting of an index: embedder name -> embedder settings.

    Each value is validated against the union of the three embedder models.
    With a plain ``str`` ``source`` every member would accept every payload
    (unknown keys are not rejected), so e.g. a ``userProvided`` payload could
    be parsed as ``OpenAiEmbedder`` and lose ``dimensions``. The ``Literal``
    ``source`` on each member is what selects the right model.
    """

    embedders: Dict[str, Union[OpenAiEmbedder, HuggingFaceEmbedder, UserProvidedEmbedder]]
25 changes: 22 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,21 @@ def index_maker(index_uid=common.INDEX_UID, documents=small_movies):

@fixture(scope="function")
def index_with_documents_and_vectors(empty_index, small_movies):
small_movies[0]["_vectors"] = [0.1, 0.2]
small_movies[0]["_vectors"] = {"default": [0.1, 0.2]}

def index_maker(index_uid=common.INDEX_UID, documents=small_movies):
index = empty_index(index_uid)
task = index.add_documents(documents)
index.wait_for_task(task.task_uid)
settings_update_task = index.update_embedders(
{
"default": {
"source": "userProvided",
"dimensions": 2,
}
}
)
index.wait_for_task(settings_update_task.task_uid)
document_addition_task = index.add_documents(documents)
index.wait_for_task(document_addition_task.task_uid)
return index

return index_maker
Expand Down Expand Up @@ -216,3 +225,13 @@ def enable_vector_search():
json={"vectorStore": False},
timeout=10,
)


@fixture
def new_embedders():
    """Provide an embedders payload with one ``userProvided`` embedder named ``default``."""
    user_provided = {
        "source": "userProvided",
        "dimensions": 1,
    }
    return {"default": user_provided}
6 changes: 1 addition & 5 deletions tests/index/test_index_search_meilisearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,13 +459,9 @@ def test_attributes_to_search_on_search_no_match(index_with_documents):
assert response["hits"] == []


@pytest.mark.xfail(
strict=True, reason="https://github.com/meilisearch/meilisearch-python/issues/901"
)
@pytest.mark.usefixtures("enable_vector_search")
def test_vector_search(index_with_documents_and_vectors):
response = index_with_documents_and_vectors().search(
"How to Train Your Dragon", opt_params={"vector": [0.1, 0.2]}
"", opt_params={"vector": [0.1, 0.2], "hybrid": {"semanticRatio": 1.0}}
)
assert response["hits"][0]["id"] == "287947"
assert response["vector"] == [0.1, 0.2]
44 changes: 44 additions & 0 deletions tests/settings/test_settings_embedders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pytest

from meilisearch.models.index import Embedders


@pytest.mark.usefixtures("enable_vector_search")
def test_get_default_embedders(empty_index):
    """An index from ``empty_index`` with no embedders configured returns ``None``."""
    index = empty_index()

    assert index.get_embedders() is None


@pytest.mark.usefixtures("enable_vector_search")
def test_update_embedders_with_user_provided_source(new_embedders, empty_index):
    """Updating embedders succeeds and the new value can be read back."""
    index = empty_index()

    enqueued = index.update_embedders(new_embedders)
    finished = index.wait_for_task(enqueued.task_uid)
    current = index.get_embedders()

    assert finished.status == "succeeded"
    assert current == Embedders(embedders=new_embedders)


@pytest.mark.usefixtures("enable_vector_search")
def test_reset_embedders(new_embedders, empty_index):
    """Resetting embedders after an update makes ``get_embedders`` return ``None`` again."""
    index = empty_index()

    # Apply the embedders and read them back.
    update_info = index.update_embedders(new_embedders)
    update_task = index.wait_for_task(update_info.task_uid)
    after_update = index.get_embedders()

    # Reset the setting and read it back once more.
    reset_info = index.reset_embedders()
    reset_task = index.wait_for_task(reset_info.task_uid)
    after_reset = index.get_embedders()

    assert update_task.status == "succeeded"
    assert after_update == Embedders(embedders=new_embedders)
    assert reset_task.status == "succeeded"
    assert after_reset is None

0 comments on commit bf0aea5

Please sign in to comment.