diff --git a/.github/workflows/test-check.yaml b/.github/workflows/test-check.yaml index 55887c290f..b769252279 100644 --- a/.github/workflows/test-check.yaml +++ b/.github/workflows/test-check.yaml @@ -97,6 +97,6 @@ jobs: - name: "Clean sparsezoo directory" run: rm -r sparsezoo/ - name: ⚙️ Install dependencies - run: pip install .[dev,haystack] + run: pip install .[dev] - name: Run integrations tests run: make test_integrations diff --git a/MANIFEST.in b/MANIFEST.in index d30f250972..f4d58c8742 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,5 @@ include LICENSE include utils/artifacts.py -include src/deepsparse/transformers/haystack/haystack_reqs.txt recursive-include src/deepsparse/avx2 * recursive-include src/deepsparse/avx512 * recursive-include src/deepsparse/neon * diff --git a/examples/vit_pose/schemas.py b/examples/vit_pose/schemas.py index 9c3e9ac63c..ccf9f9298a 100644 --- a/examples/vit_pose/schemas.py +++ b/examples/vit_pose/schemas.py @@ -15,7 +15,7 @@ from typing import List import numpy -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict from deepsparse.pipelines.computer_vision import ComputerVisionSchema @@ -32,6 +32,4 @@ class VitPoseInput(ComputerVisionSchema): class VitPoseOutput(BaseModel): out: List[numpy.ndarray] - - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) diff --git a/integrations/haystack/README.md b/integrations/haystack/README.md deleted file mode 100644 index be9060b978..0000000000 --- a/integrations/haystack/README.md +++ /dev/null @@ -1,307 +0,0 @@ -# Haystack: Information Retrieval # -The relevant features added as a part of the Haystack information retrieval integration are a [Haystack pipeline](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/transformers/haystack/pipeline.py), an [embedding extraction pipeline](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/transformers/pipelines/embedding_extraction.py), and two classes, [DeepSparseEmbeddingRetriever](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/transformers/haystack/nodes.py) and [DeepSparseDensePassageRetriever](https://github.com/neuralmagic/deepsparse/tree/main/src/deepsparse/transformers/haystack/nodes.py). - -These features allow a user to perform information retrieval tasks using the Haystack library as well as substitute in sparse retrieval nodes into their existing Haystack systems. - -## Installation and Setup ## -In order to ensure the proper installation, please install with python version 3.8. - -Install `farm-haystack`'s dependencies via deepsparse extras -```bash -pip install deepsparse[haystack] -``` - -After this is done, importing assets from `deepsparse.transformers.haystack` will trigger an auto-installation of Neural Magic's fork of `transformers` as well as `farm-haystack[all]==1.4.0`. These auto-installations can be controlled by setting the environment variables `NM_NO_AUTOINSTALL_TRANSFORMERS` and `NM_NO_AUTOINSTALL_HAYSTACK` respectively. - -## Haystack ## -[Haystack](https://haystack.deepset.ai/overview/intro) is an open source framework developed by Deepset for building document search systems. The library implements classes that handle operations such as document storage, index search, embedding generation, and information retrieval. - -### Document Retrieval with Haystack ### -A typical document retrieval script in Haystack might look something like this: - -First initialize a document store. 
The document store is responsible for handling the storage of document texts, their embeddings, as well as indexing those embeddings. The simplest document store provided by Haystack is the `InMemoryDocumentStore`, but more complex document stores such as `ElasticDocumentStore`, `FAISSDocumentStore`, or `WeaviateDocumentStore` may require more set up but provide more robust indexing capabilities. -``` python3 -from haystack.document_stores import InMemoryDocumentStore - -document_store = InMemoryDocumentStore( - similarity="cosine", - embedding_dim=768, - use_gpu=False -) -``` - -Next, create a retriever. The retriever houses the embedding model and is responsible for, given a document or query, generating an embedding such that query embeddings have a high similarity to their relevant document embeddings. -``` python3 -from haystack.nodes import EmbeddingRetriever - -retriever = EmbeddingRetriever( - document_store, # pass in document store - embedding_model="deepset/sentence_bert", - use_gpu=False, -) -``` -``` python3 ->>> retriever.embed_queries(["How many protons in a hydrogen atom"])[0][:10] -array([-0.00331814, -0.16311326, -0.64788855, -0.35724441, -0.26155273, - -0.76656055, 0.35976224, -0.6578757 , -0.15693564, -0.1927543 ]) -``` - -Next, write some files to your document store. These documents can be instances of Haystack's `Document` class or dictionaries containing `content`. Remember to update the documents' embeddings with `document_store.update_embeddings(retriever)`. -``` python3 -document_store.write_documents([ - { - "title" : "Looking Glass", - "content": "He came on a summer's day " - "Bringin' gifts from far away." - "But he made it clear he couldn't stay." - "No harbor was his home." - }, - { - "title" : "Bobby Darin", - "content": "Somewhere beyond the sea." - "Somewhere waiting for me." - "My lover stands on golden sands " - "And watches the ships that go sailin'" - } -]) -document_store.update_embeddings(retriever) -``` - -Finally, create a pipeline and run a query using Haystack's `DocumentSearchPipeline`. -``` python3 -from haystack.pipelines import DocumentSearchPipeline - -pipeline = DocumentSearchPipeline(retriever) -results = pipeline.run(query="Where does my lover stand?", params={"Retriever": {"top_k": 1}}) -print(results) -``` -``` -{'documents': [], 'root_node': 'Query', 'params': {'Retriever': {'top_k': 1}}, 'query': 'Where does my lover stand?', 'node_id': 'Retriever'} -``` - -### Document Retrieval with DeepSparse ### -To integrate with the DeepSparse Engine, simply replace your Haystack retriever node with an instance of a DeepSparse node. -``` python3 -from deepsparse.transformers.haystack import DeepSparseEmbeddingRetriever - -retriever = DeepSparseEmbeddingRetriever( - document_store, - model_path="zoo:nlp/masked_language_modeling/distilbert-none/pytorch/huggingface/wikipedia_bookcorpus/pruned80_quant-none-vnni", -) -``` - -## DeepSparse Nodes ## -DeepSparse Nodes are a set of classes that leverage the embedding extraction pipeline to generate document embeddings using the DeepSparse engine. These embeddings can then be used for information retrieval and other haystack tasks. - -### DeepSparseEmbeddingRetriever ### -This class implements Haystack's `EmbeddingRetriever` class with DeepSparse inference using the `TransformersEmbeddingExtractionPipeline`. The embedding extraction pipeline takes the passed model path, truncates the ONNX to a transformer layer, then uses those model outputs as embeddings. 
The embedded representation of the document can then be compared to the embedded representation of the query. Query embeddings and document embeddings that have a high dot product/cosine similarity are deemed to be relevant by the `DocumentSearchPipeline`. -``` python3 -from haystack.document_stores import InMemoryDocumentStore -from haystack.pipelines import DocumentSearchPipeline - -from deepsparse.transformers.haystack import DeepSparseEmbeddingRetriever - -document_store = InMemoryDocumentStore(similarity="cosine", embedding_dim=768, use_gpu=False) -document_store.write_documents([ - { - "content": "He came on a summer's day " - "Bringin' gifts from far away." - "But he made it clear he couldn't stay." - "No harbor was his home." - }, - { - "content": "Somewhere beyond the sea." - "Somewhere waiting for me." - "My lover stands on golden sands " - "And watches the ships that go sailin'" - } -]) - -retriever = DeepSparseEmbeddingRetriever( - document_store, - "zoo:nlp/masked_language_modeling/distilbert-none/pytorch/huggingface/wikipedia_bookcorpus/pruned80_quant-none-vnni", - pooling_strategy="reduce_mean", -) -document_store.update_embeddings(retriever) - -pipeline = DocumentSearchPipeline(retriever) -results = pipeline.run(query="Where does my lover stand?", params={"Retriever": {"top_k": 1}}) -``` - -### DeepSparseDensePassageRetriever ### -This class implements Haystack's `DensePassageRetriever` class with DeepSparse inference using two instances of the `TransformersEmbeddingExtractionPipeline` with shared context. This node takes `query_model_path` and `passage_model_path` as arguments and produces document and query embeddings using their respective models. - -Dense passage retrieval requires trained bi-encoder models. For more information, contact support@neuralmagic.com. - -``` python3 -from haystack.document_stores import InMemoryDocumentStore -from haystack.pipelines import DocumentSearchPipeline - -from deepsparse.transformers.haystack import DeepSparseDensePassageRetriever - -document_store = InMemoryDocumentStore(similarity="cosine", embedding_dim=768, use_gpu=False) -document_store.write_documents([ - { - "content": "High and dry, out of the rain." - "It's so easy to hurt others when you can't feel pain. " - "And don't you know that a love can't grow " - "'Cause there's too much to give, 'cause you'd rather live " - "For the thrill of it all." - }, - { - "content": "Everybody here is out of sight. " - "They don't bark and they don't bite. " - "They keep things loose, they keep things light. " - "Everybody was dancing in the moonlight. " - } -]) -retriever = DeepSparseDensePassageRetriever( - document_store, - query_model_path="./query_model", - passage_model_path="./passage_model", - pooling_strategy="cls_token", -) -document_store.update_embeddings(retriever) -pipeline = DocumentSearchPipeline(retriever) - -results = pipeline.run(query="How is everybody feeling?", params={"Retriever": {"top_k": 1}}) -``` - -## Haystack Pipeline ## -The Haystack pipeline is a non-traditional pipeline which constructs Haystack nodes used for document retrieval or any other Haystack task. Said another way, this pipeline provides an API for constructing a document_store, retriever, and pipeline like the workflow described in [Document Retrieval with Haystack](#Document-Retrieval-with-Haystack). - -This pipeline supports all Haystack document stores, nodes, and pipelines as well as the DeepSparse integrated nodes `DeepSparseEmbeddingRetriever` and `DeepSparseDensePassageRetriever`. 
Users can control which nodes are included via the `config` argument. -``` python3 -from deepsparse import Pipeline -from deepsparse.transformers.haystack import print_pipeline_documents -from deepsparse.transformers.haystack import HaystackPipeline - -from haystack.utils import print_documents, fetch_archive_from_http, convert_files_to_docs, clean_wiki_text - -documents = [ - {"title": "Rick Astley", - "content": "Richard Paul Astley (born 6 February 1966) is an English singer, songwriter and " - "famous musical artist, who has been active in music for several decades. He gained " - "worldwide fame in the 1980s, having multiple hits including his signature song " - "Never Gonna Give You Up, Together Forever and Whenever You Need Somebody, and " - "returned to music full-time in the 2000s after a 6-year hiatus. Outside his " - "music career, Astley has occasionally worked as a radio DJ and a podcaster."}, - - {"title": "Chinese (Language)", - "content": "Chinese is a group of languages that form the Sinitic branch of the Sino-Tibetan " - "languages family, spoken by the ethnic Han Chinese majority and many minority " - "ethnic groups in Greater China. About 1.3 billion people (or approximately 16% " - "of the world's population) speak a variety of Chinese as their first language."}, - - {"title": "Artificial Neural Network", - "content": "An ANN is based on a collection of connected units or nodes called artificial " - "neurons, which loosely model the neurons in a biological brain. Each connection, " - "like the synapses in a biological brain, can transmit a signal to other neurons. " - "An artificial neuron receives signals then processes them and can signal neurons " - "connected to it. The signal at a connection is a real number, and the output of " - "each neuron is computed by some non-linear function of the sum of its inputs."}, - - {"title": "Picasso", - "content": "Pablo Ruiz Picasso (25 October 1881 – 8 April 1973) was a Spanish painter, " - "sculptor, printmaker, ceramicist and theatre designer who spent most of his adult " - "life in France. Regarded as one of the most influential painters of the 20th " - "century, he is known for co-founding the Cubist movement, the invention of " - "constructed sculpture, the co-invention of collage, and for the wide " - "variety of styles that he helped develop and explore"}, -] - -pipeline = HaystackPipeline( - model_path="zoo:nlp/masked_language_modeling/distilbert-none/pytorch/huggingface/wikipedia_bookcorpus/pruned80_quant-none-vnni", - docs=documents, - config={ - "document_store": "InMemoryDocumentStore", - "document_store_args": { - "embedding_dim": 768, - "similarity": "cosine", - "use_gpu": False - }, - "retriever": "DeepSparseEmbeddingRetriever", - "retriever_args": { - "pooling_strategy": "reduce_mean" - }, - "haystack_pipeline": "DocumentSearchPipeline", - } -) - -results = pipeline(queries="Famous artists", params={"Retriever": {"top_k": 1}}) -print_pipeline_documents(results) -``` -``` -Query: Famous artists - -{ 'content': 'Pablo Ruiz Picasso (25 October 1881 – 8 April 1973) was a ' - 'Spanish painter, sculptor, printmaker, ceramicist and theatre ' - 'designer who spent most of his adult life in France. 
Regarded ' 'as one of the most influential painters of the 20th century, ' 'he is known for co-founding the Cubist movement, the invention ' 'of constructed sculpture, the co-invention of collage, and for ' 'the wide variety of styles that he helped develop and explore', 'name': None} - -## Embedding Extraction Pipeline -The embedding extraction pipeline is a transformers pipeline that supports the implementation of [DeepSparse Nodes](#DeepSparse-Nodes) as well as the [Haystack Pipeline](#Haystack-Pipeline). It can also be instantiated directly to grab embeddings from any ONNX model. - -``` python3 -from deepsparse import Pipeline - -pipeline = Pipeline.create( - "embedding_extraction", - model_path="zoo:nlp/masked_language_modeling/distilbert-none/pytorch/huggingface/wikipedia_bookcorpus/pruned80_quant-none-vnni", - emb_extraction_layer=-1, - return_numpy=True, -) - -text = "sally sold sea shells by the seashore" - -embedding = pipeline(text).embeddings[0] -print(embedding) -``` - -This pipeline works by grabbing embeddings from an intermediate layer of a passed transformer architecture. This is done with the help of `truncate_transformer_onnx_model`, a function that finds the nodes within the ONNX graph that mark the last operation performed by a transformer model layer. The ONNX model graph is then truncated at that node. The embedding extraction pipeline also implements [pooling methods](https://arxiv.org/abs/1806.09828) that reduce the dimensionality of the embeddings, such as `cls_token`, `reduce_mean`, `reduce_max`, and `per_token` (None). - -## Accuracy Evaluation ## -The DeepSparse nodes were evaluated using evaluation scripts provided by Tevatron. These results are consistent with those documented in [Dense Passage Retrieval for Open-Domain Question Answering](https://arxiv.org/abs/2004.04906). 
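In the tables below, "Accuracy @k" is top-k retrieval accuracy (the fraction of queries for which at least one relevant passage appears in the top k retrieved documents), and each "Recall" column expresses a model's score as a percentage of the dense `base-none` baseline in the same table. As a rough sketch of the metric only (the function name and argument layout here are hypothetical; this is not the Tevatron evaluation code), top-k accuracy can be computed as:

``` python3
from typing import Dict, List


def top_k_accuracy(
    retrieved: Dict[str, List[str]],  # query id -> ranked ids of retrieved passages
    relevant: Dict[str, List[str]],  # query id -> ids of the gold (relevant) passages
    k: int = 20,
) -> float:
    # illustrative sketch only; a query counts as a hit if any
    # gold passage ranks within the top k retrieved results
    hits = sum(
        1
        for query, ranking in retrieved.items()
        if set(ranking[:k]) & set(relevant.get(query, []))
    )
    return hits / len(retrieved)
```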
- -|Model (WikipediaNQ)|Accuracy @20|Recall|Accuracy @100|Recall|Accuracy @ 200|Recall| -|-|-|-|-|-|-|-| -|base-none|0.7983379501|100.00%|0.863434903|100.00%|0.8842105263|100.00%| -|base-none-untied|0.7988919668|100.07%|0.8581717452|99.39%|0.8842105263|100.00%| -|pruned90-none|0.7878116343|98.68%|0.8584487535|99.42%|0.8770083102|99.19%| -|pruned90-none-untied|0.7828254848|98.06%|0.8570637119|99.26%|0.8747922438|98.93%| -pruned80-vnni|0.7847645429|98.30%|0.856232687|99.17%|0.8717451524|98.59%| -pruned80-vnni-untied|0.7817174515|97.92%|0.8509695291|98.56%|0.8717|98.59%| - -|Model (TriviaNQ) |Accuracy @20|Recall|Accuracy @100|Recall|Accuracy @ 200|Recall| -|-|-|-|-|-|-|-| -|base-none|0.7961637055|100.00%|0.853266154|100.00%|0.8672323875|100.00%| -|base-none-untied|0.7943074339|99.77%|0.8503491558|99.66%|0.8661716609|99.88%| -|pruned90-none|0.7839653496|98.47%|0.8440731901|98.92%|0.8609564218|99.28%| -|pruned90-none-untied|0.782904623|98.33%|0.8435428268|98.86%|0.8594537258|99.10%| -|pruned80-vnni|0.7930699196|99.61%|0.8480509149|99.39%|0.8649341466|99.73%| -|pruned80-vnni-untied|0.7867939539|98.82%|0.8460178556|99.15%|0.8629010872|99.50%| - -|MSMARCO Passage|MRR@10|Recall|Accuracy @10|Recall|Recall@20|Recall|Recall@100|Recall|Recall@200|Recall| -|-|-|-|-|-|-|-|-|-|-|-| -|base-none|0.3220429117|100.00%|0.6021489971|100.00%|0.6979942693|100.00%|0.8528653295|100.00%|0.8951289398|100.00%| -|base-none-untied|0.3209568722|99.66%|0.5984240688|99.38%|0.6892550143|98.75%|0.8484240688|99.48%|0.8928366762|99.74%| -|pruned90-none|0.3276589007|101.74%|0.6146131805|102.07%|0.7004297994|100.35%|0.8537249284|100.10%|0.8932664756|99.79%| -|pruned90-none-untied|0.3093550166|96.06%|0.588252149|97.69%|0.6793696275|97.33%|0.8368194842|98.12%|0.8812320917|98.45%| -|pruned80-vnni|0.3251235958|100.96%|0.6068767908|100.79%|0.6962750716|99.75%|0.8449856734|99.08%|0.8856733524|98.94%| -|pruned80-vnni-untied|0.3124041479|97.01%|0.5918338109|98.29%|0.6802292264|97.45%|0.8319484241|97.55%|0.8780802292|98.10%| - -## Performance Evaluation ## -Retrievers were also evaluated on their run time. This table compares the run time of generating query embeddings using `DenseEmbeddingRetriever` with Pytorch and `DeepSparseEmbeddingRetriever` with the DeepSparse Engine. Both retrievers were evaluated with the same [80% sparse quantized 3 layer BERT](https://sparsezoo.neuralmagic.com/models/nlp%2Fmasked_language_modeling%2Fbert-base%2Fpytorch%2Fhuggingface%2Fwikipedia_bookcorpus%2F3layer_pruned80_quant-none-vnni) model on the same CPU hardware. - -|Number of Queries|DenseEmbeddingRetriever (sec)|DeepSparseEmbeddingRetriever (sec)| -|-|-|-| -|1|0.0319|0.0159| -|10|0.137|0.0180| -|100|1.32|0.101| -|1,000|13.3|0.976| -|10,000|134.8|9.79| diff --git a/integrations/haystack/tests/test_smoke.py b/integrations/haystack/tests/test_smoke.py deleted file mode 100644 index 50cb5943e4..0000000000 --- a/integrations/haystack/tests/test_smoke.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest -from deepsparse.transformers.haystack import ( - DeepSparseEmbeddingRetriever, - DeepSparseReader, -) - -from haystack.document_stores import InMemoryDocumentStore # isort:skip -from haystack.pipelines import ( - DocumentSearchPipeline, - ExtractiveQAPipeline, -) # isort:skip - - -@pytest.mark.smoke -def test_document_search(): - - document_store = InMemoryDocumentStore( - similarity="cosine", embedding_dim=768, use_gpu=False - ) - document_store.write_documents( - [ - { - "content": "He came on a summer's day " - "Bringin' gifts from far away." - "But he made it clear he couldn't stay." - "No harbor was his home." - }, - { - "content": "Somewhere beyond the sea." - "Somewhere waiting for me." - "My lover stands on golden sands " - "And watches the ships that go sailin'" - }, - ] - ) - - retriever = DeepSparseEmbeddingRetriever( - document_store, - ( - "zoo:nlp/masked_language_modeling/distilbert-none/pytorch/huggingface" - "/wikipedia_bookcorpus/pruned80_quant-none-vnni" - ), - pooling_strategy="reduce_mean", - ) - document_store.update_embeddings(retriever) - - pipeline = DocumentSearchPipeline(retriever) - results = pipeline.run( - query="Where does my lover stand?", params={"Retriever": {"top_k": 1}} - ) - print(results) - - -@pytest.mark.smoke -def test_extractive_qa(): - - document_store = InMemoryDocumentStore( - similarity="cosine", embedding_dim=768, use_gpu=False - ) - document_store.write_documents( - [ - { - "content": "He came on a summer's day " - "Bringin' gifts from far away." - "But he made it clear he couldn't stay." - "No harbor was his home." - }, - { - "content": "Somewhere beyond the sea." - "Somewhere waiting for me." - "My lover stands on golden sands " - "And watches the ships that go sailin'" - }, - ] - ) - - retriever = DeepSparseEmbeddingRetriever( - document_store, - ( - "zoo:nlp/masked_language_modeling/distilbert-none/pytorch/huggingface" - "/wikipedia_bookcorpus/pruned80_quant-none-vnni" - ), - pooling_strategy="reduce_mean", - ) - document_store.update_embeddings(retriever) - - reader = DeepSparseReader( - model_path="zoo:nlp/question_answering/distilbert-none/pytorch/huggingface/squad/pruned80_quant-none-vnni" - ) - - pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever) - - results = pipeline.run( - query="Where does my lover stand?", - params={"Retriever": {"top_k": 1}, "Reader": {"top_k": 1}}, - ) - print(results) diff --git a/integrations/test_placeholder.py b/integrations/test_placeholder.py new file mode 100644 index 0000000000..69d4e87490 --- /dev/null +++ b/integrations/test_placeholder.py @@ -0,0 +1,14 @@ + +def test_placeholder(): + """ + Needed to make the test suite run and not throw + an error about no tests being found when + `make test_integrations` is used. 
+ + The error would look like this: + make: *** [Makefile:61: test_integrations] Error 5 + + More information can be found here: + https://github.com/pytest-dev/pytest/issues/2393 + """ + pass \ No newline at end of file diff --git a/setup.py b/setup.py index a2c1204413..c58dd49bfd 100644 --- a/setup.py +++ b/setup.py @@ -77,17 +77,10 @@ ] -def _parse_requirements_file(file_path): - with open(file_path, "r") as requirements_file: - lines = requirements_file.read().splitlines() - - return [line for line in lines if len(line) > 0 and line[0] != "#"] - - _deps = [ "numpy>=1.16.3", "onnx>=1.5.0,<1.15.0", - "pydantic>=1.8.2,<2.0.0", + "pydantic>=2.0.0,<2.8.0", "requests>=2.0.0", "tqdm>=4.0.0", "protobuf>=3.12.2", @@ -122,7 +115,7 @@ def _parse_requirements_file(file_path): ] _server_deps = [ "uvicorn>=0.15.0", - "fastapi>=0.70.0,<0.87.0", + "fastapi>=0.100.0,<0.111", "requests>=2.26.0", "python-multipart>=0.0.5", "prometheus-client>=0.14.1", @@ -153,17 +146,6 @@ def _parse_requirements_file(file_path): ] _sentence_transformers_integration_deps = ["optimum-deepsparse"] + _torch_deps -# haystack dependencies are installed from a requirements file to avoid -# conflicting versions with NM's deepsparse/transformers -_haystack_requirements_file_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "src", - "deepsparse", - "transformers", - "haystack", - "haystack_reqs.txt", -) -_haystack_integration_deps = _parse_requirements_file(_haystack_requirements_file_path) _clip_deps = [ "open_clip_torch==2.20.0", "transformers<4.40", @@ -270,7 +252,6 @@ def _setup_extras() -> Dict: "image_classification": _computer_vision_deps, "yolo": _computer_vision_deps, "yolov5": _computer_vision_deps, - "haystack": _haystack_integration_deps, "openpifpaf": _openpifpaf_integration_deps, "yolov8": _yolov8_integration_deps, "transformers": _transformers_integration_deps, diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py index caf4c833ea..18c98e2edb 100644 --- a/src/deepsparse/benchmark/data_creation.py +++ b/src/deepsparse/benchmark/data_creation.py @@ -17,7 +17,7 @@ import random import string from os import path -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, get_args import numpy @@ -58,15 +58,11 @@ def get_input_schema_type(pipeline: Pipeline) -> str: if SchemaType.TEXT_SEQ in input_schema_requirements: if input_schema_fields.get(SchemaType.TEXT_SEQ).alias == SchemaType.TEXT_PROMPT: return SchemaType.TEXT_PROMPT - sequence_types = [ - f.outer_type_ for f in input_schema_fields[SchemaType.TEXT_SEQ].sub_fields - ] + sequence_types = get_args(input_schema_fields[SchemaType.TEXT_SEQ].annotation) if List[str] in sequence_types: return SchemaType.TEXT_SEQ elif SchemaType.TEXT_INPUT in input_schema_requirements: - sequence_types = [ - f.outer_type_ for f in input_schema_fields[SchemaType.TEXT_INPUT].sub_fields - ] + sequence_types = get_args(input_schema_fields[SchemaType.TEXT_INPUT].annotation) if List[str] in sequence_types: return SchemaType.TEXT_INPUT elif SchemaType.QUESTION in input_schema_requirements: diff --git a/src/deepsparse/clip/decoder_pipeline.py b/src/deepsparse/clip/decoder_pipeline.py index 28388b3a74..b6b82b8566 100644 --- a/src/deepsparse/clip/decoder_pipeline.py +++ b/src/deepsparse/clip/decoder_pipeline.py @@ -30,10 +30,10 @@ class CLIPDecoderInput(BaseModel): """ text_embeddings: Any = Field( - description="np.array of text emebddings from the " "text branch" + None, description="np.array of text embeddings 
from the " "text branch" ) image_embeddings: Any = Field( - description="np.array of image embeddings from the " "visual branch" + None, description="np.array of image embeddings from the " "visual branch" ) diff --git a/src/deepsparse/evaluation/results.py b/src/deepsparse/evaluation/results.py index 78c4bbd501..e045098863 100644 --- a/src/deepsparse/evaluation/results.py +++ b/src/deepsparse/evaluation/results.py @@ -36,15 +36,15 @@ class Metric(BaseModel): class Dataset(BaseModel): - type: Optional[str] = Field(description="Type of dataset") + type: Optional[str] = Field(None, description="Type of dataset") name: str = Field(description="Name of the dataset") - config: Any = Field(description="Configuration for the dataset") - split: Optional[str] = Field(description="Split of the dataset") + config: Any = Field(None, description="Configuration for the dataset") + split: Optional[str] = Field(None, description="Split of the dataset") class EvalSample(BaseModel): - input: Any = Field(description="Sample input to the model") - output: Any = Field(description="Sample output from the model") + input: Any = Field(None, description="Sample input to the model") + output: Any = Field(None, description="Sample output from the model") class Evaluation(BaseModel): @@ -55,7 +55,7 @@ class Evaluation(BaseModel): dataset: Dataset = Field(description="Dataset that the evaluation was performed on") metrics: List[Metric] = Field(description="List of metrics for the evaluation") samples: Optional[List[EvalSample]] = Field( - description="List of samples for the evaluation" + None, description="List of samples for the evaluation" ) @@ -64,8 +64,9 @@ class Result(BaseModel): description="Evaluation result represented in the unified, structured format" ) raw: Any = Field( + None, description="Evaluation result represented in the raw format " - "(characteristic for the specific evaluation integration)" + "(characteristic for the specific evaluation integration)", ) @@ -97,7 +98,7 @@ def _save_to_json(result: Result, save_path: str): def _save_to_yaml(result: Result, save_path: str): - _save(yaml.dump(result.dict()), save_path, expected_ext=".yaml") + _save(yaml.dump(result.model_dump()), save_path, expected_ext=".yaml") def _save(data: str, save_path: str, expected_ext: str): diff --git a/src/deepsparse/legacy/loggers/config.py b/src/deepsparse/legacy/loggers/config.py index e878fe69db..c7132d8eab 100644 --- a/src/deepsparse/legacy/loggers/config.py +++ b/src/deepsparse/legacy/loggers/config.py @@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional -from pydantic import BaseModel, Field, validator +from pydantic import BaseModel, Field, field_validator """ @@ -57,7 +57,8 @@ class MetricFunctionConfig(BaseModel): "the subset of loggers (specified here by a list of their names).", ) - @validator("frequency") + @field_validator("frequency") + @classmethod def non_zero_frequency(cls, frequency: int) -> int: if frequency <= 0: raise ValueError( diff --git a/src/deepsparse/legacy/tasks.py b/src/deepsparse/legacy/tasks.py index 6b23c7d072..676b6f527a 100644 --- a/src/deepsparse/legacy/tasks.py +++ b/src/deepsparse/legacy/tasks.py @@ -206,7 +206,10 @@ def check_register_task( elif cls.is_haystack(task): # trigger haystack pipeline as well as transformers pipelines to # register with Pipeline.register - import deepsparse.transformers.haystack # noqa: F401 + raise DeprecationWarning( + "Haystack support with deepsparse has been deprecated, " + "kindly use deepsparse-nightly==1.8.20240404 or older" + ) elif 
cls.is_embedding_extraction(task): # trigger embedding_extraction pipelines to register with diff --git a/src/deepsparse/loggers/config.py b/src/deepsparse/loggers/config.py index 15244ae85f..74f7b7d9db 100644 --- a/src/deepsparse/loggers/config.py +++ b/src/deepsparse/loggers/config.py @@ -15,12 +15,11 @@ from typing import Dict, List, Optional import yaml -from pydantic import BaseModel, Extra, Field, validator +from pydantic import BaseModel, ConfigDict, Field, validator class LoggerConfig(BaseModel): - class Config: - extra = Extra.allow + model_config = ConfigDict(extra="allow") name: str = Field( default="PythonLogger", diff --git a/src/deepsparse/loggers/logger_manager.py b/src/deepsparse/loggers/logger_manager.py index adfad3a39a..0b3528c9fd 100644 --- a/src/deepsparse/loggers/logger_manager.py +++ b/src/deepsparse/loggers/logger_manager.py @@ -65,7 +65,7 @@ class LoggerManager(AsyncExecutor, LoggerFactory): """ def __init__(self, config: str = ""): - self.config = LoggingConfig.from_config(config).dict() + self.config = LoggingConfig.from_config(config).model_dump() super().__init__(config=self.config) def log( diff --git a/src/deepsparse/loggers/root_logger.py b/src/deepsparse/loggers/root_logger.py index f739b13aeb..93352ce3cf 100644 --- a/src/deepsparse/loggers/root_logger.py +++ b/src/deepsparse/loggers/root_logger.py @@ -38,7 +38,7 @@ class RootLogger(FrequencyFilter): its own FrequencyFilter :param config: config with respect to - the log_type (LoggerConfig().dict().get(log_type)) + the log_type (LoggerConfig().model_dump().get(log_type)) :param leaf_logger: leaf logger singleton shared among other RootLogger """ diff --git a/src/deepsparse/open_pif_paf/schemas.py b/src/deepsparse/open_pif_paf/schemas.py index 5628bb2b22..03cfb34459 100644 --- a/src/deepsparse/open_pif_paf/schemas.py +++ b/src/deepsparse/open_pif_paf/schemas.py @@ -16,7 +16,7 @@ import numpy from PIL import Image -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from deepsparse.pipelines.computer_vision import ComputerVisionSchema @@ -76,8 +76,7 @@ def from_files( input_schema = cls(*args, images=files_numpy, **kwargs) return input_schema - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) class OpenPifPafOutput(BaseModel): @@ -105,6 +104,4 @@ class OpenPifPafOutput(BaseModel): "For every prediction, it is a list of tuples of body " "part indices. 
" ) - - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) diff --git a/src/deepsparse/operators/engine_operator.py b/src/deepsparse/operators/engine_operator.py index 3c4e001293..6d265facda 100644 --- a/src/deepsparse/operators/engine_operator.py +++ b/src/deepsparse/operators/engine_operator.py @@ -15,7 +15,7 @@ from copy import deepcopy from typing import Dict, List, Optional, Union -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from deepsparse.benchmark import ORTEngine from deepsparse.engine import Context as EngineContext @@ -69,8 +69,7 @@ def join(cls, inputs: List["EngineOperatorInputs"]) -> "EngineOperatorInputs": return cls(engine_inputs=joined_engine_inputs) - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) class EngineOperatorOutputs(BaseModel): diff --git a/src/deepsparse/pipeline.py b/src/deepsparse/pipeline.py index fe74798d0a..7550194946 100644 --- a/src/deepsparse/pipeline.py +++ b/src/deepsparse/pipeline.py @@ -800,8 +800,13 @@ def haystack_pipeline(*args, **kwargs) -> "Pipeline": Neural Magic pipeline for running Haystack DocumentSearchPipeline. Supports selected Haystack Nodes as well as Haystack nodes integrated with the Neural Magic DeepSparse Engine + + Note: Deprecated due to lack of pydanticV2 support in Haystack v1 """ - return Pipeline.create("information_retrieval_haystack", *args, **kwargs) + raise DeprecationWarning( + "Haystack support with deepsparse has been deprecated, " + "kindly use deepsparse-nightly==1.8.20240404 or older" + ) def embedding_extraction_pipeline(*args, **kwargs) -> "Pipeline": diff --git a/src/deepsparse/pipelines/computer_vision.py b/src/deepsparse/pipelines/computer_vision.py index a052fb23f4..75f0ac36d0 100644 --- a/src/deepsparse/pipelines/computer_vision.py +++ b/src/deepsparse/pipelines/computer_vision.py @@ -15,6 +15,7 @@ from typing import Any, Iterable, List, TextIO, Union import numpy +from pydantic import ConfigDict try: @@ -42,9 +43,7 @@ class ComputerVisionSchema(BaseModel): images: Union[str, List[str], List[Any], Any] = Field( description="List of Images to process" ) # List[Any] to accept List[numpy.ndarray], Any to accept numpy.ndarray - - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) @classmethod def from_files( diff --git a/src/deepsparse/pipelines/embedding_extraction.py b/src/deepsparse/pipelines/embedding_extraction.py index 801c242afd..dd697e6a59 100644 --- a/src/deepsparse/pipelines/embedding_extraction.py +++ b/src/deepsparse/pipelines/embedding_extraction.py @@ -21,7 +21,7 @@ from typing import Any, List, Type, Union import numpy -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from deepsparse.legacy import Pipeline from deepsparse.log import get_main_logger @@ -47,9 +47,7 @@ class EmbeddingExtractionOutput(BaseModel): description="The output of the model which is an embedded " "representation of the input" ) - - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) @Pipeline.register( diff --git a/src/deepsparse/pipelines/numpy_schemas.py b/src/deepsparse/pipelines/numpy_schemas.py index 09211829fc..fd9d2bbadd 100644 --- a/src/deepsparse/pipelines/numpy_schemas.py +++ b/src/deepsparse/pipelines/numpy_schemas.py @@ -15,7 +15,7 @@ from typing import Generic, Type, TypeVar import numpy -from pydantic.fields import 
ModelField +from pydantic.v1.fields import ModelField Dtype = TypeVar("Dtype") diff --git a/src/deepsparse/server/cli.py b/src/deepsparse/server/cli.py index d402f616fd..37bc92353d 100644 --- a/src/deepsparse/server/cli.py +++ b/src/deepsparse/server/cli.py @@ -249,7 +249,7 @@ def main( with TemporaryDirectory() as tmp_dir: config_path = os.path.join(tmp_dir, "server-config.yaml") with open(config_path, "w") as fp: - yaml.dump(cfg.dict(), fp) + yaml.dump(cfg.model_dump(), fp) server = _fetch_server(integration=integration, config=config_path) server.start_server( diff --git a/src/deepsparse/server/config.py b/src/deepsparse/server/config.py index 595bbe8342..7b39b5798b 100644 --- a/src/deepsparse/server/config.py +++ b/src/deepsparse/server/config.py @@ -15,7 +15,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union -from pydantic import BaseModel, Field, validator +from pydantic import BaseModel, Field, field_validator from deepsparse.legacy.loggers.config import ( MetricFunctionConfig, @@ -168,7 +168,7 @@ class ServerConfig(BaseModel): default=None, ) - integration: str = Field( + integration: Optional[str] = Field( default=None, description=f"The kind of integration to use. {INTEGRATIONS}", ) @@ -207,7 +207,8 @@ class ServerConfig(BaseModel): "default SystemLoggingConfig model is used.", ) - @validator("endpoints") + @field_validator("endpoints") + @classmethod def assert_unique_endpoint_names( cls, endpoints: List[EndpointConfig] ) -> List[EndpointConfig]: @@ -224,7 +225,8 @@ def assert_unique_endpoint_names( name_list.append(name) return endpoints - @validator("endpoints") + @field_validator("endpoints") + @classmethod def set_unique_endpoint_names( cls, endpoints: List[EndpointConfig] ) -> List[EndpointConfig]: diff --git a/src/deepsparse/server/config_hot_reloading.py b/src/deepsparse/server/config_hot_reloading.py index b419b8cfed..f017f2a0d4 100644 --- a/src/deepsparse/server/config_hot_reloading.py +++ b/src/deepsparse/server/config_hot_reloading.py @@ -114,7 +114,7 @@ def _diff_generator( timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") with open(old_path, "w") as fp: fp.write(f"# Version {version} saved at {timestamp} by deepsparse.server\n") - yaml.safe_dump(old_config.dict(), fp) + yaml.safe_dump(old_config.model_dump(), fp) _LOGGER.info(f"Saved old version of config to {old_path}") version += 1 @@ -147,10 +147,10 @@ def _update_endpoints( for endpoint in removed: _LOGGER.info(f"Requesting removal of endpoint '{endpoint.route}'") - requests.delete(url, json=endpoint.dict()).raise_for_status() + requests.delete(url, json=endpoint.model_dump()).raise_for_status() for endpoint in added: _LOGGER.info(f"Requesting addition of endpoint '{endpoint.route}'") - requests.post(url, json=endpoint.dict()).raise_for_status() + requests.post(url, json=endpoint.model_dump()).raise_for_status() return added, removed diff --git a/src/deepsparse/server/helpers.py b/src/deepsparse/server/helpers.py index 5c5103d039..0bbed5ed78 100644 --- a/src/deepsparse/server/helpers.py +++ b/src/deepsparse/server/helpers.py @@ -35,7 +35,7 @@ def create_error_response(status_code: HTTPStatus, message: str) -> JSONResponse: return JSONResponse( - ErrorResponse(message=message, type="invalid_request_error").dict(), + ErrorResponse(message=message, type="invalid_request_error").model_dump(), status_code=status_code.value, ) diff --git a/src/deepsparse/server/server.py b/src/deepsparse/server/server.py index 12757466de..0c52efc508 100644 --- a/src/deepsparse/server/server.py +++ 
b/src/deepsparse/server/server.py @@ -279,7 +279,7 @@ async def benchmark( json_params = await raw_request.json() benchmark_config = PipelineBenchmarkConfig(**json_params) results = benchmark_from_pipeline( - pipeline=proxy_pipeline.pipeline, **benchmark_config.dict() + pipeline=proxy_pipeline.pipeline, **benchmark_config.model_dump() ) return results diff --git a/src/deepsparse/subgraph_execute.py b/src/deepsparse/subgraph_execute.py index 8c8cf4197d..01bc510f05 100644 --- a/src/deepsparse/subgraph_execute.py +++ b/src/deepsparse/subgraph_execute.py @@ -33,9 +33,12 @@ class StreamingOutput(BaseModel): """ data_to_return: Any = Field( - description="Data that should be returned to be used in the next pipeline step" + None, + description="Data that should be returned to be used in the next pipeline step", + ) + data_to_yield: Any = Field( + None, description="Data that should be yielded to the user" ) - data_to_yield: Any = Field(description="Data that should be yielded to the user") class SubGraphExecutor: diff --git a/src/deepsparse/transformers/haystack/README.md b/src/deepsparse/transformers/haystack/README.md deleted file mode 100644 index 2fd153f85c..0000000000 --- a/src/deepsparse/transformers/haystack/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Information Retrieval with Haystack # -For more information about setup, usage, and examples see [integrations/haystack/README.md](https://github.com/neuralmagic/deepsparse/tree/main/integrations/haystack/README.md) diff --git a/src/deepsparse/transformers/haystack/__init__.py b/src/deepsparse/transformers/haystack/__init__.py index 4844b8896c..6f836dfa21 100644 --- a/src/deepsparse/transformers/haystack/__init__.py +++ b/src/deepsparse/transformers/haystack/__init__.py @@ -19,102 +19,6 @@ # flake8: noqa # isort: skip_file - -import logging as _logging -import os as _os - -import deepsparse as _deepsparse - - -_HAYSTACK_PREFERRED_VERSION = "1.4.0" -_HAYSTACK_EXTRAS = "[all]" - - -# check haystack installation -try: - import haystack as _haystack - - if _haystack.__version__ != _HAYSTACK_PREFERRED_VERSION: - raise ValueError( - f"Deepsparse requires farm-haystack=={_HAYSTACK_PREFERRED_VERSION}, " - f"but found {_haystack.__version__}" - ) - _haystack_import_error = None -except Exception as _haystack_import_err: - _haystack_import_error = _haystack_import_err - -_LOGGER = _logging.getLogger(__name__) - - -def _install_haystack_and_deps(): - import subprocess as _subprocess - import sys as _sys - - try: - _subprocess.check_call( - [ - _sys.executable, - "-m", - "pip", - "install", - f"farm-haystack{_HAYSTACK_EXTRAS}=={_HAYSTACK_PREFERRED_VERSION}", - "--no-dependencies", - ] - ) - - import haystack as _haystack - - _LOGGER.info("haystack and dependencies successfully installed") - except Exception: - raise ValueError( - "Unable to install and import haystack dependencies. Check " - "that haystack is installed, if not, install via " - "`pip install deepsparse[haystack]` and `pip install " - f"farm-haystack{_HAYSTACK_EXTRAS}=={_HAYSTACK_PREFERRED_VERSION} " - "--no-dependencies`" - ) - - -def _check_haystack_install(): - if _haystack_import_error is not None: - import os - - if os.getenv("NM_NO_AUTOINSTALL_HAYSTACK", False): - _LOGGER.warning( - "Unable to import haystack, skipping auto installation " - "due to NM_NO_AUTOINSTALL_HAYSTACK" - ) - # skip any further checks - return - else: - _LOGGER.warning( - "haystack installation not detected. 
Installing " - "haystack dependencies if haystack is already " - "installed in the environment, it will be overwritten. Set " - "environment variable NM_NO_AUTOINSTALL_HAYSTACK to disable" - ) - _install_haystack_and_deps() - - # re check import after potential install - try: - import haystack as _haystack - - if _haystack.__version__ != _HAYSTACK_PREFERRED_VERSION: - raise ValueError( - f"Deepsparse requires farm-haystack=={_HAYSTACK_PREFERRED_VERSION}, " - f"but found {_haystack.__version__}" - ) - except Exception: - _LOGGER.warning( - "haystack and its dependencies may not be installed. They can be installed " - "via `pip install deepsparse[haystack]` and `pip install " - f"farm-haystack{_HAYSTACK_EXTRAS}=={_HAYSTACK_PREFERRED_VERSION} " - "--no-dependencies`" - ) - - -_check_haystack_install() - -from .nodes import * -from .pipeline import * -from .helpers import * +raise DeprecationWarning( + "Haystack support with deepsparse has been deprecated, kindly use deepsparse-nightly==1.8.20240404 or older" +) diff --git a/src/deepsparse/transformers/haystack/haystack_reqs.txt b/src/deepsparse/transformers/haystack/haystack_reqs.txt deleted file mode 100644 index 37e937fbc1..0000000000 --- a/src/deepsparse/transformers/haystack/haystack_reqs.txt +++ /dev/null @@ -1,80 +0,0 @@ -# haystack_reqs.py -# -# This file is used to control which dependencies are installed by -# deepsparse[haystack]. This is done to ensure that farm-haystack has all of its -# dependencies without installing dependencies that conflict with NM. -# -# This file lists dependencies for farm-haystack[all]==1.4.0, excluding the following -# which conflict with deepsparse dependencies: -# [transformers] -# the following versions have been updated to match deepsparse and sparseml versioning -# [torch] -# you can see their haystack versions as comments in this file -# -# because haystack cannot be included in setup.py without its inclusion also causing -# an installation of huggingface/transformers, it will be auto installed through -# deepsparse/transformers/haystack/__init__.py - -importlib-metadata -torch>=1.12.1 -requests -pydantic -nltk -pandas -dill -tqdm -networkx -mmh3 -quantulum3 -posthog -azure-ai-formrecognizer>=3.2.0b2 -azure-core<1.23 -more_itertools -python-docx -langdetect -tika -sentence-transformers>=2.2.0 -scipy>=1.3.2 -scikit-learn>=1.0.0 -seqeval -mlflow -elasticsearch>=7.7,<=7.10 -elastic-apm -rapidfuzz -jsonschema -sqlalchemy>=1.4.2,<2 -sqlalchemy_utils -psycopg2-binary -faiss-cpu==1.7.2 -pymilvus<2.0.0 -weaviate-client==3.3.3 -pinecone-client -SPARQLWrapper -selenium -webdriver-manager -beautifulsoup4 -markdown -python-magic -pytesseract==0.3.7 -pillow -pdf2image==1.14.0 -onnxruntime -onnxruntime_tools -ray -aiorwlock>=1.3.0,<2 -grpcio==1.43.0 -beir -mypy -typing_extensions -pytest -responses -tox -coverage -python-multipart -psutil -pylint -black[jupyter] -mkdocs -jupytercontrib -watchdog -requests-cache diff --git a/src/deepsparse/transformers/haystack/helpers.py b/src/deepsparse/transformers/haystack/helpers.py deleted file mode 100644 index d17890ed48..0000000000 --- a/src/deepsparse/transformers/haystack/helpers.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from haystack.utils import print_documents - -from deepsparse.transformers.haystack import HaystackPipelineOutput - - -__all__ = [ - "print_pipeline_documents", -] - - -def print_pipeline_documents( - haystack_pipeline_output: HaystackPipelineOutput, -) -> None: - """ - Helper function to print documents directly from NM Haystack Pipeline outputs - - :param haystack_pipeline_output: instance of HaystackPipelineOutput schema - :return: None - """ - if isinstance(haystack_pipeline_output.query, list): - for i in range(len(haystack_pipeline_output.query)): - results_dict = { - key: value[i] for key, value in haystack_pipeline_output.dict().items() - } - print_documents(results_dict) - else: - print_documents(haystack_pipeline_output.dict()) diff --git a/src/deepsparse/transformers/haystack/nodes.py b/src/deepsparse/transformers/haystack/nodes.py deleted file mode 100644 index 1132b563c1..0000000000 --- a/src/deepsparse/transformers/haystack/nodes.py +++ /dev/null @@ -1,452 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import List, Optional, Union - -import numpy -from haystack.document_stores import BaseDocumentStore -from haystack.nodes import BaseReader, DensePassageRetriever, EmbeddingRetriever -from haystack.nodes.retriever._embedding_encoder import _BaseEmbeddingEncoder -from haystack.nodes.retriever.base import BaseRetriever -from haystack.schema import Answer, Document - -from deepsparse import Pipeline -from deepsparse.engine import Context -from deepsparse.log import get_main_logger -from deepsparse.transformers.pipelines.question_answering import ( - QuestionAnsweringOutput, - QuestionAnsweringPipeline, -) - - -__all__ = [ - "DeepSparseEmbeddingRetriever", - "DeepSparseDensePassageRetriever", - "DeepSparseEmbeddingEncoder", - "DeepSparseReader", -] - - -_LOGGER = get_main_logger() - - -class DeepSparseEmbeddingRetriever(EmbeddingRetriever): - """ - Deepsparse implementation of Haystack EmbeddingRetriever - Utilizes TransformersEmbeddingExtractionPipeline to create embeddings - - example integration into haystack pipeline: - ```python - document_store = ElasticsearchDocumentStore() - retriever = DeepSparseEmbeddingRetriever( - document_store=document_store, - model_path="masked_language_modeling_model_dir/" - ) - pipeline = DocumentSearchPipeline(retriever) - ``` - - :param document_store: reference to document store to retrieve from - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param batch_size: number of documents to encode at once. Default is 1 - :param max_seq_len: longest length of each document sequence. Maximum number - of tokens for the document text. Longer ones will be cut down - :param pooling_strategy: strategy for combining embeddings - :param emb_extraction_layer: f an int, the transformer layer number from - which the embeddings will be extracted. If a string, the name of last - ONNX node in model to draw embeddings from. If None, leave the model - unchanged. Default is -1 (last transformer layer before prediction head) - :param top_k: how many documents to return per query - :param progress_bar: if true displays progress bar during embedding. - Not supported by DeepSparse retriever nodes. Default is False - :param scale_score: whether to scale the similarity score to the unit interval - (range of [0,1]). If true (default) similarity scores (e.g. cosine or - dot_product) which naturally have a different value range will be scaled - to a range of [0,1], where 1 means extremely relevant. Otherwise raw - similarity scores (e.g. cosine or dot_product) will be used - :param embed_meta_fields: concatenate the provided meta fields and text - passage / table to a text pair that is then used to create the embedding. - This approach is also used in the TableTextRetriever paper and is likely - to improve performance if your titles contain meaningful information for - retrieval (topic, entities etc.). 
- :param kwargs: extra arguments passed to TransformersEmbeddingExtractionPipeline - """ - - def __init__( - self, - document_store: BaseDocumentStore, - model_path: str, - batch_size: int = 1, - max_seq_len: int = 512, - pooling_strategy: str = "reduce_mean", - emb_extraction_layer: Union[int, str, None] = -1, - top_k: int = 10, - progress_bar: bool = False, - scale_score: bool = True, - embed_meta_fields: List[str] = [], - **kwargs, - ): - super(BaseRetriever).__init__() - - self.document_store = document_store - self.model_path = model_path - self.batch_size = batch_size - self.max_seq_len = max_seq_len - self.pooling_strategy = pooling_strategy - self.emb_extraction_layer = emb_extraction_layer - self.top_k = top_k - self.progress_bar = progress_bar - self.scale_score = scale_score - self.embed_meta_fields = embed_meta_fields - - if self.batch_size != 1: - raise ValueError("DeepSparseEmbeddingRetriever only supports batch_size 1") - - _LOGGER.info(f"Init retriever using embeddings of model at {model_path}") - if self.progress_bar: - _LOGGER.warn( - "DeepSparseEmbeddingRetriever does not support progress bar, set " - "progress_bar to False" - ) - - self.embedding_encoder = DeepSparseEmbeddingEncoder(self, kwargs) - - def train(*args, **kwargs): - raise NotImplementedError("DeepSparse Engine does not support training") - - def save(*args, **kwargs): - raise NotImplementedError("DeepSparse Engine does not support saving to files") - - def load(*args, **kwargs): - raise NotImplementedError( - "DeepSparse Engine does not support loading from files" - ) - - -class DeepSparseDensePassageRetriever(DensePassageRetriever): - """ - Deepsparse implementation of Haystack DensePassageRetriever - Utilizes two instances of TransformersEmbeddingExtractionPipeline to - perform query model and passage model inference - - example integration into haystack pipeline: - ```python - document_store = ElasticsearchDocumentStore() - retriever = DeepSparseDensePassageRetriever( - document_store=document_store, - query_model_path="query_model_dir/", - passage_model_path="query_model_dir/" - ) - pipeline = DocumentSearchPipeline(retriever) - ``` - - :param document_store: reference to document store to retrieve from - :param query_model_path: sparsezoo stub to a query model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param passage_model_path: sparsezoo stub to a passage model or (preferred) - a directory containing a model.onnx, tokenizer config, and model config - :param max_seq_len_query: longest length of each query sequence. Maximum - number of tokens for the document text. Longer ones will be cut down. - Default is 32 - :param max_seq_len_passage: longest length of each document sequence. - Maximum number of tokens for the document text. Longer ones will be - cut down. Default is 156 - :param batch_size: number of documents and queries to encode at once. - Default is 1 - :param emb_extraction_layer: if an int, the transformer layer number from - which the embeddings will be extracted. If a string, the name of last - ONNX node in model to draw embeddings from. If None, leave the model - unchanged. Default is -1 (last transformer layer before prediction head) - :param pooling_strategy: strategy for combining embeddings. Default is - "cls_token" - :param top_k: how many documents to return per query. Default is 10 - :param embed_title: True if titles should be embedded into the passage. Raw - text input will be the title followed by a space followed by the content. 
- Default is False - :param progress_bar: if true displays progress bar during embedding. - Not supported by DeepSparse retriever nodes. Default is False - :param scale_score: whether to scale the similarity score to the unit interval - (range of [0,1]). If true (default) similarity scores (e.g. cosine or - dot_product) which naturally have a different value range will be scaled - to a range of [0,1], where 1 means extremely relevant. Otherwise raw - similarity scores will be used. Default is True - :param context: context shared between query and passage models. If None - is provided, then a new context with 4 streams will be created. Default - is None - :param pipeline_kwargs: extra arguments passed to - `TransformersEmbeddingExtractionPipeline` - """ - - def __init__( - self, - document_store: BaseDocumentStore, - query_model_path, - passage_model_path, - max_seq_len_query: int = 32, - max_seq_len_passage: int = 156, - batch_size: int = 1, - emb_extraction_layer: Union[int, str, None] = -1, - pooling_strategy: str = "cls_token", - top_k: int = 10, - embed_title: bool = False, - progress_bar: bool = False, - scale_score: bool = True, - context: Optional[Context] = None, - **pipeline_kwargs, - ): - super(BaseRetriever).__init__() - - self.document_store = document_store - self.batch_size = batch_size - self.progress_bar = progress_bar - self.pooling_strategy = pooling_strategy - self.top_k = top_k - self.embed_title = embed_title - self.scale_score = scale_score - self.context = context - self.use_gpu = False - self.devices = ["cpu"] - - if self.progress_bar: - _LOGGER.warn( - "DeepSparseDensePassageRetriever does not support progress bar, set " - "progress_bar to False" - ) - - if "model_path" in pipeline_kwargs: - del pipeline_kwargs["model_path"] # ignore model_path argument - if "max_seq_len" in pipeline_kwargs: - del pipeline_kwargs["max_seq_len"] # ignore max_seq_len argument - if document_store is None: - raise ValueError( - "DeepSparseDensePassageRetriever must be initialized with a " - "document_store" - ) - if pooling_strategy != "cls_token": - _LOGGER.warning( - "You are using a Dense Passage Retriever model with " - f"{pooling_strategy} pooling_strategy. 
We recommend you use " - "cls_token instead" - ) - if pooling_strategy == "per_token" and max_seq_len_query != max_seq_len_passage: - raise ValueError( - "per_token pooling strategy requires that max_seq_len_query " - f"({max_seq_len_query}) match max_seq_len_passage " - f"({max_seq_len_passage})" - ) - - if self.context is None: - self.context = Context() - - _LOGGER.info("Creating query pipeline") - self.query_pipeline = Pipeline.create( - "transformers_embedding_extraction", - query_model_path, - batch_size=batch_size, - sequence_length=max_seq_len_query, - emb_extraction_layer=emb_extraction_layer, - extraction_strategy=pooling_strategy, - context=context, - return_numpy=True, - **pipeline_kwargs, - ) - _LOGGER.info("Creating passage pipeline") - self.passage_pipeline = Pipeline.create( - "transformers_embedding_extraction", - passage_model_path, - batch_size=batch_size, - sequence_length=max_seq_len_passage, - emb_extraction_layer=emb_extraction_layer, - extraction_strategy=pooling_strategy, - context=context, - return_numpy=True, - **pipeline_kwargs, - ) - _LOGGER.info("Query and passage pipelines initialized") - - def embed_queries(self, texts: List[str]) -> List[numpy.ndarray]: - """ - :param texts: list of query strings to embed - :return: list of embeddings for each query - """ - return self.query_pipeline(texts).embeddings - - def embed_documents(self, docs: List[Document]) -> List[numpy.ndarray]: - """ - :param docs: list of document strings to embed - :return: list of embeddings for each document - """ - passage_inputs = [self._document_to_passage_input(doc) for doc in docs] - return self.passage_pipeline(passage_inputs).embeddings - - def train(*args, **kwargs): - raise NotImplementedError("DeepSparse Engine does not support model training") - - def save(*args, **kwargs): - raise NotImplementedError("DeepSparse Engine does not support saving to files") - - def load(*args, **kwargs): - raise NotImplementedError( - "DeepSparse Engine does not support loading from files" - ) - - def _document_to_passage_input(self, document: Document) -> str: - # Preprocesses documents to be used as pipeline inputs - # - # :param document: document to turn into raw text input - # :return: raw text input of document title and content - if ( - hasattr(document, "meta") - and document.meta.get("title", None) is not None - and self.embed_title - ): - return f"{document.meta['title']} {document.content}" - - return document.content - - def _get_predictions(*args, **kwargs): - raise NotImplementedError( - "This helper function is not used by DeepSparseDensePassageRetriever" - ) - - -class DeepSparseEmbeddingEncoder(_BaseEmbeddingEncoder): - """ - Deepsparse implementation of Haystack EmbeddingEncoder - - :param retriever: retriever that uses this encoder - :param pipeline_kwargs: extra arguments passed to - `TransformersEmbeddingExtractionPipeline` - """ - - def __init__(self, retriever: DeepSparseEmbeddingRetriever, pipeline_kwargs): - self.embedding_pipeline = Pipeline.create( - "transformers_embedding_extraction", - model_path=retriever.model_path, - batch_size=retriever.batch_size, - sequence_length=retriever.max_seq_len, - emb_extraction_layer=retriever.emb_extraction_layer, - extraction_strategy=retriever.pooling_strategy, - return_numpy=True, - **pipeline_kwargs, - ) - - self.batch_size = retriever.batch_size - self.show_progress_bar = retriever.progress_bar - document_store = retriever.document_store - - if self.show_progress_bar: - _LOGGER.warn( - "DeepSparseEmbeddingEncoder does not support 
progress bar, set " - "retriever progress_bar to False" - ) - if document_store.similarity != "cosine": - _LOGGER.warning( - f"You are using document store embeddings with the " - f"{document_store.similarity} function. We recommend using " - "cosine instead. This can be set when initializing DocumentStore" - ) - - def embed( - self, texts: Union[List[List[str]], List[str], str] - ) -> List[numpy.ndarray]: - """ - :param texts: list of strings to embed - :return: list of embeddings for each string - """ - return self.embedding_pipeline(texts).embeddings - - def embed_queries(self, texts: List[str]) -> List[numpy.ndarray]: - """ - :param texts: list of query strings to embed - :return: list of embeddings for each query - """ - return self.embed(texts) - - def embed_documents(self, docs: List[Document]) -> List[numpy.ndarray]: - """ - :param docs: list of document strings to embed - :return: list of embeddings for each document - """ - passages = [d.content for d in docs] - return self.embed(passages) - - -class DeepSparseReader(BaseReader): - def __init__( - self, - model_path: str, - top_k=10, - top_k_per_candidate=3, - max_seq_len=256, - doc_stride=128, - context_window: Union[str, int] = "passage", - **kwargs, - ): - super().__init__() - self.top_k = top_k - self.context_window = context_window - self.pipeline = QuestionAnsweringPipeline( - model_path=model_path, - doc_stride=doc_stride, - sequence_length=max_seq_len, - n_best_size=top_k_per_candidate, - max_answer_length=64, - **kwargs, - ) - - def predict(self, query: str, documents: List[Document], top_k): - answers = [] - for doc in documents: - out: QuestionAnsweringOutput = self.pipeline( - context=doc.content, question=query - ) - if self.context_window == "passage": - start = doc.content.rfind("\n\n", 0, out.start) - if start < 0: - start = doc.content.rfind("\n", 0, out.start) - if start < 0: - start = out.start - - end = doc.content.find("\n\n", out.end, len(doc.content)) - if end < 0: - end = doc.content.find("\n", out.end, len(doc.content)) - if end < 0: - end = out.end - - else: - assert isinstance(self.context_window, int) - start = max(0, out.start - self.context_window) - end = min(len(doc.content), out.end + self.context_window) - assert start >= 0 and end >= 0 and end > start, (start, end) - context = doc.content[start:end].strip() - - answers.append( - Answer( - answer=out.answer, - type="extractive", - score=out.score, - context=context, - document_id=doc.id, - meta=doc.meta, - ) - ) - - # sort answers by their `score` and select top-k - answers = sorted(answers, reverse=True) - answers = answers[: self.top_k] - return {"query": query, "answers": answers} - - def predict_batch(self, *args, **kwargs): - raise NotImplementedError diff --git a/src/deepsparse/transformers/haystack/pipeline.py b/src/deepsparse/transformers/haystack/pipeline.py deleted file mode 100644 index 9fc6f28185..0000000000 --- a/src/deepsparse/transformers/haystack/pipeline.py +++ /dev/null @@ -1,502 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
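> Editor's note: although this deletion removes the Haystack retriever nodes, the `transformers_embedding_extraction` pipeline they wrapped remains in the package. A minimal sketch of invoking it directly, using the zoo stub this file registers later as its default model path; the pooling strategy and other kwargs are illustrative assumptions mirroring the deleted retriever code, not a prescribed API.

```python
# Minimal sketch: query embeddings via the embedding-extraction pipeline
# that the deleted Haystack nodes wrapped. Model stub is the default
# registered in this file; remaining kwargs are illustrative assumptions.
from deepsparse.legacy.pipeline import Pipeline

embedding_pipeline = Pipeline.create(
    task="transformers_embedding_extraction",
    model_path="zoo:nlp/masked_language_modeling/bert-base/pytorch/"
    "huggingface/bookcorpus_wikitext/3layer_pruned90-none",
    extraction_strategy="reduce_mean",  # pooling, as in the deleted retriever kwargs
    return_numpy=True,
)

embeddings = embedding_pipeline(
    ["How many protons are in a hydrogen atom?"]
).embeddings
```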
-# See the License for the specific language governing permissions and -# limitations under the License. - -# postprocessing adapted from huggingface/transformers - -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Pipeline implementation and pydantic models for Haystack pipeline. Supports a -sample of haystack nodes meant to be used DeepSparseEmbeddingRetriever -""" -import importlib -from typing import Any, Dict, List, Optional, Tuple, Type, Union - -import numpy -from haystack.schema import Document -from pydantic import BaseModel, Field - -from deepsparse.legacy.pipeline import Pipeline -from deepsparse.transformers import haystack as DeepSparseHaystack - - -# because Haystack implements submodules as members of the Haystack module -# which are not initialized unless the Haystack module is initialized, -# these submodules must be imported using importlib -DocumentStoreHaystack = importlib.import_module(".document_stores", "haystack") -RetrieverHaystack = importlib.import_module(".nodes", "haystack") -PipelineHaystack = importlib.import_module(".pipelines", "haystack") - - -__all__ = [ - "HaystackPipelineInput", - "HaystackPipelineOutput", - "HaystackPipelineConfig", - "HaystackPipeline", -] - - -class HaystackPipelineInput(BaseModel): - """ - Schema for inputs to Haystack pipelines - """ - - queries: Union[str, List[str]] = Field( - description="String or list of strings to query documents with" - ) - params: Dict[Any, Any] = Field( - description="Dictionary of params to pass to Haystack pipeline", default={} - ) - - -class HaystackPipelineOutput(BaseModel): - """ - Schema for outputs to Haystack pipelines - """ - - documents: Union[List[List[Document]], List[Document]] = Field( - description="List of document results for each input query" - ) - root_node: Union[str, List[str]] = Field( - description="Root node of Haystack Pipeline's graph" - ) - params: Union[List[Dict[str, Any]], Dict[str, Any]] = Field( - description="Params passed to Haystack pipeline" - ) - query: Union[List[str], str] = Field( - description="Query passed to Haystack Pipeline" - ) - node_id: Union[List[str], str] = Field( - description="Node id field from Haystack Pipeline output" - ) - - -class HaystackPipelineConfig(BaseModel): - """ - Schema specifying HaystackPipeline config. Allows for specifying which - haystack nodes to use and what their arguments should be - """ - - document_store: str = Field( - description="Name of haystack document store to use. " - "Default ElasticsearchDocumentStore", - default="InMemoryDocumentStore", - ) - document_store_args: Dict[str, Any] = Field( - description="Keyword arguments for initializing document_store", - default={}, - ) - retriever: str = Field( - description="Name of document retriever to use. 
Default " - "DeepSparseEmbeddingRetriever (recommended)", - default="DeepSparseEmbeddingRetriever", - ) - retriever_args: Dict[str, Any] = Field( - description="Keyword arguments for initializing retriever", - default={}, - ) - haystack_pipeline: str = Field( - description="Name of Haystack pipeline to use. Default " - "DocumentSearchPipeline", - default="DocumentSearchPipeline", - ) - haystack_pipeline_args: Dict[str, Any] = Field( - description="Keyword arguments for initializing haystack_pipeline", - default={}, - ) - - -@Pipeline.register( - task="information_retrieval_haystack", - task_aliases=["haystack"], - default_model_path="zoo:nlp/masked_language_modeling/bert-base/pytorch/" - "huggingface/bookcorpus_wikitext/3layer_pruned90-none", -) -class HaystackPipeline(Pipeline): - """ - Neural Magic pipeline for running Haystack DocumentSearchPipeline. - Supports selected Haystack Nodes as well as Haystack nodes integrated - with the Neural Magic DeepSparse Engine - - example embedding model instantiation: - ```python - haystack_pipeline = Pipeline.create( - task="information_retrieval_haystack", - model_path="masked_language_modeling_model_dir/", - config={ - "document_store": "InMemoryDocumentStore", - "document_store_args": { - "similarity": "cosine", - "use_gpu": False, - }, - "retriever": "DeepSparseEmbeddingRetriever", - "retriever_args": { - "extraction_strategy": "reduce_mean" - } - }, - ) - ``` - - example deepsparse biencoder instantiation - ```python - haystack_pipeline = Pipeline.create( - task="information_retrieval_haystack", - config={ - "document_store": "InMemoryDocumentStore", - "document_store_args": { - "similarity": "cosine", - "use_gpu": False, - }, - "retriever": "DeepSparseDensePassageRetriever", - "retriever_args": { - "query_model_path": "./query_model", - "passage_model_path": "./passage_model" - } - }, - ) - ``` - - writing documents: - ```python - haystack_pipeline.write_documents([ - { - "title": "Claude Shannon", - "content": "Claude Elwood Shannon was an American mathematician, " - "electrical engineer, and cryptographer known as a father of " - "information theory. He was a 21-year-old master's degree student at " - "the Massachusetts Institute of Technology (MIT)." - }, - { - "title": "Vincent van Gogh", - "content": "Van Gogh was born into an upper-middle-class family. " - "As a child he was serious, quiet and thoughtful. He began drawing " - "at an early age and as a young man worked as an art dealer." - }, - { - "title": "Stevie Wonder", - "content": "Stevland Hardaway Morris, known professionally as " - "Stevie Wonder, is an American singer and musician, who is " - "credited as a pioneer and influence by musicians across a range " - "of genres." - } - ]) - ``` - - example queries: - ```python - from deepsparse.transformers.haystack import print_pipeline_documents - pipeline_outputs = haystack_pipeline( - queries="who invented information theory", - params={"Retriever": {"top_k": 4}} - ) - print_pipeline_documents(pipeline_outputs) - - pipeline_outputs = haystack_pipeline( - queries=[ - "famous artists", - "What is Stevie Wonder's real name?" - ], - params={"Retriever": {"top_k": 4}} - ) - print_pipeline_documents(pipeline_outputs) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. 
Default is 'deepsparse' - :param batch_size: batch size to use for retriever inference - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param docs: list of documents to be written to document_store. Can also - be written after instantiation with write_documents method. - Default is None - :param config: dictionary or instance of HaystackPipelineConfig. Used to - specify Haystack node arguments - :param retriever_kwargs: keyword arguments to be passed to retriever. If - the retriever is a deepsparse retriever, then these arguments will also - be passed to the TransformersEmbeddingExtractionPipeline of the retriever - """ - - def __init__( - self, - *, - config: Optional[Union[HaystackPipelineConfig, Dict[str, Any]]] = None, - docs: Optional[List[Dict]] = None, - **retriever_kwargs, - ): - # pass arguments to retriever (which then passes to extraction pipeline) - self._config = self._parse_config(config) - - self._document_store = None - self._retriever = None - self._haystack_pipeline = None - self.initialize_pipeline(retriever_kwargs) - - if docs is not None: - self.write_documents(docs, overwrite=True) - - def initialize_pipeline(self, init_retriever_kwargs: Dict[str, Any]) -> None: - """ - Instantiate Haystack nodes needed to run pipeline - - :param init_retriever_kwargs: retriever args passed at the initialization - of this pipeline - :return: None - """ - # intialize document store from haystack - DocumentStoreClass = getattr(DocumentStoreHaystack, self._config.document_store) - self._document_store = DocumentStoreClass(**self._config.document_store_args) - - # find retriever class, merge args if necessary - if hasattr(RetrieverHaystack, self._config.retriever): - RetrieverClass = getattr(RetrieverHaystack, self._config.retriever) - - # manually merge batch size argument passed to HaystackPipeline - retriever_args = self._config.retriever_args.copy() - if init_retriever_kwargs.get("batch_size") is not None: - retriever_args["HaystackPipeline().batch_size"] = init_retriever_kwargs[ - "batch_size" - ] - retriever_args = self._rename_arg_with_check( - retriever_args, "HaystackPipeline().batch_size", "batch_size" - ) - else: - RetrieverClass = getattr(DeepSparseHaystack, self._config.retriever) - retriever_args = self._merge_retriever_args( - self._config.retriever_args, init_retriever_kwargs - ) - self._retriever = RetrieverClass(self._document_store, **retriever_args) - - # pipeline from haystack - PipelineClass = getattr(PipelineHaystack, self._config.haystack_pipeline) - self._haystack_pipeline = PipelineClass( - self._retriever, **self._config.haystack_pipeline_args - ) - - def write_documents( - self, docs: List[Union[Dict[Any, Any], Document]], overwrite: bool = True - ) -> None: - """ - Write documents to document_store - - :param docs: list of dicts or Documents to write - :param overwrite: delete previous 
documents in store before writing - :return: None - """ - if overwrite: - self._document_store.delete_documents() - self._document_store.write_documents(docs) - self._document_store.update_embeddings(self._retriever) - - def __call__(self, *args, **kwargs) -> BaseModel: - """ - Run Haystack pipeline - - :param args: input args - :param kwargs: input kwargs - :return: outputs from Haystack pipeline. If multiple inputs are passed, - then each field contains a list of values - """ - if "engine_inputs" in kwargs: - raise ValueError( - "invalid kwarg engine_inputs. engine inputs determined " - f"by {self.__class__.__qualname__}.parse_inputs" - ) - - # parse inputs into input_schema schema if necessary - pipeline_inputs = self.parse_inputs(*args, **kwargs) - if not isinstance(pipeline_inputs, self.input_schema): - raise RuntimeError( - f"Unable to parse {self.__class__} inputs into a " - f"{self.input_schema} object. Inputs parsed to {type(pipeline_inputs)}" - ) - - # run pipeline - if isinstance(pipeline_inputs.queries, List): - pipeline_results = [ - self._haystack_pipeline.run(query=query, params=pipeline_inputs.params) - for query in pipeline_inputs.queries - ] - else: - pipeline_results = self._haystack_pipeline.run( - query=pipeline_inputs.queries, params=pipeline_inputs.params - ) - - outputs = self.process_pipeline_outputs(pipeline_results) - - # validate outputs format - if not isinstance(outputs, self.output_schema): - raise ValueError( - f"Outputs of {self.__class__} must be instances of " - f"{self.output_schema} found output of type {type(pipeline_results)}" - ) - - return outputs - - def process_pipeline_outputs( - self, results: Union[Dict[str, Any], List[Dict[str, Any]]] - ) -> BaseModel: - """ - :results: list or instance of a dictionary containing outputs from - Haystack pipeline - :return: results cast to output_schema. 
If multiple inputs are passed, - then each field contains a list of values - """ - if isinstance(results, List): - outputs = {key: [] for key in results[0].keys()} - for result in results: - for key, value in result.items(): - outputs[key].append(value) - else: - outputs = results - - return self.output_schema(**outputs) - - @property - def input_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that inputs to this pipeline must comply to - """ - return HaystackPipelineInput - - @property - def output_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that outputs of this pipeline must comply to - """ - return HaystackPipelineOutput - - @property - def config_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that configs passed to this pipeline must - comply to - """ - return HaystackPipelineConfig - - def setup_onnx_file_path(self) -> str: - raise NotImplementedError() - - def process_engine_outputs( - self, - engine_outputs: List[numpy.ndarray], - **kwargs, - ) -> BaseModel: - raise NotImplementedError() - - def process_inputs( - self, - inputs: BaseModel, - ) -> Union[List[numpy.ndarray], Tuple[List[numpy.ndarray], Dict[str, Any]]]: - raise NotImplementedError() - - def _parse_config( - self, - config: Optional[Union[HaystackPipelineConfig, dict]], - ) -> BaseModel: - # :param config: instance of config_schema or dictionary of config values - # :return: instance of config_schema - config = config if config else self.config_schema() - - if isinstance(config, self.config_schema): - pass - - elif isinstance(config, dict): - config = self.config_schema(**config) - - else: - raise ValueError( - f"pipeline {self.__class__} only supports either only a " - f"{self.config_schema} object a dict of keywords used to " - f"construct one. Found {config} instead" - ) - - return config - - def _rename_arg_with_check( - self, arguments_dict: Dict[str, Any], old_arg_name: str, new_arg_name: str - ) -> Dict[str, Any]: - # :param arguments_dict: dictionary containing arguments to be renamed - # :param old_arg_name: name of argument to be renamed - # :param new_arg_name: new name of the argument - # :return: arguments_dict with new updated name - arguments_dict_copy = arguments_dict.copy() - if old_arg_name in arguments_dict_copy: - if new_arg_name in arguments_dict_copy: - raise ValueError( - f"Found both {old_arg_name} and {new_arg_name} in arguments. " - "Specify only one" - ) - arguments_dict_copy[new_arg_name] = arguments_dict_copy[old_arg_name] - del arguments_dict_copy[old_arg_name] - - return arguments_dict_copy - - def _merge_retriever_args( - self, - config_retriever_args: Dict[str, Any], - init_retriever_kwargs: Dict[str, Any], - ) -> Dict[str, Any]: - # Merges retriever args given in config with args given at - # HaystackPipeline initialization. Raises errors for conflicts - - # :param config_retriever_args: arguments given in config - # :param init_retriever_kwargs: retriever arguments given at - # HaystackPipeline initialization - # :return: merged arguments from both inputs - - # check for conflicting arguments - for key in init_retriever_kwargs.keys(): - if key in config_retriever_args.keys(): - raise ValueError( - f"Found {key} in both HaystackPipeline arguments and config " - "retriever_args. 
Specify only one" - ) - - # merge - merged_args = {} - merged_args.update(config_retriever_args) - merged_args.update(init_retriever_kwargs) - - # rename pipeline arguments to fit retriever arguments - merged_args = self._rename_arg_with_check( - merged_args, "extraction_strategy", "pooling_strategy" - ) - merged_args = self._rename_arg_with_check( - merged_args, "sequence_length", "max_seq_len" - ) - - return merged_args diff --git a/src/deepsparse/transformers/pipelines/embedding_extraction.py b/src/deepsparse/transformers/pipelines/embedding_extraction.py index 9429a2fc73..fd21ad7da6 100644 --- a/src/deepsparse/transformers/pipelines/embedding_extraction.py +++ b/src/deepsparse/transformers/pipelines/embedding_extraction.py @@ -38,7 +38,7 @@ from typing import Any, List, Type, Union import numpy -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy from deepsparse.legacy import Pipeline @@ -77,9 +77,7 @@ class EmbeddingExtractionOutput(BaseModel): description="The output of the model which is an embedded " "representation of the input" ) - - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) class ExtractionStrategy(str, Enum): diff --git a/src/deepsparse/transformers/pipelines/mnli_text_classification.py b/src/deepsparse/transformers/pipelines/mnli_text_classification.py index 1cc05e954c..2bbbdcf54b 100644 --- a/src/deepsparse/transformers/pipelines/mnli_text_classification.py +++ b/src/deepsparse/transformers/pipelines/mnli_text_classification.py @@ -90,7 +90,8 @@ class MnliTextClassificationInput(ZeroShotTextClassificationInputBase): labels: Optional[Union[List[str], str]] = Field( description="The set of possible class labels to classify each " "sequence into. Can be a single label, a string of comma-separated " - "labels, or a list of labels." 
+ "labels, or a list of labels.", + default=None, ) hypothesis_template: Optional[str] = Field( description="A formattable template for wrapping around the provided " diff --git a/src/deepsparse/transformers/pipelines/text_generation/compile_generations.py b/src/deepsparse/transformers/pipelines/text_generation/compile_generations.py index 71c60a5936..4baa8bfbad 100644 --- a/src/deepsparse/transformers/pipelines/text_generation/compile_generations.py +++ b/src/deepsparse/transformers/pipelines/text_generation/compile_generations.py @@ -24,9 +24,9 @@ class CompileGenerationsOutput(BaseModel): - generated_tokens: Any = Field(description="generated_tokens") - generated_logits: Any = Field(description="generated_logits") - finished_reason: Any = Field(description="finished_reason") + generated_tokens: Any = Field(None, description="generated_tokens") + generated_logits: Any = Field(None, description="generated_logits") + finished_reason: Any = Field(None, description="finished_reason") class CompileGenerations(Operator): diff --git a/src/deepsparse/transformers/pipelines/text_generation/kv_cache_operator.py b/src/deepsparse/transformers/pipelines/text_generation/kv_cache_operator.py index 7dd171c625..0e78860556 100644 --- a/src/deepsparse/transformers/pipelines/text_generation/kv_cache_operator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/kv_cache_operator.py @@ -28,13 +28,13 @@ class KVCacheCreatorOutput(BaseModel): - kv_cache: Any = Field(description="KV Cache Created") # DecoderKVCache + kv_cache: Any = Field(None, description="KV Cache Created") # DecoderKVCache class KVCacheCreatorInput(BaseModel): - cache_shape: Any = Field(description="shape") - kv_cache_data_type: Any = Field(description="data type") - output_names: Any = Field(description="output names") + cache_shape: Any = Field(None, description="shape") + kv_cache_data_type: Any = Field(None, description="data type") + output_names: Any = Field(None, description="output names") class KVCacheCreator(Operator): diff --git a/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator.py b/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator.py index 1f631573ae..df275bc5d2 100644 --- a/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator.py @@ -18,7 +18,7 @@ from typing import Any, List, Optional, Tuple, Union import numpy -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from deepsparse.operators.engine_operator import ( DEEPSPARSE_ENGINE, @@ -37,7 +37,7 @@ class NLEngineInputs(BaseModel): engine_inputs: List = Field(description="engine_inputs") - kv_cache: Any = Field(description="kv_cache object") + kv_cache: Any = Field(None, description="kv_cache object") tokens: List = Field(description="tokens") in_generation: Any = Field(description="in_generation", default=None) engine: Optional[Any] = Field( @@ -75,13 +75,12 @@ def join(cls, inputs: List["NLEngineInputs"]) -> "NLEngineInputs": kv_cache=all_kv_cache, ) - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) class NLEngineOutputs(BaseModel): - engine_outputs: Any = Field(description="engine_outputs") - kv_cache: Any = Field(description="kv_cache object") + engine_outputs: Any = Field(None, description="engine_outputs") + kv_cache: Any = Field(None, description="kv_cache object") tokens: List = Field(description="tokens") in_generation: 
Any = Field(description="in_generation", default=None) diff --git a/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator_no_kv_cache.py b/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator_no_kv_cache.py index c6ae6c51f3..5daefeca51 100644 --- a/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator_no_kv_cache.py +++ b/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator_no_kv_cache.py @@ -28,8 +28,8 @@ class NLEngineInputsNoCache(BaseModel): - input_ids: Any - attention_mask: Any + input_ids: Any = None + attention_mask: Any = None class NLEngineOperatorNoCache(EngineOperator): diff --git a/src/deepsparse/transformers/pipelines/text_generation/prep_for_generation.py b/src/deepsparse/transformers/pipelines/text_generation/prep_for_generation.py index 66b0c2a79b..4a7cc687e4 100644 --- a/src/deepsparse/transformers/pipelines/text_generation/prep_for_generation.py +++ b/src/deepsparse/transformers/pipelines/text_generation/prep_for_generation.py @@ -29,10 +29,10 @@ class PrepareForGenerationOutput(BaseModel): prompt_logits: Any = Field( - description="A set of prompt logits generated during prefill" + None, description="A set of prompt logits generated during prefill" ) - kv_cache: Optional[Any] = Field(description="kv cache") - in_generation: Optional[bool] = Field(description="in_generation flag") + kv_cache: Optional[Any] = Field(None, description="kv cache") + in_generation: Optional[bool] = Field(None, description="in_generation flag") class PrepareGeneration(Operator): diff --git a/src/deepsparse/transformers/pipelines/token_classification.py b/src/deepsparse/transformers/pipelines/token_classification.py index 4c719ee0c2..a12df915da 100644 --- a/src/deepsparse/transformers/pipelines/token_classification.py +++ b/src/deepsparse/transformers/pipelines/token_classification.py @@ -95,16 +95,18 @@ class TokenClassificationResult(BaseModel): score: float = Field(description="The corresponding probability for `entity`") word: str = Field(description="token/word classified") start: Optional[int] = Field( + None, description=( "index of the start of the corresponding entity in the sentence. " "Only exists if the offsets are available within the tokenizer" - ) + ), ) end: Optional[int] = Field( + None, description=( "index of the end of the corresponding entity in the sentence. 
" "Only exists if the offsets are available within the tokenizer" - ) + ), ) index: Optional[int] = Field( description=( diff --git a/src/deepsparse/transformers/pipelines_cli.py b/src/deepsparse/transformers/pipelines_cli.py index 887af4c594..0e639f568b 100644 --- a/src/deepsparse/transformers/pipelines_cli.py +++ b/src/deepsparse/transformers/pipelines_cli.py @@ -233,7 +233,7 @@ def response_to_json(response: Any): elif isinstance(response, dict): return {key: response_to_json(val) for key, val in response.items()} elif isinstance(response, BaseModel): - return response.dict() + return response.model_dump() return json.dumps(response) diff --git a/src/deepsparse/transformers/schemas/text_generation_schemas.py b/src/deepsparse/transformers/schemas/text_generation_schemas.py index 32a7675694..f77d91e09a 100644 --- a/src/deepsparse/transformers/schemas/text_generation_schemas.py +++ b/src/deepsparse/transformers/schemas/text_generation_schemas.py @@ -18,7 +18,7 @@ from enum import Enum from typing import Any, Callable, Dict, List, Optional, Sequence, Union -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from transformers import GenerationConfig @@ -50,8 +50,7 @@ class FinishReason(Enum): class TextGenerationInput(BaseModel): - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) sequences: Union[str, List[str]] = Field( alias="prompt", @@ -162,7 +161,4 @@ class TextGenerationOutput(BaseModel): "Dictionary containing token_ids and attention_mask, " "both mapping to arrays of size [batch_size, sequence_length]", ) - - class Config: - arbitrary_types_allowed = True - extra = "allow" + model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow") diff --git a/src/deepsparse/yolact/schemas.py b/src/deepsparse/yolact/schemas.py index bbab699d50..c44f730490 100644 --- a/src/deepsparse/yolact/schemas.py +++ b/src/deepsparse/yolact/schemas.py @@ -21,7 +21,7 @@ import numpy from PIL import Image -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from deepsparse.pipelines.computer_vision import ComputerVisionSchema @@ -98,8 +98,7 @@ def from_files( ) return input_schema - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) class YOLACTOutputSchema(BaseModel): @@ -117,11 +116,9 @@ class YOLACTOutputSchema(BaseModel): description="List of bounding boxes, one for each prediction" ) masks: Optional[List[Any]] = Field( - description="List of masks, one for each prediction" + None, description="List of masks, one for each prediction" ) - - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) def __getitem__(self, index): if index >= len(self.classes): diff --git a/src/deepsparse/yolo/schemas.py b/src/deepsparse/yolo/schemas.py index ce5aa79dc6..3a256ca010 100644 --- a/src/deepsparse/yolo/schemas.py +++ b/src/deepsparse/yolo/schemas.py @@ -18,11 +18,11 @@ """ from collections import namedtuple -from typing import Any, Iterable, List, Optional, TextIO +from typing import Any, Iterable, List, Optional, TextIO, Union import numpy from PIL import Image -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field from deepsparse.pipelines.computer_vision import ComputerVisionSchema @@ -92,8 +92,7 @@ def from_files( ) return input_schema - class Config: - arbitrary_types_allowed = True + model_config = ConfigDict(arbitrary_types_allowed=True) 
class YOLOOutput(BaseModel): @@ -107,7 +106,7 @@ class YOLOOutput(BaseModel): scores: List[List[float]] = Field( description="List of scores, one for each prediction" ) - labels: List[List[str]] = Field( + labels: List[Union[List[str], List[float]]] = Field( description="List of labels, one for each prediction" ) intermediate_outputs: Optional[Any] = Field( diff --git a/src/deepsparse/yolov8/schemas.py b/src/deepsparse/yolov8/schemas.py index 058574d91a..71dfe72803 100644 --- a/src/deepsparse/yolov8/schemas.py +++ b/src/deepsparse/yolov8/schemas.py @@ -35,7 +35,7 @@ class YOLOSegOutput(BaseModel): description="List of labels, one for each prediction" ) masks: Optional[List[Any]] = Field( - description="List of masks, one for each prediction" + None, description="List of masks, one for each prediction" ) intermediate_outputs: Optional[Tuple[Any, Any]] = Field( diff --git a/tests/deepsparse/legacy/loggers/test_helpers.py b/tests/deepsparse/legacy/loggers/test_helpers.py index c9c069463c..dfb89e77f8 100644 --- a/tests/deepsparse/legacy/loggers/test_helpers.py +++ b/tests/deepsparse/legacy/loggers/test_helpers.py @@ -71,15 +71,15 @@ def test_check_identifier_match(template, identifier, expected_output): class MockModel__(BaseModel): - key_3: Any + key_3: Any = None class MockModel_(BaseModel): - key_2: Any + key_2: Any = None class MockModel(BaseModel): - key_1: Any + key_1: Any = None value_1 = MockModel(key_1=MockModel_(key_2=[0, 1, 2, 3])) diff --git a/tests/deepsparse/loggers/test_config.py b/tests/deepsparse/loggers/test_config.py index ebf6b8a89a..37c0b93cc2 100644 --- a/tests/deepsparse/loggers/test_config.py +++ b/tests/deepsparse/loggers/test_config.py @@ -47,7 +47,7 @@ def test_config_generates_default_json(): """ expected_dict = yaml.safe_load(expected_config) - default_dict = LoggingConfig().dict() + default_dict = LoggingConfig().model_dump() assert expected_dict == default_dict diff --git a/tests/deepsparse/pipelines/test_numpy_schemas.py b/tests/deepsparse/pipelines/test_numpy_schemas.py index ddb4555ff1..9e07a9d2b9 100644 --- a/tests/deepsparse/pipelines/test_numpy_schemas.py +++ b/tests/deepsparse/pipelines/test_numpy_schemas.py @@ -13,7 +13,7 @@ # limitations under the License. 
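> Editor's note: the explicit `None` defaults added in the test hunks above are not cosmetic. pydantic v2 dropped v1's rule that `Any` and `Optional[...]` annotations are implicitly optional, so un-defaulted fields became required. A hypothetical sketch of the difference, assuming pydantic >= 2:

```python
# Hypothetical models showing why explicit defaults are added in this diff:
# in pydantic v2, un-defaulted `Any`/`Optional` fields are required.
from typing import Any, Optional

from pydantic import BaseModel, Field


class V1Style(BaseModel):
    value: Optional[int]  # v1: implicitly None; v2: required


class V2Style(BaseModel):
    value: Optional[int] = Field(None, description="truly optional again")
    extra: Any = None


V2Style()    # validates with no arguments
# V1Style()  # would raise ValidationError under pydantic v2
```

Where v1 behavior is needed wholesale, the hunk below instead imports from the `pydantic.v1` compatibility namespace that ships inside pydantic 2.x, sidestepping the migration for those tests entirely.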
import numpy -from pydantic import BaseModel, ValidationError +from pydantic.v1 import BaseModel, ValidationError import pytest from deepsparse.pipelines.numpy_schemas import Float32, NumpyArray, UInt8 diff --git a/tests/deepsparse/utils/test_data.py b/tests/deepsparse/utils/test_data.py index b38012dd35..d601e267bb 100644 --- a/tests/deepsparse/utils/test_data.py +++ b/tests/deepsparse/utils/test_data.py @@ -85,7 +85,7 @@ def test_prep_for_serialization(unserialized_output, target_serialized_output): def check_dict_equality(dict_1, dict_2): for key, value in dict_1.items(): if isinstance(value, BaseModel): - value = value.dict() + value = value.model_dump() check_dict_equality(value, dict_2[key].dict()) elif isinstance(value, dict): check_dict_equality(value, dict_2[key]) @@ -97,6 +97,6 @@ def check_dict_equality(dict_1, dict_2): assert value == dict_2[key] serialized_output = prep_for_serialization(unserialized_output) - serialized_output = serialized_output.dict() - target_serialized_output = target_serialized_output.dict() + serialized_output = serialized_output.model_dump() + target_serialized_output = target_serialized_output.model_dump() check_dict_equality(target_serialized_output, serialized_output) diff --git a/tests/server/test_endpoints.py b/tests/server/test_endpoints.py index 95b8b5957e..3632de48d5 100644 --- a/tests/server/test_endpoints.py +++ b/tests/server/test_endpoints.py @@ -67,7 +67,7 @@ def test_docs_exist(self, client): def test_home_redirects_to_docs(self, client): response = client.get("/") assert response.status_code == 200 - assert response.request.path_url == "/docs" + assert str(response.request.url).endswith("/docs") assert len(response.history) > 0 assert response.history[-1].is_redirect @@ -295,13 +295,14 @@ def test_dynamic_add_and_remove_endpoint(engine_mock): assert response.status_code == 200 # remove /predict - response = client.delete( + response = client.request( + "DELETE", "/endpoints", json=EndpointConfig( route="/v2/models/test_model/infer", task="text-classification", model="default", - ).dict(), + ).model_dump(), ) assert response.status_code == 200 assert 404 == client.post("/predict", json=dict(sequences="asdf")).status_code diff --git a/tests/server/test_helpers.py b/tests/server/test_helpers.py index 259fada050..701578416f 100644 --- a/tests/server/test_helpers.py +++ b/tests/server/test_helpers.py @@ -26,9 +26,9 @@ class DummyOutputSchema(BaseModel): - field_1: Any - field_2: Any - field_3: Any + field_1: Any = None + field_2: Any = None + field_3: Any = None yaml_config_1 = """