diff --git a/.github/workflows/workflows_scans.yml b/.github/workflows/workflows_scans.yml
index ace73b18751606..3711d81c8f819e 100644
--- a/.github/workflows/workflows_scans.yml
+++ b/.github/workflows/workflows_scans.yml
@@ -39,13 +39,13 @@ jobs:
       # Initializes the CodeQL tools for scanning.
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@df409f7d9260372bd5f19e5b04e83cb3c43714ae # v3.27.9
+        uses: github/codeql-action/init@dd746615b3b9d728a6a37ca2045b68ca76d4841a # v3.28.8
         with:
           languages: "actions"
           build-mode: "none"

       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@df409f7d9260372bd5f19e5b04e83cb3c43714ae # v3.27.9
+        uses: github/codeql-action/analyze@dd746615b3b9d728a6a37ca2045b68ca76d4841a # v3.28.8
         with:
           category: "/language:actions"
diff --git a/docs/articles_en/about-openvino.rst b/docs/articles_en/about-openvino.rst
index 8c1552de072500..ec47f6d9f0962e 100644
--- a/docs/articles_en/about-openvino.rst
+++ b/docs/articles_en/about-openvino.rst
@@ -20,7 +20,6 @@ toolkit designed to optimize, accelerate, and deploy deep learning models for us
 OpenVINO is actively developed by Intel® to work efficiently on a wide range of Intel® hardware platforms, including CPUs (x86 and Arm), GPUs, and NPUs.

-
 Features
 ##############################################################
@@ -32,23 +31,16 @@ To learn about the main properties of OpenVINO, see the :doc:`Key Features `__ and `core components `__.
-
-OpenVINO Ecosystem
-##############################################################
-
-Along with the primary components of model optimization and runtime, the toolkit also includes:
-
-* `Neural Network Compression Framework (NNCF) `__ - a tool for enhanced OpenVINO™ inference to get performance boost with minimal accuracy drop.
-* :doc:`Openvino Notebooks `- Jupyter Python notebook, which demonstrate key features of the toolkit.
-* `OpenVINO Model Server `__ - a server that enables scalability via a serving microservice.
-* :doc:`OpenVINO Training Extensions ` – a convenient environment to train Deep Learning models and convert them using the OpenVINO™ toolkit for optimized inference.
-* :doc:`Dataset Management Framework (Datumaro) ` - a tool to build, transform, and analyze datasets.
+To learn more about how OpenVINO works, read the Developer documentation on its
+`architecture `__
+and
+`core components `__.

 Community
 ##############################################################

-OpenVINO community plays a vital role in the growth and development of the open-sourced toolkit. Users can contribute to OpenVINO and get support using the following channels:
+The OpenVINO community plays a vital role in the growth and development of the open-source toolkit.
+Users can contribute to OpenVINO and get support using the following channels:

 * `OpenVINO GitHub issues, discussions and pull requests `__
 * `OpenVINO Blog `__
@@ -59,6 +51,7 @@ OpenVINO community plays a vital role in the growth and development of the open-

 Case Studies
 ##############################################################

-OpenVINO has been employed in various case studies across a wide range of industries and applications, including healthcare, retail, safety and security, transportation, and more. Read about how OpenVINO enhances efficiency, accuracy, and safety in different sectors on the `success stories page `__.
-
-
+OpenVINO has been employed in various case studies across a wide range of industries and
+applications, including healthcare, retail, safety and security, transportation, and more.
+Read about how OpenVINO enhances efficiency, accuracy, and safety in different sectors on the +`success stories page `__. diff --git a/docs/articles_en/about-openvino/openvino-ecosystem.rst b/docs/articles_en/about-openvino/openvino-ecosystem.rst index 765a5d87a46c2e..5593ce56af48fe 100644 --- a/docs/articles_en/about-openvino/openvino-ecosystem.rst +++ b/docs/articles_en/about-openvino/openvino-ecosystem.rst @@ -1,136 +1,30 @@ -OpenVINO™ Ecosystem Overview -============================== - +OpenVINO Ecosystem +================== .. meta:: - :description: OpenVINO™ ecosystem offers various resources for developing deep learning - solutions. - + :description: Explore the OpenVINO™ ecosystem of tools and resources for developing deep + learning solutions. .. toctree:: :maxdepth: 1 :hidden: - openvino-ecosystem/openvino-training-extensions - openvino-ecosystem/openvino-test-drive - openvino-ecosystem/datumaro - openvino-ecosystem/openvino-security-add-on - - - -OpenVINO™ is a big project, offering more than just the core runtime. This page will give -you an overview of a whole ecosystem of tools and solutions under the OpenVINO umbrella. - - -| **GenAI** -| :bdg-link-dark:`Github ` - :bdg-link-success:`User Guide ` - -OpenVINO™ GenAI Library aims to simplify running inference of generative AI -models. Check the LLM-powered Chatbot Jupyter notebook to see how GenAI works. -|hr| - - -| **Neural Network Compression Framework** -| :bdg-link-dark:`Github ` - :bdg-link-success:`User Guide ` - -A suite of advanced algorithms for Neural Network inference optimization with minimal accuracy -drop. NNCF applies quantization, filter pruning, binarization, and sparsity algorithms to PyTorch -and TensorFlow models during training. -|hr| - - -| **OpenVINO Model Server** -| :bdg-link-dark:`Github ` - :bdg-link-success:`User Guide ` - -A high-performance system that can be used to access the host models via request to the model -server. -|hr| - - -| **OpenVINO Notebooks** -| :bdg-link-dark:`Github ` - :bdg-link-success:`Jupyter Notebook Collection ` - -A collection of Jupyter notebooks for learning and experimenting with the OpenVINO™ Toolkit. -|hr| - - -| **Hugging Face OpenVINO models** -| :bdg-link-info:`Model Collection ` - -A Hugging Face repository hosting pre-optimized OpenVINO IR models, so that you can use them -without the need to convert. -|hr| - - -| **OpenVINO Training Extensions** -| :bdg-link-dark:`Github ` - :bdg-link-success:`Overview Page ` - -A convenient environment to train Deep Learning models and convert them using the OpenVINO™ -toolkit for optimized inference. -|hr| - - -| **OpenVINO Security Addon** -| :bdg-link-dark:`Github ` - :bdg-link-success:`User Guide ` - -A solution for Model Developers and Independent Software Vendors to use secure packaging and -secure model execution. -|hr| - - -| **Datumaro** -| :bdg-link-dark:`Github ` - :bdg-link-success:`Overview Page ` - -A framework and a CLI tool for building, transforming, and analyzing datasets. -|hr| - - -| **Intel® Geti™** -| :bdg-link-dark:`Github ` - :bdg-link-info:`Official Website ` - -Intel's new software for building computer vision -models in a fraction of the time and with less data. This software eases laborious -data labeling, model training and optimization tasks across the AI model -development process, empowering teams to produce custom AI models at scale. 
-|hr|
-
-
-| **Intel® Test Drive**
-| :bdg-link-dark:`Github `
-
-OpenVINO™ Test Drive is cross-platform graphic user interface application that enables running
-generative AI and vision models directly on your computer or edge device using OpenVINO™ Runtime.
-|hr|
-
-
-| **Tokenizers**
-| :bdg-link-dark:`Github `
-  :bdg-link-success:`User Guide `
-
-OpenVINO Tokenizers add text processing operations to OpenVINO.
-
-OpenVINO-based AI projects
-##########################
-
-OpenVINO is used in many educational, multimodal, and AI projects in the fields of AI Audio,
-Natural Language Processing, AI Computer Vision, Generative AI, operating systems and API extensions.
-Examples of such projects are: `OpenVINO AI Plugins for GIMP `__,
-`OpenVINO Code `__, and
-`NVIDIA GPU Plugin `__.
+   OpenVINO Integrations
+   The OpenVINO Project
+   OpenVINO Adoptions

-A detailed listing of OpenVINO-based AI projects is available in the
-`Awesome OpenVINO repository `__
+OpenVINO™, like any large toolkit, involves multiple components and integrations that may be used
+in various areas of your Deep Learning pipelines. This section will give you an overview of a
+whole ecosystem of resources that are either developed under the OpenVINO umbrella, integrate it
+with external solutions, or utilize its potential.

+| :doc:`OpenVINO Integrations <./openvino-ecosystem/openvino-integrations>`
+| See what other tools OpenVINO is easily integrated with and how you can benefit from its
+  performance, without rewriting your software.

-.. |hr| raw:: html
+| :doc:`The OpenVINO project <./openvino-ecosystem/openvino-project>`
+| Check out the most noteworthy components of the OpenVINO project.

-
+| :doc:`OpenVINO adoptions <./openvino-ecosystem/openvino-adoptions>`
+| Here, you will find information about a selection of software projects utilizing OpenVINO.
\ No newline at end of file
diff --git a/docs/articles_en/about-openvino/openvino-ecosystem/openvino-adoptions.rst b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-adoptions.rst
new file mode 100644
index 00000000000000..b83a41837b8a2f
--- /dev/null
+++ b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-adoptions.rst
@@ -0,0 +1,57 @@
+OpenVINO Adoptions
+==========================
+
+OpenVINO has been adopted by multiple AI projects in various areas. For an extensive list of
+community-based projects involving OpenVINO, see the
+`Awesome OpenVINO repository `__.
+
+Here is a small selection of adoptions, including proprietary and commercial tools:
+
+| **DaVinci Resolve**
+| :bdg-link-info:`Official Website `
+
+DaVinci Resolve is a professional video editing suite by Blackmagic Design. It uses OpenVINO to
+run some of its industry-leading AI features.
+|hr|
+
+| **OpenVINO AI Plugins for GIMP**
+| :bdg-link-dark:`Official Repository `
+
+GIMP is an image editor that has promoted open source values for over two decades. Now, you can
+use generative AI directly in the application, thanks to the OpenVINO plugin, just like in the
+leading graphics suites.
+|hr|
+
+| **OpenVINO AI Plugins for Audacity**
+| :bdg-link-info:`Official Website `
+  :bdg-link-dark:`Official Repository `
+
+Audacity is a hugely popular audio editing and recording application. Now, it offers AI-based
+plugins running on OpenVINO, providing new effects, generators, and analyzers.
+|hr|
+
+| **VisionGuard**
+| :bdg-link-dark:`Official Repository `
+
+A desktop tool developed within Google Summer of Code. It helps computer users battle
+eye strain, utilizing gaze estimation.
+|hr|
+
+| **OpenVINO Code**
+| :bdg-link-dark:`Official Repository `
+
+A coding assistant: a community-developed extension for Visual Studio Code that helps
+programmers by providing code completion and suggestions.
+|hr|
+
+| **NVIDIA GPU Plugin**
+| :bdg-link-dark:`Official Repository `
+
+A device plugin for OpenVINO: a community-developed extension that enables inference on
+NVIDIA GPUs.
+|hr|
+
+
+.. |hr| raw:: html
+
diff --git a/docs/articles_en/about-openvino/openvino-ecosystem/openvino-integrations.rst b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-integrations.rst
new file mode 100644
index 00000000000000..16283402a68c31
--- /dev/null
+++ b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-integrations.rst
@@ -0,0 +1,182 @@
+OpenVINO™ Integrations
+==============================
+
+
+.. meta::
+   :description: Check a list of integrations between OpenVINO and other Deep Learning solutions.
+
+
+
+.. = 1 ========================================================================================
+
+**Hugging Face Optimum-Intel**
+
+|hr|
+
+.. grid:: 1 1 2 2
+   :gutter: 4
+
+   .. grid-item::
+
+      | Grab and use models leveraging OpenVINO within the Hugging Face API.
+        The repository hosts pre-optimized OpenVINO IR models, so that you can use
+        them in your projects without the need for any adjustments.
+      | Benefits:
+      | - Minimize complex coding for Generative AI.
+
+   .. grid-item::
+
+      * :doc:`Run inference with HuggingFace and Optimum Intel <../../openvino-workflow-generative/inference-with-optimum-intel>`
+      * `A notebook example: llm-chatbot `__
+      * `Hugging Face Inference documentation `__
+      * `Hugging Face Compression documentation `__
+      * `Hugging Face Reference Documentation `__
+
+.. dropdown:: Check example code
+   :animate: fade-in-slide-down
+   :color: secondary
+
+   .. code-block:: py
+
+      -from transformers import AutoModelForCausalLM
+      +from optimum.intel.openvino import OVModelForCausalLM
+      from transformers import AutoTokenizer, pipeline
+      model_id = "togethercomputer/RedPajama-INCITE-Chat-3B-v1"
+      -model = AutoModelForCausalLM.from_pretrained(model_id)
+      +model = OVModelForCausalLM.from_pretrained(model_id, export=True)
+
+
+.. = 2 ========================================================================================
+
+**OpenVINO Execution Provider for ONNX Runtime**
+
+|hr|
+
+.. grid:: 1 1 2 2
+   :gutter: 4
+
+   .. grid-item::
+
+      | Utilize OpenVINO as a backend with your existing ONNX Runtime code.
+      | Benefits:
+      | - Enhanced inference performance on Intel hardware with minimal code modifications.
+
+   .. grid-item::
+
+      * A notebook example: YOLOv8 object detection
+      * `ONNX User documentation `__
+      * `Build ONNX RT with OV EP `__
+      * `ONNX Examples `__
+
+
+.. dropdown:: Check example code
+   :animate: fade-in-slide-down
+   :color: secondary
+
+   .. code-block:: py
+
+      device = "CPU_FP32"
+      # Set OpenVINO as the Execution provider to infer this model
+      sess.set_providers(["OpenVINOExecutionProvider"], [{"device_type": device}])
+
+
+.. = 3 ========================================================================================
+
+**Torch.compile with OpenVINO**
+
+|hr|
+
+.. grid:: 1 1 2 2
+   :gutter: 4
+
+   .. grid-item::
+
+      | Use OpenVINO for Python-native applications by JIT-compiling code into optimized kernels.
+      | Benefits:
+      | - Enhanced inference performance on Intel hardware with minimal code modifications.
+
+   .. grid-item::
+
+      * :doc:`PyTorch Deployment via torch.compile <../../openvino-workflow/torch-compile>`
+      * A notebook example: n.a.
+      * `torch.compiler documentation `__
+      * `torch.compiler API reference `__
+
+.. dropdown:: Check example code
+   :animate: fade-in-slide-down
+   :color: secondary
+
+   .. code-block:: python
+
+      import openvino.torch
+
+      ...
+      model = torch.compile(model, backend='openvino')
+      ...
+
+
+
+.. = 4 ========================================================================================
+
+**OpenVINO LLMs with LlamaIndex**
+
+|hr|
+
+.. grid:: 1 1 2 2
+   :gutter: 4
+
+   .. grid-item::
+
+      | Build context-augmented GenAI applications with the LlamaIndex framework and enhance
+        runtime performance with OpenVINO.
+      | Benefits:
+      | - Minimize complex coding for Generative AI.
+
+   .. grid-item::
+
+      * :doc:`LLM inference with Optimum-intel <../../openvino-workflow-generative/inference-with-optimum-intel>`
+      * `A notebook example: llm-agent-rag `__
+      * `Inference documentation `__
+      * `Rerank documentation `__
+      * `Embeddings documentation `__
+      * `API Reference `__
+
+.. dropdown:: Check example code
+   :animate: fade-in-slide-down
+   :color: secondary
+
+   .. code-block:: python
+
+      ov_config = {
+          "PERFORMANCE_HINT": "LATENCY",
+          "NUM_STREAMS": "1",
+          "CACHE_DIR": "",
+      }
+
+      ov_llm = OpenVINOLLM(
+          model_id_or_path="HuggingFaceH4/zephyr-7b-beta",
+          context_window=3900,
+          max_new_tokens=256,
+          model_kwargs={"ov_config": ov_config},
+          generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+          messages_to_prompt=messages_to_prompt,
+          completion_to_prompt=completion_to_prompt,
+          device_map="cpu",
+      )
+
+
+
+
+
+
+
+
+
+.. ============================================================================================
+
+.. |hr| raw:: html
+
\ No newline at end of file
diff --git a/docs/articles_en/about-openvino/openvino-ecosystem/openvino-project.rst b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-project.rst
new file mode 100644
index 00000000000000..2b32c4e54426a5
--- /dev/null
+++ b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-project.rst
@@ -0,0 +1,112 @@
+OpenVINO™ Project Overview
+==============================
+
+
+.. meta::
+   :description: Check an overview of the most interesting components of the OpenVINO™ project.
+
+
+.. toctree::
+   :maxdepth: 1
+   :hidden:
+
+   openvino-project/openvino-training-extensions
+   openvino-project/datumaro
+   openvino-project/openvino-security-add-on
+   openvino-project/openvino-test-drive
+
+
+This page provides an overview of the most noteworthy tools and components for AI developers,
+hosted in repositories under the OpenVINO project:
+
+| **GenAI**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-success:`User Guide `
+
+OpenVINO™ GenAI Library simplifies running inference of generative AI
+models. Check the LLM-powered Chatbot Jupyter notebook to see how GenAI works, and see the
+short sketch at the end of this page.
+|hr|
+
+| **Neural Network Compression Framework**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-success:`User Guide `
+
+A suite of advanced algorithms for Neural Network inference optimization with minimal accuracy
+drop. NNCF applies quantization, filter pruning, binarization, and sparsity algorithms to PyTorch
+and TensorFlow models during training.
+|hr|
+
+| **OpenVINO Model Server**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-success:`User Guide `
+
+A high-performance serving system that provides access to hosted models via client requests to
+the model server.
+|hr|
+
+| **OpenVINO Notebooks**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-success:`Jupyter Notebook Collection `
+
+A collection of Jupyter notebooks for learning and experimenting with the OpenVINO™ Toolkit.
+|hr|
+
+| **OpenVINO Training Extensions**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-success:`Overview Page `
+
+A convenient environment to train Deep Learning models and convert them using the OpenVINO™
+toolkit for optimized inference.
+|hr|
+
+| **OpenVINO Security Addon**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-success:`User Guide `
+
+A solution for Model Developers and Independent Software Vendors to use secure packaging and
+secure model execution.
+|hr|
+
+| **OpenVINO Test Drive**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-success:`Overview Page `
+
+A cross-platform graphical user interface application for running and testing generative and
+vision AI models on computers or edge devices.
+|hr|
+
+| **Datumaro**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-success:`Overview Page `
+
+A framework and a CLI tool for building, transforming, and analyzing datasets.
+|hr|
+
+| **Intel® Geti™**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-info:`Official Website `
+
+Intel's new software for building computer vision
+models in a fraction of the time and with less data. This software eases laborious
+data labeling, model training and optimization tasks across the AI model
+development process, empowering teams to produce custom AI models at scale.
+|hr|
+
+| **Tokenizers**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-success:`User Guide `
+
+OpenVINO Tokenizers add text processing operations to OpenVINO.
+|hr|
+
+| **OpenVINO's Open Model Zoo**
+| :bdg-link-dark:`GitHub `
+  :bdg-link-success:`User Guide `
+
+Open Model Zoo includes optimized deep learning models and a set of demos to
+expedite development of high-performance deep learning inference applications.
+ + +.. |hr| raw:: html + +
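+To give a sense of how little code some of these components need, here is a minimal,
+hypothetical sketch of text generation with the GenAI library mentioned above. The model
+directory and prompt are placeholders, and it assumes an LLM already exported to OpenVINO IR
+(for example, with optimum-intel):
+
+.. code-block:: py
+
+   import openvino_genai
+
+   # "model_dir" is a placeholder for a directory holding an OpenVINO IR LLM.
+   pipe = openvino_genai.LLMPipeline("model_dir", "CPU")
+   # Generation parameters are passed as keyword arguments.
+   print(pipe.generate("What is OpenVINO?", max_new_tokens=100))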
diff --git a/docs/articles_en/about-openvino/openvino-ecosystem/datumaro.rst b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-project/datumaro.rst similarity index 98% rename from docs/articles_en/about-openvino/openvino-ecosystem/datumaro.rst rename to docs/articles_en/about-openvino/openvino-ecosystem/openvino-project/datumaro.rst index 36fed2543878fe..0b02d63045f59f 100644 --- a/docs/articles_en/about-openvino/openvino-ecosystem/datumaro.rst +++ b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-project/datumaro.rst @@ -19,7 +19,7 @@ Plus, enjoy `Jupyter notebooks ` on Intel® architecture. Together, the OpenVINO™ Security Add-on and the OpenVINO™ Model Server provide a way for Model Developers and Independent Software Vendors to use secure packaging and secure model execution to enable access control to the OpenVINO™ models, and for model Users to run inference within assigned limits. +The OpenVINO™ Security Add-on works with the :doc:`OpenVINO™ Model Server <../../../openvino-workflow/model-server/ovms_what_is_openvino_model_server>` on Intel® architecture. Together, the OpenVINO™ Security Add-on and the OpenVINO™ Model Server provide a way for Model Developers and Independent Software Vendors to use secure packaging and secure model execution to enable access control to the OpenVINO™ models, and for model Users to run inference within assigned limits. The OpenVINO™ Security Add-on consists of three components that run in Kernel-based Virtual Machines (KVMs). These components provide a way to run security-sensitive operations in an isolated environment. A brief description of the three components are as follows. Click each triangled line for more information about each. @@ -45,7 +45,7 @@ The OpenVINO™ Security Add-on consists of three components that run in Kernel- **Where the OpenVINO™ Security Add-on Fits into Model Development and Deployment** -.. image:: ../../assets/images/ovsa_diagram.svg +.. image:: ../../../assets/images/ovsa_diagram.svg The binding between SWTPM (vTPM used in guest VM) and HW TPM (TPM on the host) is explained in `this document. `__ @@ -743,7 +743,7 @@ The following figure describes the interactions between the Model Developer, Ind The Model Developer/Independent Software Vendor and User roles are related to virtual machine use and one person might fill the tasks required by multiple roles. In this document the tasks of Model Developer and Independent Software Vendor are combined and use the Guest VM named ``ovsa_isv``. It is possible to have all roles set up on the same Host Machine. -.. image:: ../../assets/images/ovsa_example.svg +.. image:: ../../../assets/images/ovsa_example.svg Model Developer Instructions ++++++++++++++++++++++++++++ diff --git a/docs/articles_en/about-openvino/openvino-ecosystem/openvino-test-drive.rst b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-project/openvino-test-drive.rst similarity index 95% rename from docs/articles_en/about-openvino/openvino-ecosystem/openvino-test-drive.rst rename to docs/articles_en/about-openvino/openvino-ecosystem/openvino-project/openvino-test-drive.rst index 602a2b8ec24eb2..703bf5f9976350 100644 --- a/docs/articles_en/about-openvino/openvino-ecosystem/openvino-test-drive.rst +++ b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-project/openvino-test-drive.rst @@ -50,20 +50,20 @@ Inference of models from Hugging Face 1. Find a model on `Hugging Face `__ and import it. - .. image:: ../../assets/images/TestDrive_llm_import.gif + .. 
image:: ../../../assets/images/TestDrive_llm_import.gif
      :align: center
      :alt: how to import a model to test drive

2. Chat with LLMs via the `Playground` tab.

-   .. image:: ../../assets/images/TestDrive_llm_model_chat.gif
+   .. image:: ../../../assets/images/TestDrive_llm_model_chat.gif
      :align: center
      :alt: chatting with llm models in test drive

3. Use the `Performance metrics` tab to get model performance metrics on your computer or
   an edge device.

-   .. image:: ../../assets/images/TestDrive_llm_metrics.gif
+   .. image:: ../../../assets/images/TestDrive_llm_metrics.gif
      :align: center
      :alt: verifying llm performance in test drive

@@ -74,7 +74,7 @@ Inference of models trained with Intel® Geti™
   by Intel® Geti™ (refer to the `Intel® Geti™ documentation `__ for more details).

-   .. image:: ../../assets/images/TestDrive_geti_download.gif
+   .. image:: ../../../assets/images/TestDrive_geti_download.gif
      :align: center
      :alt: verifying llm performance in test drive

diff --git a/docs/articles_en/about-openvino/openvino-ecosystem/openvino-training-extensions.rst b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-project/openvino-training-extensions.rst
similarity index 94%
rename from docs/articles_en/about-openvino/openvino-ecosystem/openvino-training-extensions.rst
rename to docs/articles_en/about-openvino/openvino-ecosystem/openvino-project/openvino-training-extensions.rst
index 8a5bd91f9c1b7b..ea79aa65884c72 100644
--- a/docs/articles_en/about-openvino/openvino-ecosystem/openvino-training-extensions.rst
+++ b/docs/articles_en/about-openvino/openvino-ecosystem/openvino-project/openvino-training-extensions.rst
@@ -16,7 +16,7 @@ inference. It allows you to export and convert the models to the needed format.
 Detailed Workflow
 #################

-.. image:: ../../assets/images/training_extensions_framework.png
+.. image:: ../../../assets/images/training_extensions_framework.png

 1. To start working with OpenVINO Training Extensions, prepare and annotate your dataset. For example, on CVAT.

@@ -25,7 +25,7 @@ Detailed Workflow
 .. note:: Prepare a separate dataset or split the dataset you have for more accurate quality evaluation.

-3. Having successful evaluation results received, you have an opportunity to deploy your model or continue optimizing it, using NNCF. For more information about these frameworks, go to :doc:`Optimization Guide <../../openvino-workflow/model-optimization>`.
+3. Once you have successful evaluation results, you can deploy your model or continue optimizing it, using NNCF. For more information about these frameworks, go to :doc:`Optimization Guide <../../../openvino-workflow/model-optimization>`.

 If the results are unsatisfactory, add datasets and perform the same steps, starting with dataset annotation.

diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst
index 70af3ce17566e6..2ffe9f04d6b922 100644
--- a/docs/articles_en/about-openvino/release-notes-openvino.rst
+++ b/docs/articles_en/about-openvino/release-notes-openvino.rst
@@ -26,71 +26,356 @@ OpenVINO Release Notes
 What's new
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

-* .
+* More GenAI coverage and framework integrations to minimize code changes.
+
+  * New models supported: Qwen 2.5.
+  * Whisper Model: Improved performance on CPUs, built-in GPUs, and discrete GPUs with GenAI API.
+  * Preview: Introducing NPU support for torch.compile, giving developers the ability to use the
+    OpenVINO backend to run the PyTorch API on NPUs. 300+ deep learning models enabled from the
+    TorchVision, Timm, and TorchBench repositories.
+
+* Broader Large Language Model (LLM) support and more model compression techniques.
+
+  * Preview: Addition of Prompt Lookup to GenAI API improves 2nd token latency for LLMs by
+    effectively utilizing predefined prompts that match the intended use case.
+  * Asymmetric KV Cache compression is now enabled for INT8 on CPUs, resulting in lower
+    memory consumption and improved 2nd token latency, especially when dealing with long prompts
+    that require significant memory. The option should be explicitly specified by the user.
+
+* More portability and performance to run AI at the edge, in the cloud, or locally.
+
+  * Support for the latest Intel® Core™ Ultra 200H series processors (formerly codenamed Arrow
+    Lake-H).
+  * Preview: The GenAI API now offers image-to-image inpainting functionality. This feature
+    enables models to generate realistic content by inpainting specified modifications and
+    seamlessly integrating them with the original image.
+  * Integration of the OpenVINO backend with the Triton Inference Server allows developers to
+    utilize the Triton server for enhanced model serving performance when deploying on Intel
+    CPUs.
+  * Preview: A new OpenVINO backend integration allows developers to leverage OpenVINO
+    performance optimizations directly within Keras 3 workflows for faster AI inference on
+    Intel® CPUs, built-in GPUs, discrete GPUs, and NPUs. This feature is available with the
+    latest Keras 3.8 release.
+
+
+Now Deprecated
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+* Legacy prefixes (l_, w_, m_) have been removed from OpenVINO archive names.
+* The `runtime` namespace for Python API has been marked as deprecated and designated to be
+  removed in 2026.0. The new namespace structure has been delivered, and migration is possible
+  immediately. Details will be communicated through warnings and via documentation (see the
+  short migration sketch at the end of these release notes).
+* The NNCF create_compressed_model() method is deprecated. The nncf.quantize() method is now
+  recommended for Quantization-Aware Training of PyTorch and TensorFlow models.
+
+
+OpenVINO™ Runtime
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+Common
+-----------------------------------------------------------------------------------------------
+
+* Support for Python 3.13 has been enabled for OpenVINO Runtime. Tools like NNCF will follow,
+  based on their dependencies' readiness.
+
+
+AUTO Inference Mode
+-----------------------------------------------------------------------------------------------
+
+* The issue where AUTO failed to load models to NPU, found only on the Intel® Core™ Ultra 200V
+  processor platform, has been fixed.
+* The application crash on exit that occurred when ov::CompiledModel, ov::InferRequest, or
+  ov::Model were defined as static variables has been fixed.

-OpenVINO™ Runtime
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

 CPU Device Plugin
 -----------------------------------------------------------------------------------------------

-* .
-* .
+* Intel® Core™ Ultra 200H processors (formerly codenamed Arrow Lake-H) are now fully supported.
+* Asymmetric 8-bit key-value cache compression is now enabled on CPU by default, reducing memory
+  usage and memory bandwidth consumption for large language models and improving performance
+  for 2nd token generation. Asymmetric 4-bit key-value cache compression on CPU is now supported
+  as an option to further reduce memory consumption.
+* Performance of models running in FP16 on 6th generation Intel® Xeon® processors with P-cores
+  has been enhanced by improving utilization of the underlying AMX FP16 capabilities.
+* LLM performance has been improved on CPU when using OpenVINO GenAI APIs with the continuous
+  batching feature.
+* Performance of depth-wise convolution neural networks has been improved.
+* CPU platforms with some CPU cores disabled, a configuration used in some virtualization and
+  real-time system setups, are now supported.
+
+
 GPU Device Plugin
 -----------------------------------------------------------------------------------------------

-* .
+* Intel® Core™ Ultra 200H processors (formerly codenamed Arrow Lake-H) are now fully supported.
+* The ScaledDotProductAttention (SDPA) operator has been enhanced, improving LLM performance for
+  OpenVINO GenAI APIs with continuous batching and SDPA-based LLMs with long prompts (>4k).
+* Stateful models are now enabled, significantly improving performance of Whisper models on all
+  GPU platforms.
+* Stable Diffusion 3 and Flux.1 performance has been improved.
+* The issue of a black image output for image generation models, including SDXL, SD3, and
+  Flux.1, with FP16 precision has been solved.


 NPU Device Plugin
 -----------------------------------------------------------------------------------------------

-* .
+* Performance has been improved for channel-wise symmetrically quantized LLMs, including
+  Llama2-7B-chat, Llama3-8B-instruct, qwen-2-7B, Mistral-0.2-7B-instruct, phi-3-mini-4K-instruct,
+  and miniCPM-1B models. The best performance is achieved using fp16-in4 quantized models.
+* Preview: Introducing NPU support for torch.compile, giving developers the ability to use the
+  OpenVINO backend to run the PyTorch API on NPUs. 300+ deep learning models enabled from
+  the TorchVision, Timm, and TorchBench repositories.
+
+
+OpenVINO Python API
+-----------------------------------------------------------------------------------------------
+
+* The ov::OpExtension feature has been completed for the Python API. It enables users to
+  experiment with models and operators that are not officially supported, directly with Python.
+  It is equivalent to the well-known add_extension option for C++.
+* The Constant class has been extended with get_tensor_view and get_strides methods that allow
+  advanced users to easily manipulate Constant and Tensor objects, to experiment with data flow
+  and processing (a short usage sketch follows the TensorFlow notes below).

-OpenVINO.GenAI
+OpenVINO Node.js API
+-----------------------------------------------------------------------------------------------
+
+* OpenVINO tokenizer bindings for JavaScript are now available via the
+  `npm package `__.
+  This makes another OpenVINO tool available to JavaScript developers in a natural,
+  easy-to-use way, and extends the capabilities we deliver to that ecosystem.
+
+
+TensorFlow Framework Support
+-----------------------------------------------------------------------------------------------
+
+* The following has been fixed:
+
+  * Output of TensorListLength to be a scalar.
+  * Support of corner cases for the ToBool op, such as scalar input.
+  * Correct output type for UniqueWithCounts.
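+
+Referring to the Python API notes above, here is a minimal, illustrative sketch of the new
+Constant introspection methods. The method names come from the notes; the exact import path
+and return types are assumptions, not a definitive reference:
+
+.. code-block:: py
+
+   import numpy as np
+   from openvino.runtime import op  # note: the "runtime" namespace is deprecated, see above
+
+   # Build a small constant and inspect it with the newly added methods.
+   const = op.Constant(np.arange(6, dtype=np.float32).reshape(2, 3))
+   view = const.get_tensor_view()   # assumed: a Tensor view sharing the constant's data
+   print(view.get_shape(), const.get_strides())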
+
+PyTorch Framework Support
+-----------------------------------------------------------------------------------------------
+
+* Preview: Introducing NPU support for torch.compile, giving developers the ability to use
+  the OpenVINO backend to run the PyTorch API on NPUs. 300+ deep learning models enabled from
+  the TorchVision, Timm, and TorchBench repositories.
+* Preview: Support for conversion of PyTorch models with AWQ weights compression, enabling models
+  like SauerkrautLM-Mixtral-8x7B-AWQ and similar.
+
+
+JAX Framework Support
+-----------------------------------------------------------------------------------------------
+
+* JAX 0.4.38 is now supported.
+
+
+Keras 3 Multi-backend Framework Support
+-----------------------------------------------------------------------------------------------
+
+* Preview: with Keras 3.8, an inference-only OpenVINO backend is introduced for running model
+  predictions using OpenVINO in the Keras 3 workflow. To switch to the OpenVINO backend, set the
+  KERAS_BACKEND environment variable to "openvino". It supports base operations to infer
+  convolutional and transformer models such as MobileNet and Bert from Keras Hub.
+
+  Note: The OpenVINO backend may currently lack support for some operations. This will be
+  addressed in upcoming Keras releases as operation coverage is being expanded.
+
+
+ONNX Framework Support
+-----------------------------------------------------------------------------------------------
+
+* Runtime memory consumption for models with quantized weights has been reduced.
+* Models from the com.microsoft domain that use the following operations are now enabled:
+  SkipSimplifiedLayerNormalization, SimplifiedLayerNormalization, FusedMatMul, QLinearSigmoid,
+  QLinearLeakyRelu, QLinearAdd, QLinearMul, Range, DynamicQuantizeMatMul, MatMulIntegerToFloat.
+* A workflow issue which affected reading of 2-byte data types has been fixed.
+
+
+
+
+OpenVINO Model Server
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

-* The encrypted_model_causal_lm sample is now available, showing how to decrypt a model.
+* New feature: Windows native server deployment
+
+  * Model server deployment is now available as a binary application on Windows operating
+    systems.
+  * Generative endpoints are fully supported, including text generation and embeddings based on
+    the OpenAI API, and reranking based on the Cohere API.
+  * Functional parity with the Linux version is available with minor differences.
+  * The feature is targeted at client machines with Windows 11 and Data Center environments
+    with Windows 2022 Server OS.
+  * Demos have been updated to work on both Linux and Windows. Check the
+    `installation guide `__.
+
+* The following is now officially supported:
+
+  * Intel® Arc™ B-Series Graphics
+  * Intel® Core™ Ultra 200V and 200S Processors CPU, iGPU, and NPU.
+
+* Image base OSes have been updated:
+  dropped Ubuntu 20 and Red Hat UBI 8, added Ubuntu 24 and Red Hat UBI 9.
+
+* The following has been added:
+
+  * Truncate option in the embeddings endpoint. It is now possible to export the embeddings
+    model and automatically truncate the input to match the embeddings context length.
+    By default, an error is raised if the input is too long.
+  * Speculative decoding algorithm in text generation. Check
+    `the demo `__.
+  * Direct support for models without named outputs. For models without named outputs, generic
+    names are assigned during model initialization using the pattern ``out_``.
+  * Chat/completions have been extended to support the max_completion_tokens parameter and
+    message content as an array, ensuring API compatibility with the OpenAI API.
+  * Histogram metric for tracking pipeline processing duration.
+  * Security and stability improvements.
+
+* The following has been fixed:
+
+  * Cancelling text generation for disconnected clients.
+  * Detection of the model context length for the embeddings endpoint.
+
+
+Neural Network Compression Framework
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+* Post-training quantization time with the Fast Bias Correction algorithm has been reduced.
+* Model compression time with nncf.compress_weights() has been reduced significantly.
+* Added a new method quantize_pt2e() for accurate quantization of Torch FX models with NNCF
+  algorithms for different non-OpenVINO torch.compile() backends.
+* Introduced the OpenVINOQuantizer class, inherited from the PyTorch 2 Quantizer, for more
+  accurate and efficient quantized PyTorch models for deployments with OpenVINO.
+* Added support for the nncf.quantize() method as the initialization step for
+  Quantization-Aware Training for TensorFlow models.
+* The NNCF create_compressed_model() method is deprecated. The nncf.quantize() method is now
+  recommended for Quantization-Aware Training of PyTorch and TensorFlow models.
+
+
+OpenVINO Tokenizers
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+* WordLevel tokenizer/detokenizer and WordPiece detokenizer models are now supported.
+* UTF-8 (UCS Transformation Format 8) validation with replacement is now enabled by default in
+  the detokenizer.
+* New models are supported: GLM Edge, ModernBERT, BART-G2P.
+
+
+OpenVINO.GenAI
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+The following has been added:
+
+* Samples
+
+  * Restructured the samples folder, grouping the samples by use case.
+  * ChunkStreamer for multinomial_causal_lm.py, increasing performance for smaller LLMs.
+  * Image-to-image and inpainting image generation samples.
+  * Progress bar for cpp/image_generation samples.
+
+* Python API specific
+
+  * PYI file describing the Python API.
+  * TorchGenerator, which wraps torch.Generator for random generation.
+
+* WhisperPipeline (a short sketch follows this list)
+
+  * Stateful decoder for WhisperPipeline. Whisper decoder models with past are deprecated.
+  * Export a model with the new optimum-intel to obtain a stateful version.
+  * Performance metrics for WhisperPipeline.
+  * initial_prompt and hotwords parameters for the Whisper pipeline, allowing you to guide
+    generation.
+
+* LLMPipeline
+
+  * LoRA support for speculative decoding and the continuous batching backend.
+  * Prompt lookup decoding with LoRA support.
+
+* Image generation
+
+  * Image2Image and Inpainting pipelines, which currently support only UNet-based pipelines.
+  * rng_seed parameter in ImageGenerationConfig.
+  * Callback for image generation pipelines, allowing you to track generation progress and
+    obtain intermediate results.
+  * EulerAncestralDiscreteScheduler - SDXL turbo.
+  * PNDMScheduler - Stable Diffusion 1.x and 2.x.
+  * Models: black-forest-labs/FLUX.1-schnell, Freepik/flux.1-lite-8B-alpha,
+    black-forest-labs/FLUX.1-dev, shuttleai/shuttle-3-diffusion.
+  * T5 encoder for the SD3 Pipeline.

+* VLMPipeline
+
+  * Qwen2VL support.
+  * Performance metrics.
+
+* Enabled streaming with non-empty stop_strings.
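+
+As a minimal, hypothetical illustration of the WhisperPipeline additions above, the sketch
+below assumes a stateful Whisper model already exported with optimum-intel; the model
+directory and the audio-loading approach are placeholders:
+
+.. code-block:: py
+
+   import librosa
+   import openvino_genai
+
+   # Whisper expects 16 kHz mono float samples.
+   raw_speech, _ = librosa.load("sample.wav", sr=16000)
+   pipe = openvino_genai.WhisperPipeline("whisper-base-ov", "CPU")  # placeholder model dir
+   print(pipe.generate(raw_speech.tolist()))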
+
+
+Other Changes and Known Issues
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

 Jupyter Notebooks
 -----------------------------

+* `Janus Pro `__
+* `Running LLMs with OpenVINO and LocalAI `__
+* `GLM-V-Edge `__
+* `Multimodal RAG with Llamaindex `__
+* `OmniGen `__
+* `Sana `__
+* `LTX Video `__
+* `Image-to-Image generation using OpenVINO GenAI `__
+* `Inpainting using OpenVINO GenAI `__
+* `RAG using OpenVINO GenAI and LangChain `__
+* `LLM chatbot `__
+  extended with GLM-Edge, Phi4, and Deepseek-R1 distilled models
+
+
+Known Issues
+-----------------------------
+
+| **Component: OVC**
+| ID: 160167
+| Description:
+| TensorFlow Object Detection models converted to the IR through the OVC tool give poor
+  performance on CPU and GPU devices. As a workaround, please use the MO tool from 2024.6 or
+  earlier to generate IRs.
+
+| **Component: Tokenizers**
+| ID: 159392
+| Description:
+| An ONNX model fails to convert when openvino-tokenizers is installed. As a workaround, please
+  uninstall openvino-tokenizers to convert the ONNX model to the IR.
+
+| **Component: CPU Plugin**
+| ID: 161336
+| Description:
+| Compilation of an OpenVINO model performing weight quantization fails with a segmentation
+  fault on LNL. The following workaround can be applied to make it work with existing OV
+  versions (including 25.0 RCs) before running the application: export DNNL_MAX_CPU_ISA=AVX2_VNNI.
+
+| **Component: GPU Plugin**
+| ID: 160802
+| Description:
+| The mllama model crashes on LNL. Please use OpenVINO 2024.6 or earlier to run the model.
+
+| **Component: GPU Plugin**
+| ID: 160948
+| Description:
+| Several models have accuracy degradation on LNL, ACM, and BMG. Please use OpenVINO 2024.6
+  to run the models. Model list: Denoise, Sharpen-Sharpen, fastseg-small, hbonet-0.5,
+  modnet_photographic_portrait_matting, modnet_webcam_portrait_matting,
+  mobilenet-v3-small-1.0-224, nasnet-a-mobile-224, yolo_v4, yolo_v5m, yolo_v5s, yolo_v8n,
+  yolox-tiny, yolact-resnet50-fpn-pytorch.
+
+
+
+
+.. Previous 2025 releases
+.. +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

 .. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 .. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

-.. dropdown:: 2024.6 - 18 December 2024
-   :animate: fade-in-slide-down
-   :color: secondary

@@ -105,8 +390,8 @@ Deprecation And Support
 Using deprecated features and components is not advised. They are available to enable a smooth
 transition to new solutions and will be discontinued in the future. To keep using discontinued
 features, you will have to revert to the last LTS OpenVINO version supporting them.
-For more details, refer to the `OpenVINO Legacy Features and Components __`
-page.
+For more details, refer to:
+`OpenVINO Legacy Features and Components `__.
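+
+As a minimal illustration of the Python namespace deprecation mentioned in these notes
+(assuming the top-level import remains the drop-in replacement):
+
+.. code-block:: py
+
+   # Deprecated, scheduled for removal in 2026.0:
+   from openvino.runtime import Core
+
+   # Preferred going forward:
+   from openvino import Core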
diff --git a/docs/articles_en/assets/images/deploy_encrypted_model.svg b/docs/articles_en/assets/images/deploy_encrypted_model.svg
index fa897731b54fef..3287667eeb8feb 100644
--- a/docs/articles_en/assets/images/deploy_encrypted_model.svg
+++ b/docs/articles_en/assets/images/deploy_encrypted_model.svg
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:454a531a9b2d2883ac9a6beb01ce7ecdd7ec69ea2c68d63b39b65f3780c957fe
-size 54772
+oid sha256:02e27919487f371839ec4773fa1cdc1063f59b1a5cdd9875faf9ac75f57181d5
+size 85978
diff --git a/docs/articles_en/assets/snippets/ov_custom_op.py b/docs/articles_en/assets/snippets/ov_custom_op.py
new file mode 100644
index 00000000000000..9d8bdd7ce10bae
--- /dev/null
+++ b/docs/articles_en/assets/snippets/ov_custom_op.py
@@ -0,0 +1,45 @@
+# Copyright (C) 2018-2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# ! [op:common_include]
+from openvino import Op
+# ! [op:common_include]
+
+
+
+# ! [op:header]
+class Identity(Op):
+# ! [op:header]
+
+# ! [op:ctor]
+    def __init__(self, inputs=None, **attrs):
+        super().__init__(self, inputs)
+        self._attrs = attrs
+# ! [op:ctor]
+
+# ! [op:validate]
+    def validate_and_infer_types(self):
+        # Identity: the output copies the input's element type and shape
+        self.set_output_type(0, self.get_input_element_type(0), self.get_input_partial_shape(0))
+# ! [op:validate]
+
+# ! [op:copy]
+    def clone_with_new_inputs(self, new_inputs):
+        return Identity(new_inputs)
+# ! [op:copy]
+
+# ! [op:evaluate]
+    def evaluate(self, outputs, inputs):
+        # Copy the input tensor to the output, unchanged
+        outputs[0].shape = inputs[0].shape
+        inputs[0].copy_to(outputs[0])
+        return True
+
+    def has_evaluate(self):
+        return True
+# ! [op:evaluate]
+
+# ! [op:visit_attributes]
+    def visit_attributes(self, visitor):
+        # Expose all attributes for serialization and deserialization
+        visitor.on_attributes(self._attrs)
+        return True
+# ! [op:visit_attributes]
diff --git a/docs/articles_en/assets/snippets/ov_extensions.py b/docs/articles_en/assets/snippets/ov_extensions.py
index bf8b7db52dbf3f..141fcb912f9bf2 100644
--- a/docs/articles_en/assets/snippets/ov_extensions.py
+++ b/docs/articles_en/assets/snippets/ov_extensions.py
@@ -3,18 +3,30 @@
 #
 import openvino as ov
+from ov_custom_op import Identity

 #! [py_frontend_extension_ThresholdedReLU_header]
-import openvino.runtime.opset12 as ops
+import openvino.runtime.opset14 as ops
 from openvino.frontend import ConversionExtension
 #! [py_frontend_extension_ThresholdedReLU_header]

 #! [add_extension]
-# Not implemented
+core = ov.Core()
+
+# Use the operation type to add an operation extension
+core.add_extension(Identity)
+
+# or you can add an operation extension object, which is an equivalent form
+core.add_extension(ov.OpExtension(Identity))
 #! [add_extension]

 #! [add_frontend_extension]
-# Not implemented
+# Register a more sophisticated mapping with decomposition
+def conversion(node):
+    input_node = node.get_input(0)
+    return Identity(input_node).outputs()
+
+core.add_extension(ConversionExtension("Identity", conversion))
 #! [add_frontend_extension]

 from utils import get_path_to_extension_library
diff --git a/docs/articles_en/documentation.rst b/docs/articles_en/documentation.rst
index f1c240066f5f05..f1a229a4bb059e 100644
--- a/docs/articles_en/documentation.rst
+++ b/docs/articles_en/documentation.rst
@@ -16,20 +16,28 @@ Documentation
    Compatibility and Support
    Legacy Features
    OpenVINO Extensibility
-   OpenVINO™ Security
+   OpenVINO Security


-This section provides reference documents that guide you through the OpenVINO toolkit workflow, from preparing models, optimizing them, to deploying them in your own deep learning applications.
+This section provides reference documents for the OpenVINO toolkit, such as the API reference
+and operation listings.

 | :doc:`API Reference doc path `
-| A collection of reference articles for OpenVINO C++, C, and Python APIs.
+| A collection of reference articles for OpenVINO C++, C, Node.js, and Python APIs, as well as
+  the Python API for OpenVINO GenAI.

-| :doc:`OpenVINO Ecosystem `
-| Apart from the core components, OpenVINO offers tools, plugins, and expansions revolving around it, even if not constituting necessary parts of its workflow. This section gives you an overview of what makes up the OpenVINO toolkit.
+| :doc:`OpenVINO IR format `
+| A section describing the OpenVINO IR model format and its opsets.
+
+| :doc:`Legacy Features `
+| Information on all OpenVINO components that have recently been deprecated or discontinued.

 | :doc:`OpenVINO Extensibility Mechanism `
-| The Intel® Distribution of OpenVINO™ toolkit supports neural network models trained with various frameworks, including TensorFlow, PyTorch, ONNX, TensorFlow Lite, and PaddlePaddle. Learn how to extend OpenVINO functionality with custom settings.
+| The Intel® Distribution of OpenVINO™ toolkit supports neural network models trained with
+  various frameworks, including TensorFlow, PyTorch, ONNX, TensorFlow Lite, and PaddlePaddle.
+  Learn how to extend OpenVINO functionality with custom settings.

 | :doc:`OpenVINO™ Security `
-| Learn how to use OpenVINO securely and protect your data to meet specific security and privacy requirements.
+| Learn how to use OpenVINO securely and protect your data to meet specific security and privacy
+  requirements.
diff --git a/docs/articles_en/documentation/compatibility-and-support/supported-devices.rst b/docs/articles_en/documentation/compatibility-and-support/supported-devices.rst
index 8708f9f4f81ad3..b249b418634258 100644
--- a/docs/articles_en/documentation/compatibility-and-support/supported-devices.rst
+++ b/docs/articles_en/documentation/compatibility-and-support/supported-devices.rst
@@ -43,7 +43,6 @@ Feature Support and API Coverage
    :doc:`Multi-stream execution <../../openvino-workflow/running-inference/optimize-inference/optimizing-throughput>`            Yes    Yes      No
    :doc:`Model caching <../../openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview>`  Yes    Partial  Yes
    :doc:`Dynamic shapes <../../openvino-workflow/running-inference/dynamic-shapes>`                                               Yes    Partial  No
-   :doc:`Import/Export <../../about-openvino/openvino-ecosystem>`                                                                 Yes    Yes      Yes
    :doc:`Preprocessing acceleration <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>`        Yes    Yes      No
    :doc:`Stateful models <../../openvino-workflow/running-inference/stateful-models>`                                             Yes    Yes      Yes
    :doc:`Extensibility <../../documentation/openvino-extensibility>`                                                              Yes    Yes      No
diff --git a/docs/articles_en/documentation/openvino-extensibility.rst b/docs/articles_en/documentation/openvino-extensibility.rst
index 4be585bbcec367..eaf5dfe890bb9b 100644
--- a/docs/articles_en/documentation/openvino-extensibility.rst
+++ b/docs/articles_en/documentation/openvino-extensibility.rst
@@ -51,6 +51,8 @@ If such decomposition is not possible or appears too bulky with a large number of operations, You might prefer implementing a custom operation class if you already have a generic C++ implementation of operation kernel. Otherwise, try to decompose the operation first, as described above. Then, after verifying correctness of inference and resulting performance, you may move on to optional implementation of Bare Metal C++.
+Additionally, it is possible to implement custom operations using Python. OpenVINO provides a Python API that allows you to define and register custom operations. This can be particularly useful for rapid prototyping and testing of new operations.
+
 Mapping from Framework Operation
 ################################
@@ -110,9 +112,7 @@ The ``Identity`` is a custom operation class defined in :doc:`Custom Operation G
    :language: cpp
    :fragment: [add_frontend_extension]

-When Python API is used, there is no way to implement a custom OpenVINO operation. Even if custom OpenVINO operation is implemented in C++ and loaded into the runtime by a shared library, there is still no way to add a frontend mapping extension that refers to this custom operation. In this case, use C++ shared library approach to implement both operations semantics and framework mapping.
-
-Python can still be used to map and decompose operations when only operations from the standard OpenVINO operation set are used.
+If a custom OpenVINO operation is implemented in C++ and loaded into the runtime through a shared library, there is no way to add a frontend mapping extension that refers to this custom operation. In this case, use the C++ shared library approach to implement both the operation semantics and the framework mapping.

 .. _create_a_library_with_extensions:

diff --git a/docs/articles_en/documentation/openvino-extensibility/custom-openvino-operations.rst b/docs/articles_en/documentation/openvino-extensibility/custom-openvino-operations.rst
index c6ffcfc4abd72c..9c9845c33d944e 100644
--- a/docs/articles_en/documentation/openvino-extensibility/custom-openvino-operations.rst
+++ b/docs/articles_en/documentation/openvino-extensibility/custom-openvino-operations.rst
@@ -7,30 +7,62 @@ Custom OpenVINO Operations
    custom operations to support models with operations not supported by OpenVINO.

-OpenVINO™ Extension API allows you to register custom operations to support models with operations which OpenVINO™ does not support out-of-the-box. This capability requires writing code in C++, so if you are using Python to develop your application you need to build a separate shared library implemented in C++ first and load it in Python using ``add_extension`` API. Please refer to :ref:`Create library with extensions ` for more details on library creation and usage. The remaining part of this document describes how to implement an operation class.
+OpenVINO™ Extension API allows you to register custom operations to support models with operations which OpenVINO™ does not support out-of-the-box. A custom operation can be implemented in both C++ and Python.
+
+It is also possible to create a shared library with a custom operation implemented in C++ first and load it using the ``add_extension`` API. Please refer to :ref:`Create library with extensions ` for more details on library creation and usage. The remaining part of this document describes how to implement an operation class using both the C++ API and Python API.

 Operation Class
 ###############

+.. tab-set::
+
+
+   .. tab-item:: Python
+      :sync: py
+
+      To add your custom operation, create a new class that extends ``openvino.Op``, which is in turn derived from ``openvino.Node``, the base class for all graph operations in OpenVINO™. To add ``openvino.Op`` you need to import it.
+
+      .. doxygensnippet:: docs/articles_en/assets/snippets/ov_custom_op.py
+         :language: python
+         :fragment: [op:common_include]
+
+      Follow the steps below to add a simple custom operation:
+
+      1. Define the ``__init__`` method to initialize the class with inputs and attributes.
+
+      2. Override the shape inference method ``validate_and_infer_types``. This method is called multiple times during graph manipulations to determine the shapes and element types of the operation's outputs. To access the input shapes and input element types, use the ``get_input_partial_shape()`` and ``get_input_element_type()`` methods of ``openvino.Node``. Set the inferred shape and element type of the output using ``set_output_type``.
+
+      3. Override the ``visit_attributes`` method, which enables serialization and deserialization of operation attributes. An ``AttributeVisitor`` is passed to the method, and the implementation is expected to walk over all the attributes in the op using the type-aware ``on_attribute`` helper. Helpers are already implemented for standard types like ``int``, ``float``, ``bool``, ``vector``, and for existing OpenVINO defined types.
+
+      4. Override the ``evaluate`` method with the code that will run when this operation is encountered in the model graph during model inference. It works only for the CPU device and enables the OpenVINO runtime to run your arbitrary Python code as a part of model inference. If your operation contains an ``evaluate`` method, you also need to override the ``has_evaluate`` method to return ``True``; it reports whether an ``evaluate`` implementation is available for the operation.
+
+      5. Override the optional ``clone_with_new_inputs`` method, which enables graph manipulation routines to create copies of this operation and connect it to different nodes during optimization.
+
+   .. tab-item:: C++
+      :sync: cpp
+
+      To add your custom operation, create a new class that extends ``ov::Op``, which is in turn derived from ``ov::Node``, the base class for all graph operations in OpenVINO™. To add ``ov::Op``, include the next file:

-.. doxygensnippet:: src/core/template_extension/identity.hpp
-   :language: cpp
-   :fragment: [op:common_include]

-Follow the steps below to add a custom operation:
+      .. doxygensnippet:: src/core/template_extension/identity.hpp
+         :language: cpp
+         :fragment: [op:common_include]
+
+      Follow the steps below to add a custom operation:

-1. Add the ``OPENVINO_OP`` macro which defines a ``NodeTypeInfo`` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an operation currently consists of a string operation identifier and a string for operation version.
+      1. Add the ``OPENVINO_OP`` macro. The type info of an operation consists of a string operation identifier and a string for operation version.

-2. Implement default constructor and constructors that optionally take the operation inputs and attributes as parameters.
+      2. Implement a default constructor and constructors that optionally take the operation inputs and attributes as parameters.

-3. Override the shape inference method ``validate_and_infer_types``. This method is called multiple times during graph manipulations to determine the shapes and element types of the operations outputs. To access the input shapes and input element types, use the ``get_input_partial_shape()`` and ``get_input_element_type()`` methods of ``ov::Node``. Set the inferred shape and element type of the output using ``set_output_type``.
+   .. tab-item:: C++
+      :sync: cpp
+
+      To add your custom operation, create a new class that extends ``ov::Op``, which is in turn derived from ``ov::Node``, the base class for all graph operations in OpenVINO™. To add ``ov::Op``, include the following file:
 
-.. doxygensnippet:: src/core/template_extension/identity.hpp
-   :language: cpp
-   :fragment: [op:common_include]
 
-Follow the steps below to add a custom operation:
+      .. doxygensnippet:: src/core/template_extension/identity.hpp
+         :language: cpp
+         :fragment: [op:common_include]
+
+      Follow the steps below to add a custom operation:
 
-1. Add the ``OPENVINO_OP`` macro which defines a ``NodeTypeInfo`` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an operation currently consists of a string operation identifier and a string for operation version.
+      1. Add the ``OPENVINO_OP`` macro. The type info of an operation consists of a string operation identifier and a string for operation version.
 
-2. Implement default constructor and constructors that optionally take the operation inputs and attributes as parameters.
+      2. Implement the default constructor and constructors that optionally take the operation inputs and attributes as parameters.
 
-3. Override the shape inference method ``validate_and_infer_types``. This method is called multiple times during graph manipulations to determine the shapes and element types of the operations outputs. To access the input shapes and input element types, use the ``get_input_partial_shape()`` and ``get_input_element_type()`` methods of ``ov::Node``. Set the inferred shape and element type of the output using ``set_output_type``.
+      3. Override the shape inference method ``validate_and_infer_types``. This method is called multiple times during graph manipulations to determine the shapes and element types of the operation's outputs. To access the input shapes and input element types, use the ``get_input_partial_shape()`` and ``get_input_element_type()`` methods of ``ov::Node``. Set the inferred shape and element type of the output using ``set_output_type``.
 
-4. Override the ``clone_with_new_inputs`` method, which enables graph manipulation routines to create copies of this operation and connect it to different nodes during optimization.
+      4. Override the ``clone_with_new_inputs`` method, which enables graph manipulation routines to create copies of this operation and connect it to different nodes during optimization.
 
-5. Override the ``visit_attributes`` method, which enables serialization and deserialization of operation attributes. An ``AttributeVisitor`` is passed to the method, and the implementation is expected to walk over all the attributes in the op using the type-aware ``on_attribute`` helper. Helpers are already implemented for standard C++ types like ``int64_t``, ``float``, ``bool``, ``vector``, and for existing OpenVINO defined types.
+      5. Override the ``visit_attributes`` method, which enables serialization and deserialization of operation attributes. An ``AttributeVisitor`` is passed to the method, and the implementation is expected to walk over all the attributes in the op using the type-aware ``on_attribute`` helper. Helpers are already implemented for standard C++ types like ``int64_t``, ``float``, ``bool``, ``vector``, and for existing OpenVINO defined types.
+
+      6. Override the ``evaluate`` method, which enables some devices to fall back to this implementation and allows constant folding to be applied when a custom operation is on a constant branch. If your operation implements ``evaluate``, you also need to override the ``has_evaluate`` method, which reports whether an ``evaluate`` implementation is available for the operation.
 
-6. Override ``evaluate``, which is an optional method that enables fallback of some devices to this implementation and the application of constant folding if there is a custom operation on the constant branch. If your operation contains ``evaluate`` method you also need to override the ``has_evaluate`` method, this method allows to get information about availability of ``evaluate`` method for the operation.
 
 Based on that, declaration of an operation class can look as follows:
 
@@ -43,43 +75,103 @@
 OpenVINO™ operation contains two constructors:
 
 * Default constructor, which enables you to create an operation without attributes
 * Constructor that creates and validates an operation with specified inputs and attributes
 
-.. doxygensnippet:: src/core/template_extension/identity.cpp
-   :language: cpp
-   :fragment: [op:ctor]
+.. tab-set::
+
+   .. tab-item:: Python
+      :sync: py
+
+      .. doxygensnippet:: docs/articles_en/assets/snippets/ov_custom_op.py
+         :language: python
+         :fragment: [op:ctor]
+
+   .. tab-item:: C++
+      :sync: cpp
+
+      .. doxygensnippet:: src/core/template_extension/identity.cpp
+         :language: cpp
+         :fragment: [op:ctor]
 
 ``validate_and_infer_types()``
 ++++++++++++++++++++++++++++++
 
 ``ov::Node::validate_and_infer_types`` method validates operation attributes and calculates output shapes using attributes of the operation.
 
-.. doxygensnippet:: src/core/template_extension/identity.cpp
-   :language: cpp
-   :fragment: [op:validate]
+.. tab-set::
+
+   .. 
tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_custom_op.py + :language: python + :fragment: [op:validate] + + .. tab-item:: C++ + :sync: cpp + + .. doxygensnippet:: src/core/template_extension/identity.cpp + :language: cpp + :fragment: [op:validate] ``clone_with_new_inputs()`` +++++++++++++++++++++++++++ ``ov::Node::clone_with_new_inputs`` method creates a copy of the operation with new inputs. -.. doxygensnippet:: src/core/template_extension/identity.cpp - :language: cpp - :fragment: [op:copy] +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_custom_op.py + :language: python + :fragment: [op:copy] + + .. tab-item:: C++ + :sync: cpp + + .. doxygensnippet:: src/core/template_extension/identity.cpp + :language: cpp + :fragment: [op:copy] ``visit_attributes()`` ++++++++++++++++++++++ ``ov::Node::visit_attributes`` method enables you to visit all operation attributes. -.. doxygensnippet:: src/core/template_extension/identity.cpp - :language: cpp - :fragment: [op:visit_attributes] +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_custom_op.py + :language: python + :fragment: [op:visit_attributes] + + .. tab-item:: C++ + :sync: cpp + + .. doxygensnippet:: src/core/template_extension/identity.cpp + :language: cpp + :fragment: [op:visit_attributes] ``evaluate() and has_evaluate()`` +++++++++++++++++++++++++++++++++ ``ov::Node::evaluate`` method enables you to apply constant folding to an operation. -.. doxygensnippet:: src/core/template_extension/identity.cpp - :language: cpp - :fragment: [op:evaluate] +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_custom_op.py + :language: python + :fragment: [op:evaluate] + + .. tab-item:: C++ + :sync: cpp + + .. doxygensnippet:: src/core/template_extension/identity.cpp + :language: cpp + :fragment: [op:evaluate] diff --git a/docs/articles_en/documentation/openvino-security.rst b/docs/articles_en/documentation/openvino-security.rst index c2b2d2ce04188e..c00d71c20fe657 100644 --- a/docs/articles_en/documentation/openvino-security.rst +++ b/docs/articles_en/documentation/openvino-security.rst @@ -8,7 +8,7 @@ with encryption or other security tools. Actual security and privacy requirements depend on your unique deployment scenario. This section provides general guidance on using OpenVINO tools and libraries securely. The main security measure for OpenVINO is its -:doc:`Security Add-on <../about-openvino/openvino-ecosystem/openvino-security-add-on>`. You can find its description +:doc:`Security Add-on <../about-openvino/openvino-ecosystem/openvino-project/openvino-security-add-on>`. You can find its description in the Ecosystem section. .. _encrypted-models: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst b/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst index 726e2872bdae0e..dafd1ed1577460 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-apt.rst @@ -56,28 +56,28 @@ Step 1: Set Up the OpenVINO Toolkit APT Repository .. 
code-block:: sh - echo "deb https://apt.repos.intel.com/openvino/2024 ubuntu24 main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2024.list + echo "deb https://apt.repos.intel.com/openvino/2025 ubuntu24 main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2025.list .. tab-item:: Ubuntu 22 :sync: ubuntu-22 .. code-block:: sh - echo "deb https://apt.repos.intel.com/openvino/2024 ubuntu22 main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2024.list + echo "deb https://apt.repos.intel.com/openvino/2025 ubuntu22 main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2025.list .. tab-item:: Ubuntu 20 :sync: ubuntu-20 .. code-block:: sh - echo "deb https://apt.repos.intel.com/openvino/2024 ubuntu20 main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2024.list + echo "deb https://apt.repos.intel.com/openvino/2025 ubuntu20 main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2025.list .. tab-item:: Ubuntu 18 :sync: ubuntu-18 .. code-block:: sh - echo "deb https://apt.repos.intel.com/openvino/2024 ubuntu18 main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2024.list + echo "deb https://apt.repos.intel.com/openvino/2025 ubuntu18 main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2025.list 3. Update the list of packages via the update command: @@ -133,7 +133,7 @@ Step 2: Install OpenVINO Runtime Using the APT Package Manager .. code-block:: sh - sudo apt install openvino-2024.4.0 + sudo apt install openvino-2025.0.0 .. note:: @@ -206,7 +206,7 @@ To uninstall OpenVINO Runtime via APT, run the following command based on your n .. code-block:: sh - sudo apt autoremove openvino-2024.4.0 + sudo apt autoremove openvino-2025.0.0 What's Next? diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst index fca179d815fca7..226cacbc3af74e 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-linux.rst @@ -58,7 +58,7 @@ Step 1: Download and Install the OpenVINO Core Components cd /Downloads -4. Download the `OpenVINO Runtime archive file for your system `_, extract the files, rename the extracted folder and move it to the desired path: +4. Download the `OpenVINO Runtime archive file for your system `_, extract the files, rename the extracted folder and move it to the desired path: .. tab-set:: @@ -73,9 +73,9 @@ Step 1: Download and Install the OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_ubuntu24_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_ubuntu24_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/linux/openvino_toolkit_ubuntu24_2025.0.0.17942.1f68be9f594_x86_64.tgz --output openvino_2025.0.0.tgz + tar -xf openvino_2025.0.0.tgz + sudo mv openvino_toolkit_ubuntu24_2025.0.0.17942.1f68be9f594_x86_64 /opt/intel/openvino_2025.0.0 .. tab-item:: Ubuntu 22.04 :sync: ubuntu-22 @@ -83,9 +83,9 @@ Step 1: Download and Install the OpenVINO Core Components .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_ubuntu22_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_ubuntu22_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/linux/openvino_toolkit_ubuntu22_2025.0.0.17942.1f68be9f594_x86_64.tgz --output openvino_2025.0.0.tgz + tar -xf openvino_2025.0.0.tgz + sudo mv openvino_toolkit_ubuntu22_2025.0.0.17942.1f68be9f594_x86_64 /opt/intel/openvino_2025.0.0 .. tab-item:: Ubuntu 20.04 :sync: ubuntu-20 @@ -93,9 +93,9 @@ Step 1: Download and Install the OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_ubuntu20_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_ubuntu20_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/linux/openvino_toolkit_ubuntu20_2025.0.0.17942.1f68be9f594_x86_64.tgz --output openvino_2025.0.0.tgz + tar -xf openvino_2025.0.0.tgz + sudo mv openvino_toolkit_ubuntu20_2025.0.0.17942.1f68be9f594_x86_64 /opt/intel/openvino_2025.0.0 .. tab-item:: RHEL 8 :sync: rhel-8 @@ -103,18 +103,18 @@ Step 1: Download and Install the OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_rhel8_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_rhel8_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/linux/openvino_toolkit_rhel8_2025.0.0.17942.1f68be9f594_x86_64.tgz --output openvino_2025.0.0.tgz + tar -xf openvino_2025.0.0.tgz + sudo mv openvino_toolkit_rhel8_2025.0.0.17942.1f68be9f594_x86_64 /opt/intel/openvino_2025.0.0 .. tab-item:: CentOS 7 :sync: centos-7 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_centos7_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_centos7_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/linux/openvino_toolkit_centos7_2025.0.0.17942.1f68be9f594_x86_64.tgz --output openvino_2025.0.0.tgz + tar -xf openvino_2025.0.0.tgz + sudo mv openvino_toolkit_centos7_2025.0.0.17942.1f68be9f594_x86_64 /opt/intel/openvino_2025.0.0 .. tab-item:: ARM 64-bit @@ -122,25 +122,25 @@ Step 1: Download and Install the OpenVINO Core Components .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_ubuntu20_2024.6.0.17404.4c0f47d2335_arm64.tgz -O openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_ubuntu20_2024.6.0.17404.4c0f47d2335_arm64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/linux/openvino_toolkit_ubuntu20_2025.0.0.17942.1f68be9f594_arm64.tgz -O openvino_2025.0.0.tgz + tar -xf openvino_2025.0.0.tgz + sudo mv openvino_toolkit_ubuntu20_2025.0.0.17942.1f68be9f594_arm64 /opt/intel/openvino_2025.0.0 .. tab-item:: ARM 32-bit :sync: arm-32 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/linux/l_openvino_toolkit_debian10_2024.6.0.17404.4c0f47d2335_armhf.tgz -O openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv l_openvino_toolkit_debian10_2024.6.0.17404.4c0f47d2335_armhf /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/linux/openvino_toolkit_debian10_2025.0.0.17942.1f68be9f594_armhf.tgz -O openvino_2025.0.0.tgz + tar -xf openvino_2025.0.0.tgz + sudo mv openvino_toolkit_debian10_2025.0.0.17942.1f68be9f594_armhf /opt/intel/openvino_2025.0.0 5. Install required system dependencies on Linux. To do this, OpenVINO provides a script in the extracted installation directory. Run the following command: .. code-block:: sh - cd /opt/intel/openvino_2024.6.0 + cd /opt/intel/openvino_2025.0.0 sudo -E ./install_dependencies/install_openvino_dependencies.sh 6. (Optional) Install *numpy* Python Library: @@ -149,11 +149,11 @@ Step 1: Download and Install the OpenVINO Core Components This step is required only when you decide to use Python API. - You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2024.6.0/python`` folder: + You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2025.0.0/python`` folder: .. code-block:: sh - cd /opt/intel/openvino_2024.6.0 + cd /opt/intel/openvino_2025.0.0 python3 -m pip install -r ./python/requirements.txt 7. For simplicity, it is useful to create a symbolic link as below: @@ -162,11 +162,11 @@ Step 1: Download and Install the OpenVINO Core Components cd /opt/intel - sudo ln -s openvino_2024.6.0 openvino_2024 + sudo ln -s openvino_2025.0.0 openvino_2025 .. note:: - If you have already installed a previous release of OpenVINO 2024, a symbolic link to the ``openvino_2024`` folder may already exist. - Unlink the previous link with ``sudo unlink openvino_2024``, and then re-run the command above. + If you have already installed a previous release of OpenVINO 2025, a symbolic link to the ``openvino_2025`` folder may already exist. + Unlink the previous link with ``sudo unlink openvino_2025``, and then re-run the command above. Congratulations, you have finished the installation! For some use cases you may still @@ -174,9 +174,9 @@ need to install additional components. Check the description below, as well as t :doc:`list of additional configurations <./configurations>` to see if your case needs any of them. -The ``/opt/intel/openvino_2024`` folder now contains the core components for OpenVINO. +The ``/opt/intel/openvino_2025`` folder now contains the core components for OpenVINO. If you used a different path in Step 2, for example, ``/home//intel/``, -OpenVINO is now in ``/home//intel/openvino_2024``. The path to the ``openvino_2024`` +OpenVINO is now in ``/home//intel/openvino_2025``. 
The path to the ``openvino_2025`` directory is also referred as ```` throughout the OpenVINO documentation. @@ -185,11 +185,11 @@ Step 2: Configure the Environment You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the ``setupvars.sh`` script as shown below to temporarily set your environment variables. -If your is not ``/opt/intel/openvino_2024``, use the correct one instead. +If your is not ``/opt/intel/openvino_2025``, use the correct one instead. .. code-block:: sh - source /opt/intel/openvino_2024/setupvars.sh + source /opt/intel/openvino_2025/setupvars.sh If you have more than one OpenVINO version installed on your system, you can easily switch versions by sourcing the `setupvars.sh` of your choice. @@ -198,7 +198,7 @@ If you have more than one OpenVINO version installed on your system, you can eas The above command must be re-run every time you start a new terminal session. To set up Linux to automatically run the command every time a new terminal is opened, - open ``~/.bashrc`` in your favorite editor and add ``source /opt/intel/openvino_2024/setupvars.sh`` after the last line. + open ``~/.bashrc`` in your favorite editor and add ``source /opt/intel/openvino_2025/setupvars.sh`` after the last line. Next time when you open a terminal, you will see ``[setupvars.sh] OpenVINO™ environment initialized``. Changing ``.bashrc`` is not recommended when you have multiple OpenVINO versions on your machine and want to switch among them. @@ -257,7 +257,7 @@ If you have created the symbolic link, remove the link first: .. code-block:: sh - sudo rm /opt/intel/openvino_2024 + sudo rm /opt/intel/openvino_2025 To delete the files: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst index 89aab67b231c41..10bd9d36185d11 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-macos.rst @@ -47,7 +47,7 @@ Step 1: Install OpenVINO Core Components cd /Downloads -4. Download the `OpenVINO Runtime archive file for macOS `__, extract the files, rename the extracted folder and move it to the desired path: +4. Download the `OpenVINO Runtime archive file for macOS `__, extract the files, rename the extracted folder and move it to the desired path: .. tab-set:: @@ -57,9 +57,9 @@ Step 1: Install OpenVINO Core Components .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/macos/m_openvino_toolkit_macos_12_6_2024.6.0.17404.4c0f47d2335_x86_64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv m_openvino_toolkit_macos_12_6_2024.6.0.17404.4c0f47d2335_x86_64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/macos/openvino_toolkit_macos_12_6_2025.0.0.17942.1f68be9f594_x86_64.tgz --output openvino_2025.0.0.tgz + tar -xf openvino_2025.0.0.tgz + sudo mv openvino_toolkit_macos_12_6_2025.0.0.17942.1f68be9f594_x86_64 /opt/intel/openvino_2025.0.0 .. tab-item:: ARM, 64-bit :sync: arm-64 @@ -67,9 +67,9 @@ Step 1: Install OpenVINO Core Components .. 
code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/macos/m_openvino_toolkit_macos_12_6_2024.6.0.17404.4c0f47d2335_arm64.tgz --output openvino_2024.6.0.tgz - tar -xf openvino_2024.6.0.tgz - sudo mv m_openvino_toolkit_macos_12_6_2024.6.0.17404.4c0f47d2335_arm64 /opt/intel/openvino_2024.6.0 + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/macos/openvino_toolkit_macos_12_6_2025.0.0.17942.1f68be9f594_arm64.tgz --output openvino_2025.0.0.tgz + tar -xf openvino_2025.0.0.tgz + sudo mv openvino_toolkit_macos_12_6_2025.0.0.17942.1f68be9f594_arm64 /opt/intel/openvino_2025.0.0 5. (Optional) Install *numpy* Python Library: @@ -78,11 +78,11 @@ Step 1: Install OpenVINO Core Components This step is required only when you decide to use Python API. - You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2024.6.0/python`` folder: + You can use the ``requirements.txt`` file from the ``/opt/intel/openvino_2025.0.0/python`` folder: .. code-block:: sh - cd /opt/intel/openvino_2024.6.0 + cd /opt/intel/openvino_2025.0.0 python3 -m pip install -r ./python/requirements.txt 6. For simplicity, it is useful to create a symbolic link as below: @@ -90,12 +90,12 @@ Step 1: Install OpenVINO Core Components .. code-block:: sh - sudo ln -s /opt/intel/openvino_2024.6.0 /opt/intel/openvino_2024 + sudo ln -s /opt/intel/openvino_2025.0.0 /opt/intel/openvino_2025 .. note:: - If you have already installed a previous release of OpenVINO 2024, a symbolic link to the ``openvino_2024`` folder may already exist. Unlink the previous link with ``sudo unlink openvino_2024``, and then re-run the command above. + If you have already installed a previous release of OpenVINO 2025, a symbolic link to the ``openvino_2025`` folder may already exist. Unlink the previous link with ``sudo unlink openvino_2025``, and then re-run the command above. Congratulations, you have finished the installation! For some use cases you may still @@ -103,9 +103,9 @@ need to install additional components. Check the description below, as well as t :doc:`list of additional configurations <./configurations>` to see if your case needs any of them. -The ``/opt/intel/openvino_2024`` folder now contains the core components for OpenVINO. +The ``/opt/intel/openvino_2025`` folder now contains the core components for OpenVINO. If you used a different path in Step 2, for example, ``/home//intel/``, -OpenVINO is now in ``/home//intel/openvino_2024``. The path to the ``openvino_2024`` +OpenVINO is now in ``/home//intel/openvino_2025``. The path to the ``openvino_2025`` directory is also referred as ```` throughout the OpenVINO documentation. @@ -114,19 +114,19 @@ Step 2: Configure the Environment You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the ``setupvars.sh`` script as shown below to temporarily set your environment variables. If your ```` (the folder you used to install OpenVINO) is not -the default ``/opt/intel/openvino_2024``, use the correct one instead. +the default ``/opt/intel/openvino_2025``, use the correct one instead. .. code-block:: sh - cd /opt/intel/openvino_2024 - source /opt/intel/openvino_2024/setupvars.sh + cd /opt/intel/openvino_2025 + source /opt/intel/openvino_2025/setupvars.sh If you have more than one OpenVINO™ version on your machine, you can easily switch its version by sourcing the ``setupvars.sh`` of your choice. .. 
note:: - The above command must be re-run every time you start a new terminal session. To set up macOS to automatically run the command every time a new terminal is opened, open ``~/.zshrc`` in your favorite editor and add ``source /opt/intel/openvino_2024/setupvars.sh`` after the last line. Next time when you open a terminal, you will see ``[setupvars.sh] OpenVINO™ environment initialized``. Changing ``~/.zshrc`` is not recommended when you have multiple OpenVINO versions on your machine and want to switch among them. + The above command must be re-run every time you start a new terminal session. To set up macOS to automatically run the command every time a new terminal is opened, open ``~/.zshrc`` in your favorite editor and add ``source /opt/intel/openvino_2025/setupvars.sh`` after the last line. Next time when you open a terminal, you will see ``[setupvars.sh] OpenVINO™ environment initialized``. Changing ``~/.zshrc`` is not recommended when you have multiple OpenVINO versions on your machine and want to switch among them. @@ -174,7 +174,7 @@ If you have created the symbolic link, remove the link first: .. code-block:: sh - sudo rm /opt/intel/openvino_2024 + sudo rm /opt/intel/openvino_2025 To delete the files: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst index ebda48847aafab..c8a2e400afd1fb 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-archive-windows.rst @@ -41,18 +41,18 @@ Step 1: Download and Install OpenVINO Core Components ``C:\Program Files (x86)\Intel`` is the recommended folder. You may also use a different path if desired or if you don't have administrator privileges on your computer. -2. Download the `OpenVINO Runtime archive file for Windows `__ to your local ``Downloads`` folder. +2. Download the `OpenVINO Runtime archive file for Windows `__ to your local ``Downloads`` folder. If you prefer using command-lines, run the following commands in the command prompt window you opened: .. code-block:: sh cd /Downloads - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/windows/w_openvino_toolkit_windows_2024.6.0.17404.4c0f47d2335_x86_64.zip --output openvino_2024.6.0.zip + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/windows/openvino_toolkit_windows_2025.0.0.17942.1f68be9f594_x86_64.zip --output openvino_2025.0.0.zip .. note:: - A ``.sha256`` file is provided together with the archive file to validate your download process. To do that, download the ``.sha256`` file from the same repository and run ``CertUtil -hashfile openvino_2024.6.0.zip SHA256``. Compare the returned value in the output with what's in the ``.sha256`` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket `here `__. + A ``.sha256`` file is provided together with the archive file to validate your download process. To do that, download the ``.sha256`` file from the same repository and run ``CertUtil -hashfile openvino_2025.0.0.zip SHA256``. Compare the returned value in the output with what's in the ``.sha256`` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket `here `__. 3. 
Use your favorite tool to extract the archive file, rename the extracted folder, and move it to the ``C:\Program Files (x86)\Intel`` directory. @@ -61,9 +61,9 @@ Step 1: Download and Install OpenVINO Core Components .. code-block:: sh - tar -xf openvino_2024.6.0.zip - ren w_openvino_toolkit_windows_2024.6.0.17404.4c0f47d2335_x86_64 openvino_2024.6.0 - move openvino_2024.6.0 "C:\Program Files (x86)\Intel" + tar -xf openvino_2025.0.0.zip + ren openvino_toolkit_windows_2025.0.0.17942.1f68be9f594_x86_64 openvino_2025.0.0 + move openvino_2025.0.0 "C:\Program Files (x86)\Intel" 4. (Optional) Install *numpy* Python Library: @@ -72,11 +72,11 @@ Step 1: Download and Install OpenVINO Core Components This step is required only when you decide to use Python API. - You can use the ``requirements.txt`` file from the ``C:\Program Files (x86)\Intel\openvino_2024.6.0\python`` folder: + You can use the ``requirements.txt`` file from the ``C:\Program Files (x86)\Intel\openvino_2025.0.0\python`` folder: .. code-block:: sh - cd "C:\Program Files (x86)\Intel\openvino_2024.6.0" + cd "C:\Program Files (x86)\Intel\openvino_2025.0.0" python -m pip install -r .\python\requirements.txt @@ -85,12 +85,12 @@ Step 1: Download and Install OpenVINO Core Components .. code-block:: sh cd C:\Program Files (x86)\Intel - mklink /D openvino_2024 openvino_2024.6.0 + mklink /D openvino_2025 openvino_2025.0.0 .. note:: - If you have already installed a previous release of OpenVINO 2024, a symbolic link to the ``openvino_2024`` folder may already exist. If you want to override it, navigate to the ``C:\Program Files (x86)\Intel`` folder and delete the existing linked folder before running the ``mklink`` command. + If you have already installed a previous release of OpenVINO 2025, a symbolic link to the ``openvino_2025`` folder may already exist. If you want to override it, navigate to the ``C:\Program Files (x86)\Intel`` folder and delete the existing linked folder before running the ``mklink`` command. Congratulations, you have finished the installation! For some use cases you may still @@ -98,9 +98,9 @@ need to install additional components. Check the description below, as well as t :doc:`list of additional configurations <./configurations>` to see if your case needs any of them. -The ``C:\Program Files (x86)\Intel\openvino_2024`` folder now contains the core components for OpenVINO. -If you used a different path in Step 1, you will find the ``openvino_2024`` folder there. -The path to the ``openvino_2024`` directory is also referred as ```` +The ``C:\Program Files (x86)\Intel\openvino_2025`` folder now contains the core components for OpenVINO. +If you used a different path in Step 1, you will find the ``openvino_2025`` folder there. +The path to the ``openvino_2025`` directory is also referred as ```` throughout the OpenVINO documentation. @@ -127,11 +127,11 @@ You must update several environment variables before you can compile and run Ope :sync: cmd Open the Command Prompt, and run the ``setupvars.bat`` batch file to temporarily set your environment variables. - If your ```` is not ``C:\Program Files (x86)\Intel\openvino_2024``, use the correct directory instead. + If your ```` is not ``C:\Program Files (x86)\Intel\openvino_2025``, use the correct directory instead. .. code-block:: sh - "C:\Program Files (x86)\Intel\openvino_2024\setupvars.bat" + "C:\Program Files (x86)\Intel\openvino_2025\setupvars.bat" .. 
important:: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst index 6b544f48541ec9..4c53063166bf93 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst @@ -42,7 +42,7 @@ Installing OpenVINO Runtime with Anaconda Package Manager .. code-block:: sh - conda install -c conda-forge openvino=2024.4.0 + conda install -c conda-forge openvino=2025.0.0 Congratulations! You've just Installed OpenVINO! For some use cases you may still need to install additional components. Check the description below, as well as the @@ -132,7 +132,7 @@ with the proper OpenVINO version number: .. code-block:: sh - conda remove openvino=2024.4.0 + conda remove openvino=2025.0.0 If you have installed specific components of OpenVINO, you can remove them, using: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst b/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst index 215c50c2c0653a..c2ef4ca687fe26 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst @@ -49,24 +49,24 @@ Linux .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/linux/openvino_genai_ubuntu24_2024.6.0.0_x86_64.tar.gz --output openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2025.0/linux/openvino_genai_ubuntu24_2025.0.0.0_x86_64.tar.gz --output openvino_genai_2025.0.0.0.tgz + tar -xf openvino_genai_2025.0.0.0.tgz .. tab-item:: Ubuntu 22.04 :sync: ubuntu-22 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/linux/openvino_genai_ubuntu22_2024.6.0.0_x86_64.tar.gz --output openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2025.0/linux/openvino_genai_ubuntu22_2025.0.0.0_x86_64.tar.gz --output openvino_genai_2025.0.0.0.tgz + tar -xf openvino_genai_2025.0.0.0.tgz .. tab-item:: Ubuntu 20.04 :sync: ubuntu-20 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/linux/openvino_genai_ubuntu20_2024.6.0.0_x86_64.tar.gz --output openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2025.0/linux/openvino_genai_ubuntu20_2025.0.0.0_x86_64.tar.gz --output openvino_genai_2025.0.0.0.tgz + tar -xf openvino_genai_2025.0.0.0.tgz .. tab-item:: ARM 64-bit @@ -74,8 +74,8 @@ Linux .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/linux/openvino_genai_ubuntu20_2024.6.0.0_arm64.tar.gz -O openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2025.0/linux/openvino_genai_ubuntu20_2025.0.0.0_arm64.tar.gz -O openvino_genai_2025.0.0.0.tgz + tar -xf openvino_genai_2025.0.0.0.tgz Windows @@ -84,7 +84,7 @@ Windows .. 
code-block:: sh cd /Downloads - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/windows/openvino_genai_windows_2024.6.0.0_x86_64.zip --output openvino_genai_2024.6.0.0.zip + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2025.0/windows/openvino_genai_windows_2025.0.0.0_x86_64.zip --output openvino_genai_2025.0.0.0.zip macOS ++++++++++++++++++++++++++ @@ -96,16 +96,16 @@ macOS .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/macos/openvino_genai_macos_12_6_2024.6.0.0_x86_64.tar.gz --output openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2025.0/macos/openvino_genai_macos_12_6_2025.0.0.0_x86_64.tar.gz --output openvino_genai_2025.0.0.0.tgz + tar -xf openvino_genai_2025.0.0.0.tgz .. tab-item:: ARM, 64-bit :sync: arm-64 .. code-block:: sh - curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2024.6/macos/openvino_genai_macos_12_6_2024.6.0.0_arm64.tar.gz --output openvino_genai_2024.6.0.0.tgz - tar -xf openvino_genai_2024.6.0.0.tgz + curl -L https://storage.openvinotoolkit.org/repositories/openvino_genai/packages/2025.0/macos/openvino_genai_macos_12_6_2025.0.0.0_arm64.tar.gz --output openvino_genai_2025.0.0.0.tgz + tar -xf openvino_genai_2025.0.0.0.tgz Here are the full guides: diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst b/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst index b2d1870376b444..298b078c00a282 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-yum.rst @@ -25,25 +25,25 @@ Step 1: Set Up the Repository +++++++++++++++++++++++++++++ -1. Create a YUM repository file (``openvino-2024.repo``) in the ``/tmp`` directory as a normal user: +1. Create a YUM repository file (``openvino-2025.repo``) in the ``/tmp`` directory as a normal user: .. code-block:: sh - tee > /tmp/openvino-2024.repo << EOF + tee > /tmp/openvino-2025.repo << EOF [OpenVINO] - name=Intel(R) Distribution of OpenVINO 2024 - baseurl=https://yum.repos.intel.com/openvino/2024 + name=Intel(R) Distribution of OpenVINO 2025 + baseurl=https://yum.repos.intel.com/openvino/2025 enabled=1 gpgcheck=1 repo_gpgcheck=1 gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB EOF -2. Move the new ``openvino-2024.repo`` file to the YUM configuration directory, i.e. ``/etc/yum.repos.d``: +2. Move the new ``openvino-2025.repo`` file to the YUM configuration directory, i.e. ``/etc/yum.repos.d``: .. code-block:: sh - sudo mv /tmp/openvino-2024.repo /etc/yum.repos.d + sudo mv /tmp/openvino-2025.repo /etc/yum.repos.d 3. Verify that the new repository is set up properly. @@ -93,7 +93,7 @@ Install OpenVINO Runtime .. code-block:: sh - sudo yum install openvino-2024.4.0 + sudo yum install openvino-2025.0.0 @@ -164,7 +164,7 @@ To uninstall OpenVINO Runtime via YUM, run the following command based on your n .. 
code-block:: sh - sudo yum autoremove openvino-2024.4.0 + sudo yum autoremove openvino-2025.0.0 diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst b/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst index ca17a7b1bc45c1..827be91fe77d7e 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-zypper.rst @@ -116,7 +116,7 @@ To uninstall OpenVINO Runtime via ZYPPER, run the following command based on you .. code-block:: sh - sudo zypper remove *openvino-2024.4.0* + sudo zypper remove *openvino-2025.0.0* diff --git a/docs/articles_en/openvino-workflow-generative.rst b/docs/articles_en/openvino-workflow-generative.rst index d37f357dc167b3..95568ae4cb5c90 100644 --- a/docs/articles_en/openvino-workflow-generative.rst +++ b/docs/articles_en/openvino-workflow-generative.rst @@ -98,6 +98,6 @@ Proceed to guides on: * :doc:`OpenVINO GenAI <./openvino-workflow-generative/inference-with-genai>` * :doc:`Hugging Face and Optimum Intel <./openvino-workflow-generative/inference-with-optimum-intel>` -* `Generative AI with Base OpenVINO `__ +* `Generative AI with Base OpenVINO `__ diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst index 80c98e1b857522..5f1eb71cc361a1 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst @@ -7,7 +7,7 @@ LLM Weight Compression weight-compression/4-bit-weight-quantization weight-compression/microscaling-quantization - + Weight compression enhances the efficiency of models by reducing their memory footprint, @@ -18,14 +18,14 @@ only targets weights, keeping activations as floating-point numbers. This means of the model's accuracy while improving its speed and reducing its size. The reduction in size is especially noticeable with larger models. For instance the 8 billion parameter Llama 3 model can be reduced -from about 16.1 GB to 4.8 GB using 4-bit weight quantization on top of bfloat16 model. +from about 16.1 GB to 4.8 GB using 4-bit weight quantization on top of a bfloat16 model. .. note:: With smaller language models (i.e. less than 1B parameters), low-bit weight compression may result in more accuracy reduction than with larger models. -LLMs and other GenAI models that require +LLMs and other generative AI models that require extensive memory to store the weights during inference can benefit from weight compression as it: diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst index 3994e5550c4e2f..a9b2f21a2df469 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst @@ -137,8 +137,8 @@ trade-offs after optimization: .. tip:: - NNCF allows stacking the supported optimization methods. For example, AWQ, Scale Estimation - and GPTQ methods can be enabled all together to achieve better accuracy. + NNCF enables you to stack the supported optimization methods. 
For example, AWQ, + Scale Estimation and GPTQ methods may be enabled all together to achieve better accuracy. 4-bit Weight Quantization with GPTQ ################################### diff --git a/src/common/transformations/include/transformations/common_optimizations/activations_scaling.hpp b/src/common/transformations/include/transformations/common_optimizations/activations_scaling.hpp index d8c96a1df542af..2c0ebf6b007a4e 100644 --- a/src/common/transformations/include/transformations/common_optimizations/activations_scaling.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/activations_scaling.hpp @@ -18,7 +18,6 @@ namespace activations_scaling { class TRANSFORMATIONS_API ScaleDownSingleLayer; class TRANSFORMATIONS_API EliminateScalarMul; -class TRANSFORMATIONS_API MulConcatTransformation; class TRANSFORMATIONS_API MulShareTransformation; class TRANSFORMATIONS_API MoveDownScalarMul; @@ -56,29 +55,6 @@ class ov::pass::activations_scaling::EliminateScalarMul : public ov::pass::Match EliminateScalarMul(); }; -// input_a const_a input_b const_b input_c const_c -// \ / \ / \ / -// Multiply_a Multiply_b Multiply_c -// \ | / -// \ | / -// ---------- Concat ------------ -// ==> -// (const_a (const_b (const_c -// input_a /const_c) input_b /const_c) input_c /const_c) -// \ / \ / \ / -// Multiply_a Multiply_b Multiply_c -// \ | / -// \ | / -// ---------- Concat ------------ -// | const_c -// | / -// Multiply -class ov::pass::activations_scaling::MulConcatTransformation : public ov::pass::MatcherPass { -public: - OPENVINO_MATCHER_PASS_RTTI("MulConcatTransformation", "0"); - MulConcatTransformation(); -}; - // input input // / \ | // Norm Mul ==> Mul (expect to be fused into the input layer) diff --git a/src/common/transformations/src/transformations/common_optimizations/activations_scaling.cpp b/src/common/transformations/src/transformations/common_optimizations/activations_scaling.cpp index 7fd1a5a237fa3b..b9e8acaf34dd2d 100644 --- a/src/common/transformations/src/transformations/common_optimizations/activations_scaling.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/activations_scaling.cpp @@ -10,7 +10,6 @@ #include "low_precision/network_helper.hpp" #include "openvino/core/rt_info.hpp" #include "openvino/op/add.hpp" -#include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/convolution.hpp" @@ -212,92 +211,6 @@ ov::pass::activations_scaling::EliminateScalarMul::EliminateScalarMul() { this->register_matcher(m, callback); } -ov::pass::activations_scaling::MulConcatTransformation::MulConcatTransformation() { - MATCHER_SCOPE(MulConcatTransformation); - - auto concat_m = wrap_type(); - - ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { - const auto& pattern_map = m.get_pattern_value_map(); - - OPENVINO_ASSERT(pattern_map.count(concat_m), "Not found any Concat layer"); - - auto concat = pattern_map.at(concat_m).get_node_shared_ptr(); - - if (transformation_callback(concat)) { - return false; - } - - // check if all inputs are Multiply with scalar operand - ov::Output last_dep_const = {}; - ov::element::Type last_dep_const_type = ov::element::undefined; - for (auto& input : concat->inputs()) { - auto dep_node = ov::as_type_ptr(input.get_source_output().get_node_shared_ptr()); - if (!dep_node) { - return false; - } - auto dep_const0 = - ov::as_type_ptr(dep_node->input(0).get_source_output().get_node_shared_ptr()); - auto dep_const1 = - 
ov::as_type_ptr(dep_node->input(1).get_source_output().get_node_shared_ptr()); - if (!dep_const0 && !dep_const1) { - return false; - } - last_dep_const = - dep_const0 ? dep_node->input(0).get_source_output() : dep_node->input(1).get_source_output(); - if (!is_scalar_node(last_dep_const)) - return false; - if (last_dep_const_type != ov::element::undefined && - last_dep_const_type != last_dep_const.get_element_type()) - return false; - last_dep_const_type = last_dep_const.get_element_type(); - } - - auto target_inputs = concat->get_output_target_inputs(0); - - for (auto& input : concat->inputs()) { - auto dep_node = input.get_source_output().get_node_shared_ptr(); - auto dep_input0 = dep_node->input(0).get_source_output().get_node(); - size_t const_index = ov::is_type(dep_input0) ? 0 : 1; - size_t activation_index = ov::is_type(dep_input0) ? 1 : 0; - - auto dep_type = dep_node->get_output_element_type(0); - auto new_mul = std::make_shared>( - std::vector{dep_type, dep_type}, - std::vector{dep_type}, - ov::op::TemporaryReplaceOutputType(dep_node->input(activation_index).get_source_output(), dep_type) - .get(), - ov::op::TemporaryReplaceOutputType( - ov::op::util::eltwise_fold(dep_node->input(const_index).get_source_output(), - last_dep_const), - dep_type) - .get()); - new_mul->set_friendly_name(dep_node->get_friendly_name() + "_c"); - ov::copy_runtime_info(dep_node, new_mul); - - input.replace_source_output(new_mul); - } - - auto concat_type = concat->get_output_element_type(0); - auto new_mul = std::make_shared>( - std::vector{concat_type, concat_type}, - std::vector{concat_type}, - ov::op::TemporaryReplaceOutputType(concat->output(0), concat_type).get(), - ov::op::TemporaryReplaceOutputType(last_dep_const, concat_type).get()); - new_mul->set_friendly_name(concat->get_friendly_name() + "_c"); - ov::copy_runtime_info(concat, new_mul); - - for (auto& in : target_inputs) { - in.replace_source_output(new_mul); - } - - return true; - }; - - auto m = std::make_shared(concat_m, "MulConcatTransformation"); - this->register_matcher(m, callback); -} - ov::pass::activations_scaling::MulShareTransformation::MulShareTransformation() { MATCHER_SCOPE(MulShareTransformation); diff --git a/src/common/transformations/tests/common_optimizations/activations_scaling_test.cpp b/src/common/transformations/tests/common_optimizations/activations_scaling_test.cpp index a8797b588c31cf..4264c4e3620fb8 100644 --- a/src/common/transformations/tests/common_optimizations/activations_scaling_test.cpp +++ b/src/common/transformations/tests/common_optimizations/activations_scaling_test.cpp @@ -12,7 +12,6 @@ #include "common_test_utils/graph_comparator.hpp" #include "common_test_utils/ov_test_utils.hpp" #include "openvino/op/add.hpp" -#include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convolution.hpp" #include "openvino/op/group_normalization.hpp" @@ -98,38 +97,6 @@ TEST_F(TransformationTestsF, EliminateScalarMulTest) { } } -TEST_F(TransformationTestsF, ConcatTransformationTest) { - { - auto input0 = std::make_shared(ov::element::f16, ov::PartialShape{6, 12, 10, 24}); - auto scale_const0 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {10}); - auto mul0 = std::make_shared(input0, scale_const0); - auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{6, 12, 10, 24}); - auto scale_const1 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {10}); - auto mul1 = std::make_shared(input1, scale_const1); - auto concat = std::make_shared(OutputVector{mul0, 
mul1}, 0); - auto convert = std::make_shared(concat, ov::element::f32); - auto result = std::make_shared(convert); - - model = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{input0, input1}); - manager.register_pass(); - } - { - auto input0 = std::make_shared(ov::element::f16, ov::PartialShape{6, 12, 10, 24}); - auto scale_const0 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {1}); - auto mul0 = std::make_shared(input0, scale_const0); - auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{6, 12, 10, 24}); - auto scale_const1 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {1}); - auto mul1 = std::make_shared(input1, scale_const1); - auto concat = std::make_shared(OutputVector{mul0, mul1}, 0); - auto new_scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {10}); - auto new_mul = std::make_shared(concat, new_scale_const); - auto convert = std::make_shared(new_mul, ov::element::f32); - auto result = std::make_shared(convert); - - model_ref = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{input0, input1}); - } -} - TEST_F(TransformationTestsF, MoveDownScalarMulTest) { { auto input0 = std::make_shared(ov::element::f16, ov::PartialShape{6, 12, 10, 24}); diff --git a/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp b/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp index 3268be21e6dad2..eb684282ce0e3d 100644 --- a/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp +++ b/src/common/transformations/tests/op_conversions/sdpa_to_paged_attention_test.cpp @@ -70,6 +70,15 @@ ov::ParameterVector nodes_to_params(const ov::NodeVector& node_vec) { return params; } +static std::shared_ptr make_param(const PartialShape& pshape, + element::Type element_type, + const std::string& name) { + auto param = makeOP({}, {{"shape", pshape}, {"element_type", element_type}}); + param->set_friendly_name(name); + param->get_output_tensor(0).set_names({name}); + return param; +} + enum QKV : int { Q = 0, K = 1, V = 2 }; vector MOCK_VALUE = {1}; @@ -445,11 +454,10 @@ class SDPAToPATest : public TransformationTestsF, public ::testing::WithParamInt TEST_P(SDPAToPATest, SDPAToPA_Qwen7bChat_General) { const auto model_precision = GetParam(); { - // Inputs to SDPA transformer: - auto beam_idx = makeOP({}, {{"shape", PartialShape{DYN}}, el_type_i64}); - auto position_ids = makeOP({}, {{"shape", PartialShape{DYN, DYN}}, el_type_i64}); - auto attention_mask = makeOP({}, {{"shape", PartialShape{DYN, DYN}}, el_type_i64}); - auto input_ids = makeOP({}, {{"shape", PartialShape{DYN, DYN}}, el_type_i64}); + auto beam_idx = make_param(PartialShape{DYN}, element::i64, "beam_idx"); + auto position_ids = make_param(PartialShape{DYN, DYN}, element::i64, "position_ids"); + auto attention_mask = make_param(PartialShape{DYN, DYN}, element::i64, "attention_mask"); + auto input_ids = make_param(PartialShape{DYN, DYN}, element::i64, "input_ids"); ParameterVector params = nodes_to_params({position_ids, input_ids, attention_mask, beam_idx}); beam_idx->output(0).add_names({"beam_idx"}); @@ -640,15 +648,6 @@ TEST_P(SDPAToPATest, SDPAToPA_Qwen7bChat_TotalSequenceLengthPattern) { disable_rt_info_check(); } -static std::shared_ptr make_param(const PartialShape& pshape, - element::Type element_type, - const std::string& name) { - auto param = makeOP({}, {{"shape", pshape}, {"element_type", element_type}}); - param->set_friendly_name(name); - 
param->get_output_tensor(0).set_names({name}); - return param; -} - // TODO: split the models in blocks the way it's done for Qwen and make the code not to be such a clutter // TODO: write a test for StateManagementPattern only (because changes for Alibi are inside it) // TODO: align precisions, check the copying of "fuse_names" attr in SDPAToPagedAttention @@ -904,6 +903,551 @@ TEST_P(SDPAToPATest, SDPAToPA_Baichuan2_13b_General) { } } +TEST_P(SDPAToPATest, SDPAToPA_nanoLLaVA_General) { + { + auto beam_idx = make_param(PartialShape{DYN}, element::i32, "beam_idx"); + auto inputs_embeds = make_param(PartialShape{DYN, DYN, 8}, element::f32, "inputs_embeds"); + auto position_ids = make_param(PartialShape{DYN, DYN}, element::i64, "position_ids"); + auto attention_mask = make_param(PartialShape{DYN, DYN}, element::i64, "attention_mask"); + + auto ShapeOf_19592 = makeOP({inputs_embeds}, {{"output_type", "i64"}}); + auto Gather_19597 = makeOP({ShapeOf_19592, {0}, 0}, {{"batch_dims", 0}}); + auto Concat_19604 = makeOP({Gather_19597, {2l}, {0l}, {2l}}, {{"axis", 0}}); + auto Broadcast_19607 = makeOP({0.000000f, Concat_19604}, {{"mode", "numpy"}}); + auto ReadValue_19126 = makeOP( + {Broadcast_19607}, + {{"variable_id", "var1"}, {"variable_type", "f32"}, {"variable_shape", PartialShape{DYN, 2, DYN, 2}}}); + auto Gather_18655 = makeOP({ReadValue_19126, beam_idx, 0}, {{"batch_dims", 0}}); + auto Constant_16156 = + makeConst(element::f32, + ov::Shape({1, 1, 8}), + {1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f}); + auto Constant_16155 = makeConst(element::f32, + ov::Shape({1, 1, 1}), + {1.000000f}); + auto Constant_16153 = makeConst(element::f32, + ov::Shape({1, 1, 1}), + {2.000000f}); + auto __module_model_model_layers_0_input_layernorm_aten_pow_Power = + makeOP({inputs_embeds, Constant_16153}, {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_input_layernorm_aten_mean_ReduceMean = + makeOP({__module_model_model_layers_0_input_layernorm_aten_pow_Power, {-1}}, + {{"keep_dims", true}}); + auto Constant_16154 = makeConst(element::f32, + ov::Shape({1, 1, 1}), + {0.000001f}); + auto __module_model_model_layers_0_input_layernorm_aten_add_Add = + makeOP({__module_model_model_layers_0_input_layernorm_aten_mean_ReduceMean, Constant_16154}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_input_layernorm_aten_rsqrt_Sqrt = + makeOP({__module_model_model_layers_0_input_layernorm_aten_add_Add}); + auto __module_model_model_layers_0_input_layernorm_aten_rsqrt_Divide = + makeOP({Constant_16155, __module_model_model_layers_0_input_layernorm_aten_rsqrt_Sqrt}, + {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}}); + auto __module_model_model_layers_0_input_layernorm_aten_mul_Multiply = + makeOP({inputs_embeds, __module_model_model_layers_0_input_layernorm_aten_rsqrt_Divide}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_input_layernorm_aten_mul_Multiply_1 = + makeOP({Constant_16156, __module_model_model_layers_0_input_layernorm_aten_mul_Multiply}, + {{"auto_broadcast", "numpy"}}); + auto self_model_model_layers_0_self_attn_q_proj_weight = makeConst(element::f32, + ov::Shape({8, 8}), + MOCK_VALUE); + auto __module_model_model_layers_0_self_attn_q_proj_aten_linear_MatMul = + makeOP({__module_model_model_layers_0_input_layernorm_aten_mul_Multiply_1, + self_model_model_layers_0_self_attn_q_proj_weight}, + {{"transpose_a", false}, {"transpose_b", true}}); + auto 
__module_model_model_layers_0_self_attn_aten_view_Reshape = + makeOP({__module_model_model_layers_0_self_attn_q_proj_aten_linear_MatMul, {0, 0, 4, 2}}, + {{"special_zero", true}}); + auto __module_model_model_layers_0_self_attn_aten_transpose_Transpose = + makeOP({__module_model_model_layers_0_self_attn_aten_view_Reshape, {0, 2, 1, 3}}); + auto self_model_model_layers_0_self_attn_rotary_emb_cos_cached = makeConst(element::f32, + ov::Shape({32768, 2}), + MOCK_VALUE); + auto ShapeOf_16753 = + makeOP({__module_model_model_layers_0_input_layernorm_aten_mul_Multiply_1}, + {{"output_type", "i64"}}); + auto Gather_16756 = makeOP({ShapeOf_16753, 1, 0}, {{"batch_dims", 0}}); + auto Reshape_16764 = makeOP({Gather_16756, {-1}}, {{"special_zero", false}}); + auto ReadValue_19120 = makeOP( + {Broadcast_19607}, + {{"variable_id", "var2"}, {"variable_type", "f32"}, {"variable_shape", PartialShape{DYN, 2, DYN, 2}}}); + auto Gather_18646 = makeOP({ReadValue_19120, beam_idx, 0}, {{"batch_dims", 0}}); + auto ShapeOf_16767 = makeOP({Gather_18646}, {{"output_type", "i64"}}); + auto Gather_16770 = makeOP({ShapeOf_16767, 2, 0}, {{"batch_dims", 0}}); + auto Reshape_16772 = makeOP({Gather_16770, {-1}}, {{"special_zero", false}}); + auto __module_model_model_layers_0_self_attn_aten_add__Add = + makeOP({Reshape_16764, Reshape_16772}, {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_rotary_emb_aten_slice_Slice = + makeOP({self_model_model_layers_0_self_attn_rotary_emb_cos_cached, + {0}, + __module_model_model_layers_0_self_attn_aten_add__Add, + {1}, + {0}}); + auto __module_model_model_aten_view_Reshape = + makeOP({position_ids, {0, 0}}, {{"special_zero", true}}); + auto __module_model_model_layers_0_self_attn_aten_index_Convert = + makeOP({__module_model_model_aten_view_Reshape}, {{"destination_type", "i32"}}); + auto __module_model_model_layers_0_self_attn_aten_index_Gather = + makeOP({__module_model_model_layers_0_self_attn_rotary_emb_aten_slice_Slice, + __module_model_model_layers_0_self_attn_aten_index_Convert, + 0}, + {{"batch_dims", 0}}); + auto __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze = + makeOP({__module_model_model_layers_0_self_attn_aten_index_Gather, 1}); + auto __module_model_model_layers_0_self_attn_aten_mul_Multiply = + makeOP({__module_model_model_layers_0_self_attn_aten_transpose_Transpose, + __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_slice_Slice = makeOP( + {__module_model_model_layers_0_self_attn_aten_transpose_Transpose, {1}, {LLONG_MAX}, {1}, {3}}); + auto Constant_16157 = makeConst(element::f32, + ov::Shape({1, 1, 1, 1}), + {-1.000000f}); + auto __module_model_model_layers_0_self_attn_aten_neg_Multiply = + makeOP({__module_model_model_layers_0_self_attn_aten_slice_Slice, Constant_16157}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_slice_Slice_1 = makeOP( + {__module_model_model_layers_0_self_attn_aten_transpose_Transpose, {0}, {1}, {1}, {3}}); + auto __module_model_model_layers_0_self_attn_aten_cat_Concat = + makeOP({__module_model_model_layers_0_self_attn_aten_neg_Multiply, + __module_model_model_layers_0_self_attn_aten_slice_Slice_1}, + {{"axis", -1}}); + auto self_model_model_layers_0_self_attn_rotary_emb_sin_cached = makeConst(element::f32, + ov::Shape({32768, 2}), + MOCK_VALUE); + auto __module_model_model_layers_0_self_attn_rotary_emb_aten_slice_Slice_1 = + 
makeOP({self_model_model_layers_0_self_attn_rotary_emb_sin_cached, + {0}, + __module_model_model_layers_0_self_attn_aten_add__Add, + {1}, + {0}}); + auto __module_model_model_layers_0_self_attn_aten_index_Gather_1 = + makeOP({__module_model_model_layers_0_self_attn_rotary_emb_aten_slice_Slice_1, + __module_model_model_layers_0_self_attn_aten_index_Convert, + 0}, + {{"batch_dims", 0}}); + auto __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_index_Gather_1, 1}); + auto __module_model_model_layers_0_self_attn_aten_mul_Multiply_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_cat_Concat, + __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze_1}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_add_Add = + makeOP({__module_model_model_layers_0_self_attn_aten_mul_Multiply, + __module_model_model_layers_0_self_attn_aten_mul_Multiply_1}, + {{"auto_broadcast", "numpy"}}); + auto self_model_model_layers_0_self_attn_k_proj_weight = makeConst(element::f32, + ov::Shape({4, 8}), + MOCK_VALUE); + auto __module_model_model_layers_0_self_attn_k_proj_aten_linear_MatMul = + makeOP({__module_model_model_layers_0_input_layernorm_aten_mul_Multiply_1, + self_model_model_layers_0_self_attn_k_proj_weight}, + {{"transpose_a", false}, {"transpose_b", true}}); + auto __module_model_model_layers_0_self_attn_aten_view_Reshape_1 = + makeOP({__module_model_model_layers_0_self_attn_k_proj_aten_linear_MatMul, {0, 0, 2, 2}}, + {{"special_zero", true}}); + auto __module_model_model_layers_0_self_attn_aten_transpose_Transpose_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_view_Reshape_1, {0, 2, 1, 3}}); + auto __module_model_model_layers_0_self_attn_aten_mul_Multiply_2 = + makeOP({__module_model_model_layers_0_self_attn_aten_transpose_Transpose_1, + __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_slice_Slice_2 = makeOP( + {__module_model_model_layers_0_self_attn_aten_transpose_Transpose_1, {1}, {LLONG_MAX}, {1}, {3}}); + auto Constant_16158 = makeConst(element::f32, + ov::Shape({1, 1, 1, 1}), + {-1.000000f}); + auto __module_model_model_layers_0_self_attn_aten_neg_Multiply_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_slice_Slice_2, Constant_16158}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_slice_Slice_3 = makeOP( + {__module_model_model_layers_0_self_attn_aten_transpose_Transpose_1, {0}, {1}, {1}, {3}}); + auto __module_model_model_layers_0_self_attn_aten_cat_Concat_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_neg_Multiply_1, + __module_model_model_layers_0_self_attn_aten_slice_Slice_3}, + {{"axis", -1}}); + auto __module_model_model_layers_0_self_attn_aten_mul_Multiply_3 = + makeOP({__module_model_model_layers_0_self_attn_aten_cat_Concat_1, + __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze_1}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_add_Add_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_mul_Multiply_2, + __module_model_model_layers_0_self_attn_aten_mul_Multiply_3}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_cat_Concat_2 = + makeOP({Gather_18646, __module_model_model_layers_0_self_attn_aten_add_Add_1}, + {{"axis", -2}}); + auto 
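// The Slice/Negative/Concat chains combined with the cos/sin multiplies above
// encode rotary position embeddings via the rotate-half trick. A minimal,
// self-contained scalar sketch of the same formula (illustrative only, not
// part of the test):
#include <cassert>
#include <vector>

std::vector<float> apply_rope(const std::vector<float>& x,
                              const std::vector<float>& cos_v,
                              const std::vector<float>& sin_v) {
    const size_t d = x.size();
    assert(d % 2 == 0 && cos_v.size() == d && sin_v.size() == d);
    std::vector<float> rot(d), out(d);
    // rotate_half: the negated second half comes first, then the first half
    for (size_t i = 0; i < d / 2; ++i) {
        rot[i] = -x[d / 2 + i];
        rot[d / 2 + i] = x[i];
    }
    for (size_t i = 0; i < d; ++i) {
        out[i] = x[i] * cos_v[i] + rot[i] * sin_v[i];
    }
    return out;
}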
__module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze_2 = + makeOP({__module_model_model_layers_0_self_attn_aten_cat_Concat_2, 2}); + auto Gather_16778 = makeOP({ShapeOf_16753, {0}, 0}, {{"batch_dims", 0}}); + auto Add_16793 = makeOP({Reshape_16772, Reshape_16764}, {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_prim_ListConstruct_2 = + makeOP({Gather_16778, {2l}, {2l}, Add_16793, {2l}}, {{"axis", 0}}); + auto __module_model_model_layers_0_self_attn_aten_expand_Broadcast = + makeOP({__module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze_2, + __module_model_model_layers_0_self_attn_prim_ListConstruct_2}, + {{"mode", "bidirectional"}}); + auto __module_model_model_layers_0_self_attn_aten_reshape_Reshape = + makeOP({__module_model_model_layers_0_self_attn_aten_expand_Broadcast, {0, 4, -1, 2}}, + {{"special_zero", true}}); + auto ReadValue_19122 = makeOP( + {Broadcast_19607}, + {{"variable_id", "var3"}, {"variable_type", "f32"}, {"variable_shape", PartialShape{DYN, 2, DYN, 2}}}); + auto Gather_18649 = makeOP({ReadValue_19122, beam_idx, 0}, {{"batch_dims", 0}}); + auto self_model_model_layers_0_self_attn_v_proj_weight = makeConst(element::f32, + ov::Shape({4, 8}), + MOCK_VALUE); + auto __module_model_model_layers_0_self_attn_v_proj_aten_linear_MatMul = + makeOP({__module_model_model_layers_0_input_layernorm_aten_mul_Multiply_1, + self_model_model_layers_0_self_attn_v_proj_weight}, + {{"transpose_a", false}, {"transpose_b", true}}); + auto __module_model_model_layers_0_self_attn_aten_view_Reshape_2 = + makeOP({__module_model_model_layers_0_self_attn_v_proj_aten_linear_MatMul, {0, 0, 2, 2}}, + {{"special_zero", true}}); + auto __module_model_model_layers_0_self_attn_aten_transpose_Transpose_2 = + makeOP({__module_model_model_layers_0_self_attn_aten_view_Reshape_2, {0, 2, 1, 3}}); + auto __module_model_model_layers_0_self_attn_aten_cat_Concat_3 = + makeOP({Gather_18649, __module_model_model_layers_0_self_attn_aten_transpose_Transpose_2}, + {{"axis", -2}}); + auto __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze_3 = + makeOP({__module_model_model_layers_0_self_attn_aten_cat_Concat_3, 2}); + auto __module_model_model_layers_0_self_attn_aten_expand_Broadcast_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze_3, + __module_model_model_layers_0_self_attn_prim_ListConstruct_2}, + {{"mode", "bidirectional"}}); + auto __module_model_model_layers_0_self_attn_aten_reshape_Reshape_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_expand_Broadcast_1, {0, 4, -1, 2}}, + {{"special_zero", true}}); + auto Constant_16160 = makeConst(element::f32, + ov::Shape({1, 1, 1, 1}), + {1.000000f}); + auto __module_model_model_aten_unsqueeze_Unsqueeze = makeOP({attention_mask, 1}); + auto __module_model_model_aten_unsqueeze_Unsqueeze_1 = + makeOP({__module_model_model_aten_unsqueeze_Unsqueeze, 2}); + auto ShapeOf_16779 = makeOP({attention_mask}, {{"output_type", "i64"}}); + auto Gather_16782 = makeOP({ShapeOf_16779, {1}, 0}, {{"batch_dims", 0}}); + auto __module_model_model_prim_ListConstruct_1 = + makeOP({Gather_16778, {1l}, Reshape_16764, Gather_16782}, {{"axis", 0}}); + auto __module_model_model_aten_expand_Broadcast = makeOP( + {__module_model_model_aten_unsqueeze_Unsqueeze_1, __module_model_model_prim_ListConstruct_1}, + {{"mode", "bidirectional"}}); + auto __module_model_model_aten_to_Convert_1 = + makeOP({__module_model_model_aten_expand_Broadcast}, {{"destination_type", "f32"}}); + auto Constant_16159 = 
makeConst(element::f32, + ov::Shape({1, 1, 1, 1}), + {1.000000f}); + auto __module_model_model_aten_rsub_Multiply = + makeOP({__module_model_model_aten_to_Convert_1, Constant_16159}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_aten_rsub_Subtract = + makeOP({Constant_16160, __module_model_model_aten_rsub_Multiply}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_aten_to_Convert_2 = + makeOP({__module_model_model_aten_rsub_Subtract}, {{"destination_type", "boolean"}}); + auto __module_model_model_aten_masked_fill_Select = makeOP( + {__module_model_model_aten_to_Convert_2, -FLT_MAX, __module_model_model_aten_rsub_Subtract}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_aten_to_Convert_4 = + makeOP({__module_model_model_aten_masked_fill_Select}, {{"destination_type", "boolean"}}); + auto __module_model_model_aten_add_Add = + makeOP({Gather_16756, Gather_16770}, {{"auto_broadcast", "numpy"}}); + auto __module_model_model_aten_sub_Subtract = + makeOP({__module_model_model_aten_add_Add, Gather_16756}, {{"auto_broadcast", "numpy"}}); + auto Unsqueeze_124 = makeOP({__module_model_model_aten_sub_Subtract, 0}); + auto __module_model_model_prim_ListConstruct_2 = + makeOP({Reshape_16764, Unsqueeze_124}, {{"axis", 0}}); + auto __module_model_model_aten_zeros_Broadcast = + makeOP({0.000000f, __module_model_model_prim_ListConstruct_2}, {{"mode", "numpy"}}); + auto __module_model_model_aten_arange_Range = + makeOP({0, Gather_16756, 1}, {{"output_type", "f32"}}); + auto __module_model_model_aten_arange_ConvertLike = + makeOP({__module_model_model_aten_arange_Range}, {{"destination_type", "i64"}}); + auto __module_model_model_aten_add_Add_1 = + makeOP({__module_model_model_aten_arange_ConvertLike, {1l}}, {{"auto_broadcast", "numpy"}}); + auto __module_model_model_aten_view_Reshape_1 = + makeOP({__module_model_model_aten_add_Add_1, {0, 1}}, {{"special_zero", true}}); + auto __module_model_model_aten_lt_Less = makeOP( + {__module_model_model_aten_arange_ConvertLike, __module_model_model_aten_view_Reshape_1}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_prim_ListConstruct_3 = + makeOP({Reshape_16764, {2}}, {{"mode", "numpy"}}); + auto __module_model_model_aten_full_Broadcast = + makeOP({-FLT_MAX, __module_model_model_prim_ListConstruct_3}, {{"mode", "numpy"}}); + auto __module_model_model_aten_masked_fill__Select = makeOP( + {__module_model_model_aten_lt_Less, 0.000000f, __module_model_model_aten_full_Broadcast}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_aten_cat_Concat = makeOP( + {__module_model_model_aten_zeros_Broadcast, __module_model_model_aten_masked_fill__Select}, + {{"axis", -1}}); + auto __module_model_model_aten_unsqueeze_Unsqueeze_2 = + makeOP({__module_model_model_aten_cat_Concat, 0}); + auto __module_model_model_aten_unsqueeze_Unsqueeze_3 = + makeOP({__module_model_model_aten_unsqueeze_Unsqueeze_2, 1}); + auto __module_model_model_aten_add_Add_2 = + makeOP({Reshape_16764, Unsqueeze_124}, {{"auto_broadcast", "numpy"}}); + auto __module_model_model_prim_ListConstruct_5 = + makeOP({Gather_16778, {1l}, Reshape_16764, __module_model_model_aten_add_Add_2}, + {{"axis", 0}}); + auto __module_model_model_aten_expand_Broadcast_1 = makeOP( + {__module_model_model_aten_unsqueeze_Unsqueeze_3, __module_model_model_prim_ListConstruct_5}, + {{"mode", "bidirectional"}}); + auto __module_model_model_aten_masked_fill_Select_1 = makeOP( + {__module_model_model_aten_to_Convert_4, -FLT_MAX, 
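// The subgraph above assembles the additive attention mask: the rsub/Select
// part turns the 0/1 `attention_mask` into 0 / -FLT_MAX padding penalties,
// while the zeros-Concat-Less part builds the causal window over cached and
// new positions. In scalar form the causal component reduces to (a sketch):
#include <cfloat>
#include <cstdint>

float causal_mask(int64_t i, int64_t j, int64_t past_len) {
    // query token i may attend to every cached position and to
    // new positions up to and including its own index
    const bool allowed = (j < past_len) || (j - past_len) <= i;
    return allowed ? 0.0f : -FLT_MAX;
}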
__module_model_model_aten_expand_Broadcast_1}, + {{"auto_broadcast", "numpy"}}); + auto sdpa = + makeOP({__module_model_model_layers_0_self_attn_aten_add_Add, + __module_model_model_layers_0_self_attn_aten_reshape_Reshape, + __module_model_model_layers_0_self_attn_aten_reshape_Reshape_1, + __module_model_model_aten_masked_fill_Select_1}, + {{"causal", false}}); + + auto res = makeOP({sdpa}); + + ParameterVector params = nodes_to_params({beam_idx, position_ids, attention_mask, inputs_embeds}); + model = std::make_shared(OutputVector{res}, params); + + manager.register_pass(); + } + + { + auto max_context_len = make_param(PartialShape{}, element::i32, "max_context_len"); + auto block_indices_begins = make_param(PartialShape{DYN}, element::i32, "block_indices_begins"); + auto block_indices = make_param(PartialShape{DYN}, element::i32, "block_indices"); + auto subsequence_begins = make_param(PartialShape{DYN}, element::i32, "subsequence_begins"); + auto past_lens = make_param(PartialShape{DYN}, element::i32, "past_lens"); + auto value_cache_0 = make_param(PartialShape{DYN, 2, 2}, element::f32, "value_cache_0"); + auto key_cache_0 = make_param(PartialShape{DYN, 2, 2}, element::f32, "key_cache_0"); + auto inputs_embeds = make_param(PartialShape{DYN, DYN, 8}, element::f32, "inputs_embeds"); + auto position_ids = make_param(PartialShape{DYN}, element::i64, "position_ids"); + + ParameterVector params = nodes_to_params({max_context_len, + block_indices_begins, + block_indices, + subsequence_begins, + past_lens, + value_cache_0, + key_cache_0, + inputs_embeds, + position_ids}); + + auto Constant_16156 = makeConst(element::f32, + ov::Shape({1, 1, 8}), + {1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f}); + auto Constant_16155 = makeConst(element::f32, + ov::Shape({1, 1, 1}), + {1.000000f}); + auto Constant_16153 = makeConst(element::f32, + ov::Shape({1, 1, 1}), + {2.000000f}); + auto __module_model_model_layers_0_input_layernorm_aten_pow_Power = + makeOP({inputs_embeds, Constant_16153}, {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_input_layernorm_aten_mean_ReduceMean = + makeOP({__module_model_model_layers_0_input_layernorm_aten_pow_Power, {-1}}, + {{"keep_dims", true}}); + auto Constant_16154 = makeConst(element::f32, + ov::Shape({1, 1, 1}), + {0.000001f}); + auto __module_model_model_layers_0_input_layernorm_aten_add_Add = + makeOP({__module_model_model_layers_0_input_layernorm_aten_mean_ReduceMean, Constant_16154}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_input_layernorm_aten_rsqrt_Sqrt = + makeOP({__module_model_model_layers_0_input_layernorm_aten_add_Add}); + auto __module_model_model_layers_0_input_layernorm_aten_rsqrt_Divide = + makeOP({Constant_16155, __module_model_model_layers_0_input_layernorm_aten_rsqrt_Sqrt}, + {{"auto_broadcast", "numpy"}, {"m_pythondiv", true}}); + auto __module_model_model_layers_0_input_layernorm_aten_mul_Multiply = + makeOP({inputs_embeds, __module_model_model_layers_0_input_layernorm_aten_rsqrt_Divide}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_input_layernorm_aten_mul_Multiply_1 = + makeOP({Constant_16156, __module_model_model_layers_0_input_layernorm_aten_mul_Multiply}, + {{"auto_broadcast", "numpy"}}); + auto self_model_model_layers_0_self_attn_q_proj_weight = makeConst(element::f32, + ov::Shape({8, 8}), + MOCK_VALUE); + auto __module_model_model_layers_0_self_attn_q_proj_aten_linear_MatMul = + 
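// The parameters declared above are the continuous-batching metadata that
// PagedAttention consumes in place of the stateful KV cache. A hypothetical
// plain-data illustration of what each input carries (names and layout are
// ours, not OpenVINO API):
#include <cstdint>
#include <vector>

struct PagedBatchMeta {
    std::vector<int32_t> past_lens;             // tokens already cached per sequence
    std::vector<int32_t> subsequence_begins;    // offset of each sequence's new
                                                // tokens in the flattened batch
    std::vector<int32_t> block_indices;         // KV-cache block ids, concatenated
    std::vector<int32_t> block_indices_begins;  // offsets into block_indices
    int32_t max_context_len = 0;                // max(past + new) over the batch
};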
makeOP({__module_model_model_layers_0_input_layernorm_aten_mul_Multiply_1, + self_model_model_layers_0_self_attn_q_proj_weight}, + {{"transpose_a", false}, {"transpose_b", true}}); + auto __module_model_model_layers_0_self_attn_aten_view_Reshape = + makeOP({__module_model_model_layers_0_self_attn_q_proj_aten_linear_MatMul, {0, 0, 4, 2}}, + {{"special_zero", true}}); + auto __module_model_model_layers_0_self_attn_aten_transpose_Transpose = + makeOP({__module_model_model_layers_0_self_attn_aten_view_Reshape, {0, 2, 1, 3}}); + auto self_model_model_layers_0_self_attn_rotary_emb_cos_cached = makeConst(element::f32, + ov::Shape({32768, 2}), + MOCK_VALUE); + auto ShapeOf_16753 = + makeOP({__module_model_model_layers_0_input_layernorm_aten_mul_Multiply_1}, + {{"output_type", "i64"}}); + auto Gather_16756 = makeOP({ShapeOf_16753, 1, 0}, {{"batch_dims", 0}}); + auto Reshape_16764 = makeOP({Gather_16756, {-1}}, {{"special_zero", false}}); + auto ShapeOf_52004 = makeOP({inputs_embeds}, {{"output_type", "i64"}}); + auto Gather_52005 = makeOP({ShapeOf_52004, 1, 0}, {{"batch_dims", 0}}); + auto Convert_52006 = makeOP({Gather_52005}, {{"destination_type", "i32"}}); + auto Subtract_52007 = makeOP({max_context_len, Convert_52006}, {{"auto_broadcast", "numpy"}}); + auto Convert_52008 = makeOP({Subtract_52007}, {{"destination_type", "i64"}}); + auto Reshape_16772 = makeOP({Convert_52008, {-1}}, {{"special_zero", false}}); + auto __module_model_model_layers_0_self_attn_aten_add__Add = + makeOP({Reshape_16764, Reshape_16772}, {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_rotary_emb_aten_slice_Slice = + makeOP({self_model_model_layers_0_self_attn_rotary_emb_cos_cached, + {0}, + __module_model_model_layers_0_self_attn_aten_add__Add, + {1}, + {0}}); + auto Unsqueeze_51575 = makeOP({position_ids, 1}); + auto __module_model_model_aten_view_Reshape = + makeOP({Unsqueeze_51575, {0, 0}}, {{"special_zero", true}}); + auto __module_model_model_layers_0_self_attn_aten_index_Convert = + makeOP({__module_model_model_aten_view_Reshape}, {{"destination_type", "i32"}}); + auto __module_model_model_layers_0_self_attn_aten_index_Gather = + makeOP({__module_model_model_layers_0_self_attn_rotary_emb_aten_slice_Slice, + __module_model_model_layers_0_self_attn_aten_index_Convert, + 0}, + {{"batch_dims", 0}}); + auto __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze = + makeOP({__module_model_model_layers_0_self_attn_aten_index_Gather, 1}); + auto __module_model_model_layers_0_self_attn_aten_mul_Multiply = + makeOP({__module_model_model_layers_0_self_attn_aten_transpose_Transpose, + __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_slice_Slice = makeOP( + {__module_model_model_layers_0_self_attn_aten_transpose_Transpose, {1}, {LLONG_MAX}, {1}, {3}}); + auto Constant_16157 = makeConst(element::f32, + ov::Shape({1, 1, 1, 1}), + {-1.000000f}); + auto __module_model_model_layers_0_self_attn_aten_neg_Multiply = + makeOP({__module_model_model_layers_0_self_attn_aten_slice_Slice, Constant_16157}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_slice_Slice_1 = makeOP( + {__module_model_model_layers_0_self_attn_aten_transpose_Transpose, {0}, {1}, {1}, {3}}); + auto __module_model_model_layers_0_self_attn_aten_cat_Concat = + makeOP({__module_model_model_layers_0_self_attn_aten_neg_Multiply, + __module_model_model_layers_0_self_attn_aten_slice_Slice_1}, + 
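// After conversion the past length can no longer be read from a stateful KV
// tensor, so the ShapeOf/Convert/Subtract chain above recomputes it from the
// inputs instead. The same computation in scalar form (a sketch):
#include <cstdint>

int64_t past_length(int32_t max_context_len, int64_t current_seq_len) {
    // mirrors Convert(i64 -> i32), Subtract, Convert(i32 -> i64) above
    return static_cast<int64_t>(max_context_len - static_cast<int32_t>(current_seq_len));
}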
{{"axis", -1}}); + auto self_model_model_layers_0_self_attn_rotary_emb_sin_cached = makeConst(element::f32, + ov::Shape({32768, 2}), + MOCK_VALUE); + auto __module_model_model_layers_0_self_attn_rotary_emb_aten_slice_Slice_1 = + makeOP({self_model_model_layers_0_self_attn_rotary_emb_sin_cached, + {0}, + __module_model_model_layers_0_self_attn_aten_add__Add, + {1}, + {0}}); + auto __module_model_model_layers_0_self_attn_aten_index_Gather_1 = + makeOP({__module_model_model_layers_0_self_attn_rotary_emb_aten_slice_Slice_1, + __module_model_model_layers_0_self_attn_aten_index_Convert, + 0}, + {{"batch_dims", 0}}); + auto __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_index_Gather_1, 1}); + auto __module_model_model_layers_0_self_attn_aten_mul_Multiply_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_cat_Concat, + __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze_1}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_add_Add = + makeOP({__module_model_model_layers_0_self_attn_aten_mul_Multiply, + __module_model_model_layers_0_self_attn_aten_mul_Multiply_1}, + {{"auto_broadcast", "numpy"}}); + auto Transpose_51951 = + makeOP({__module_model_model_layers_0_self_attn_aten_add_Add, {0, 2, 1, 3}}); + auto Reshape_51953 = makeOP({Transpose_51951, {0, -1}}, {{"special_zero", true}}); + auto self_model_model_layers_0_self_attn_k_proj_weight = makeConst(element::f32, + ov::Shape({4, 8}), + MOCK_VALUE); + auto __module_model_model_layers_0_self_attn_k_proj_aten_linear_MatMul = + makeOP({__module_model_model_layers_0_input_layernorm_aten_mul_Multiply_1, + self_model_model_layers_0_self_attn_k_proj_weight}, + {{"transpose_a", false}, {"transpose_b", true}}); + auto __module_model_model_layers_0_self_attn_aten_view_Reshape_1 = + makeOP({__module_model_model_layers_0_self_attn_k_proj_aten_linear_MatMul, {0, 0, 2, 2}}, + {{"special_zero", true}}); + auto __module_model_model_layers_0_self_attn_aten_transpose_Transpose_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_view_Reshape_1, {0, 2, 1, 3}}); + auto __module_model_model_layers_0_self_attn_aten_mul_Multiply_2 = + makeOP({__module_model_model_layers_0_self_attn_aten_transpose_Transpose_1, + __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_slice_Slice_2 = makeOP( + {__module_model_model_layers_0_self_attn_aten_transpose_Transpose_1, {1}, {LLONG_MAX}, {1}, {3}}); + auto Constant_16158 = makeConst(element::f32, + ov::Shape({1, 1, 1, 1}), + {-1.000000f}); + auto __module_model_model_layers_0_self_attn_aten_neg_Multiply_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_slice_Slice_2, Constant_16158}, + {{"auto_broadcast", "numpy"}}); + auto __module_model_model_layers_0_self_attn_aten_slice_Slice_3 = makeOP( + {__module_model_model_layers_0_self_attn_aten_transpose_Transpose_1, {0}, {1}, {1}, {3}}); + auto __module_model_model_layers_0_self_attn_aten_cat_Concat_1 = + makeOP({__module_model_model_layers_0_self_attn_aten_neg_Multiply_1, + __module_model_model_layers_0_self_attn_aten_slice_Slice_3}, + {{"axis", -1}}); + auto __module_model_model_layers_0_self_attn_aten_mul_Multiply_3 = + makeOP({__module_model_model_layers_0_self_attn_aten_cat_Concat_1, + __module_model_model_layers_0_self_attn_aten_unsqueeze_Unsqueeze_1}, + {{"auto_broadcast", "numpy"}}); + auto 
__module_model_model_layers_0_self_attn_aten_add_Add_1 =
+            makeOP({__module_model_model_layers_0_self_attn_aten_mul_Multiply_2,
+                    __module_model_model_layers_0_self_attn_aten_mul_Multiply_3},
+                   {{"auto_broadcast", "numpy"}});
+        auto Transpose_51954 =
+            makeOP({__module_model_model_layers_0_self_attn_aten_add_Add_1, {0, 2, 1, 3}});
+        auto Reshape_51957 = makeOP({Transpose_51954, {0, -1}}, {{"special_zero", true}});
+        auto self_model_model_layers_0_self_attn_v_proj_weight = makeConst(element::f32,
+                                                                           ov::Shape({4, 8}),
+                                                                           MOCK_VALUE);
+        auto __module_model_model_layers_0_self_attn_v_proj_aten_linear_MatMul =
+            makeOP({__module_model_model_layers_0_input_layernorm_aten_mul_Multiply_1,
+                    self_model_model_layers_0_self_attn_v_proj_weight},
+                   {{"transpose_a", false}, {"transpose_b", true}});
+        auto __module_model_model_layers_0_self_attn_aten_view_Reshape_2 =
+            makeOP({__module_model_model_layers_0_self_attn_v_proj_aten_linear_MatMul, {0, 0, 2, 2}},
+                   {{"special_zero", true}});
+        auto __module_model_model_layers_0_self_attn_aten_transpose_Transpose_2 =
+            makeOP({__module_model_model_layers_0_self_attn_aten_view_Reshape_2, {0, 2, 1, 3}});
+        auto Transpose_51955 = makeOP(
+            {__module_model_model_layers_0_self_attn_aten_transpose_Transpose_2, {0, 2, 1, 3}});
+        auto Reshape_51959 = makeOP({Transpose_51955, {0, -1}}, {{"special_zero", true}});
+
+        auto c1 = makeConst(element::f32, {}, {0.707107f});
+        auto c2 = makeConst(element::i32, {}, {0});
+        // an empty Constant needs to be created in the usual way, not using makeConst()
+        auto c3 = v0::Constant::create(element::f32, {0}, {});
+        auto PagedAttentionExtension_51962 =
+            std::make_shared(ov::OutputVector{Reshape_51953,
+                                              Reshape_51957,
+                                              Reshape_51959,
+                                              key_cache_0,
+                                              value_cache_0,
+                                              past_lens,
+                                              subsequence_begins,
+                                              block_indices,
+                                              block_indices_begins,
+                                              c1,
+                                              c2,
+                                              c3,
+                                              max_context_len});
+        auto ShapeOf_51965 = makeOP({Transpose_51955}, {{"output_type", "i64"}});
+        auto Gather_51966 = makeOP({ShapeOf_51965, -1, 0}, {{"batch_dims", 0}});
+        auto Unsqueeze_51971 = makeOP({Gather_51966, 0});
+        auto Concat_51972 = makeOP({{0l}, {1l}, {-1l}, Unsqueeze_51971}, {{"axis", 0}});
+        auto Reshape_51973 =
+            makeOP({PagedAttentionExtension_51962->output(0), Concat_51972}, {{"special_zero", true}});
+        auto __module_model_model_layers_0_self_attn_aten_scaled_dot_product_attention_ScaledDotProductAttention =
+            makeOP({Reshape_51973, {0, 2, 1, 3}});
+
+        auto res = std::make_shared(
+            __module_model_model_layers_0_self_attn_aten_scaled_dot_product_attention_ScaledDotProductAttention);
+        model_ref = std::make_shared(ResultVector{res}, params);
+
+        comparator.disable(FunctionsComparator::PRECISIONS);
+        disable_result_friendly_names_check();
+        disable_rt_info_check();
+    }
+}
+
 /* As there's often a need to cover specific model's architectures in these tests,
    please make sure you name the tests in the following manner:
diff --git a/src/core/src/pass/sdpa_to_paged_attention.cpp b/src/core/src/pass/sdpa_to_paged_attention.cpp
index 2ccd19ca3e1fc3..3d70e7e713f54c 100644
--- a/src/core/src/pass/sdpa_to_paged_attention.cpp
+++ b/src/core/src/pass/sdpa_to_paged_attention.cpp
@@ -90,12 +90,23 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptrset_partial_shape(PartialShape{-1});
-    auto input_ids_target_inputs = input_ids_node->get_output_target_inputs(0);
-    auto unsqueezed_input_ids =
-        std::make_shared(input_ids_node, v0::Constant::create(element::i32, Shape{}, {1}));
-    for (const auto& target : input_ids_target_inputs) {
-        
target.replace_source_output(unsqueezed_input_ids);
+    std::shared_ptr processed_input_ids;
+    if (input_ids_node->get_friendly_name() == "input_ids") {
+        auto input_ids_target_inputs = input_ids_node->get_output_target_inputs(0);
+        input_ids_node->set_partial_shape(PartialShape{-1});
+        processed_input_ids =
+            std::make_shared(input_ids_node, v0::Constant::create(element::i32, Shape{}, {1}));
+        for (const auto& target : input_ids_target_inputs) {
+            target.replace_source_output(processed_input_ids);
+        }
+    } else if (input_ids_node->get_friendly_name() == "inputs_embeds") {
+        // VLMs fold the input_ids lookup and the embeddings calculation into a
+        // single "inputs_embeds" input, so no additional work on the input is
+        // needed here, unlike for "input_ids"
+        processed_input_ids = input_ids_node;
+    } else {
+        OPENVINO_ASSERT(processed_input_ids, "Couldn't process either input_ids or inputs_embeds.");
     }
 
     ParameterVector kv_parameters;
@@ -141,7 +152,7 @@ bool ov::pass::SDPAToPagedAttention::run_on_model(const std::shared_ptr
 
-    manager.register_pass(unsqueezed_input_ids, max_context_len, position_ids);
+    manager.register_pass(processed_input_ids, max_context_len, position_ids);
     manager.register_pass(max_context_len);
     manager.register_pass(max_context_len);
     manager.register_pass(unsqueezed_position_ids);
diff --git a/src/frontends/tensorflow_common/src/op/squeeze.cpp b/src/frontends/tensorflow_common/src/op/squeeze.cpp
index 62cd7e0b1ab950..93bff25f03a557 100644
--- a/src/frontends/tensorflow_common/src/op/squeeze.cpp
+++ b/src/frontends/tensorflow_common/src/op/squeeze.cpp
@@ -8,6 +8,8 @@
 #include "helper_ops/complex_type_mark.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/floor_mod.hpp"
+#include "openvino/op/string_tensor_pack.hpp"
+#include "openvino/op/string_tensor_unpack.hpp"
 #include "openvino/op/subtract.hpp"
 #include "utils.hpp"
@@ -48,6 +50,19 @@ OutputVector translate_squeeze_op(const NodeContext& node) {
         set_node_name(node.get_name(), squeeze);
         auto squeeze_complex = make_shared(squeeze, complex_part_type);
         return {squeeze_complex->output(0)};
+    } else if (input.get_element_type() == element::string) {
+        ov::OutputVector unpacked_input = make_shared(input)->outputs();
+        auto begins = unpacked_input[0];
+        auto ends = unpacked_input[1];
+        auto chars = unpacked_input[2];
+
+        // squeeze begins and ends by the given dimensions
+        begins = std::make_shared(begins, axis_const);
+        ends = std::make_shared(ends, axis_const);
+
+        ov::Output string_pack_result = make_shared(begins, ends, chars);
+        set_node_name(node.get_name(), string_pack_result.get_node_shared_ptr());
+        return {string_pack_result};
     }
 
     auto squeeze = make_shared(input, axis_const);
diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
index 64cfbb1ce3d36a..ea36ca54cf7e27 100644
--- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
+++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
@@ -47,7 +47,9 @@ uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) {
     }
 }
 
-dnnl::memory::data_type DnnlExtensionUtils::ElementTypeToDataType(const ov::element::Type& elementType) {
+std::optional DnnlExtensionUtils::ElementTypeToDataType(
+    const ov::element::Type& elementType,
+    DnnlExtensionUtils::nothrow_tag) noexcept {
     switch (elementType) {
     case ov::element::f32:
         return memory::data_type::f32;
@@ -81,11 +83,18 @@ dnnl::memory::data_type DnnlExtensionUtils::ElementTypeToDataType(const ov::elem
     case ov::element::undefined:
         return 
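// The string branch added to translate_squeeze_op above only reshapes the
// index tensors of the (begins, ends, chars) decomposition; the character
// buffer passes through untouched. A toy stand-in for that layout (types and
// names are ours, for illustration only):
#include <cstddef>
#include <cstdint>
#include <vector>

struct StringTensorParts {
    std::vector<int32_t> begins, ends;  // per-element byte ranges into chars
    std::vector<uint8_t> chars;         // all string bytes, concatenated
    std::vector<std::size_t> shape;     // logical shape of begins/ends
};

// Squeezing drops unit dims from the logical shape; begins/ends/chars data
// stay bitwise identical, mirroring the Squeeze-on-indices graph above.
StringTensorParts squeeze_all_unit_dims(StringTensorParts t) {
    std::vector<std::size_t> out_shape;
    for (std::size_t d : t.shape) {
        if (d != 1) {
            out_shape.push_back(d);
        }
    }
    t.shape = std::move(out_shape);
    return t;
}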
memory::data_type::undef; default: { - OPENVINO_THROW("CPU plugin does not support ", elementType.to_string(), " for use with oneDNN."); + return {}; } } } +dnnl::memory::data_type DnnlExtensionUtils::ElementTypeToDataType(const ov::element::Type& elementType, + DnnlExtensionUtils::throw_tag) { + auto&& result = ElementTypeToDataType(elementType, nothrow_tag{}); + OPENVINO_ASSERT(result, "CPU plugin does not support ", elementType.to_string(), " for use with oneDNN."); + return result.value(); +} + ov::element::Type DnnlExtensionUtils::DataTypeToElementType(const dnnl::memory::data_type& dataType) { switch (dataType) { case memory::data_type::f32: diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.h b/src/plugins/intel_cpu/src/dnnl_extension_utils.h index 81f83048cb1f46..49da38356287ca 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.h +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.h @@ -8,6 +8,7 @@ */ #pragma once +#include #include #include "common/c_types_map.hpp" @@ -25,9 +26,18 @@ class Shape; class IMemory; class DnnlExtensionUtils { +public: + struct throw_tag {}; + struct nothrow_tag {}; + public: static uint8_t sizeOfDataType(dnnl::memory::data_type dataType); - static dnnl::memory::data_type ElementTypeToDataType(const ov::element::Type& elementType); + static dnnl::memory::data_type ElementTypeToDataType(const ov::element::Type& elementType, + throw_tag tag = throw_tag{}); + + static std::optional ElementTypeToDataType(const ov::element::Type& elementType, + nothrow_tag) noexcept; + static ov::element::Type DataTypeToElementType(const dnnl::memory::data_type& dataType); static Dim convertToDim(const dnnl::memory::dim& dim); static dnnl::memory::dim convertToDnnlDim(const Dim& dim); diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 094ebbff01eb6d..3ab561f9c5b79a 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -6,8 +6,6 @@ #include #include -#include -#include #include #include #include @@ -15,17 +13,12 @@ #include #include -#include "common/cpu_convert.h" #include "common/float16.hpp" #include "common/primitive_hashing_utils.hpp" #include "config.h" #include "cpu/ref_eltwise.hpp" #include "cpu_types.h" #include "dnnl_extension_utils.h" -#include "emitters/plugin/x64/jit_bf16_emitters.hpp" -#include "emitters/plugin/x64/jit_dnnl_emitters.hpp" -#include "emitters/plugin/x64/jit_eltwise_emitters.hpp" -#include "emitters/plugin/x64/jit_emitter.hpp" #include "fake_quantize.h" #include "input.h" #include "memory_desc/dnnl_blocked_memory_desc.h" @@ -55,14 +48,17 @@ # include "cpu/aarch64/cpu_isa_traits.hpp" # include "executors/aarch64/jit_eltwise.hpp" # include "kernels/aarch64/jit_uni_eltwise_generic.hpp" +#elif defined(OPENVINO_ARCH_X86_64) +# include "cpu/x64/cpu_isa_traits.hpp" +# include "kernels/x64/jit_uni_eltwise_generic.hpp" #endif using namespace dnnl::impl::utils; using namespace dnnl::impl::cpu; -#ifndef OPENVINO_ARCH_ARM64 +#if defined(OPENVINO_ARCH_X86_64) +using namespace ov::intel_cpu::x64; using namespace dnnl::impl::cpu::x64; -using namespace Xbyak; #endif #if defined(OPENVINO_ARCH_ARM64) @@ -70,8 +66,6 @@ using namespace ov::intel_cpu::aarch64; using namespace dnnl::impl::cpu::aarch64; #endif -#define GET_OFF(field) offsetof(jit_eltwise_call_args_ptrs, field) - namespace ov { namespace intel_cpu { namespace node { @@ -94,1052 +88,6 @@ bool jitIsSupported(const Node* node, } // namespace #endif -#if 
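// Usage sketch for the two ElementTypeToDataType overloads introduced above
// (call sites hypothetical): the default throw_tag overload keeps existing
// callers source-compatible, while the nothrow_tag overload turns the lookup
// into an exception-free probe:
//
//   // throws for unsupported element types, as before
//   auto dt = DnnlExtensionUtils::ElementTypeToDataType(prc);
//
//   // returns std::optional<dnnl::memory::data_type>, never throws
//   if (auto maybe_dt = DnnlExtensionUtils::ElementTypeToDataType(
//           prc, DnnlExtensionUtils::nothrow_tag{})) {
//       use_data_type(maybe_dt.value());  // use_data_type is a placeholder
//   }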
defined(OPENVINO_ARCH_X86_64) - -template -struct SupportedPrecisions { - void operator()(std::set>& precisions) { - precisions = T::get_supported_precisions(); - } -}; - -struct EltwiseEmitterContext { - std::shared_ptr emitter; - jit_generator* host; - cpu_isa_t host_isa; - const EltwiseData& opData; - ov::element::Type exec_prc; -}; - -template -struct EltwiseEmitter { - void operator()(EltwiseEmitterContext& ctx) { - ctx.emitter = std::make_shared(ctx.host, ctx.host_isa, ctx.exec_prc); - } -}; - -template <> -struct EltwiseEmitter { - void operator()(EltwiseEmitterContext& ctx) { - auto algKind = static_cast(ctx.opData.onednnAlgorithm); - ctx.emitter = std::make_shared(ctx.host, - ctx.host_isa, - algKind, - ctx.opData.alpha, - ctx.opData.beta, - ctx.exec_prc); - } -}; - -template <> -struct EltwiseEmitter { - void operator()(EltwiseEmitterContext& ctx) { - ctx.emitter = std::make_shared(ctx.host, - ctx.host_isa, - ctx.opData.alpha, - ctx.opData.beta, - ctx.opData.gamma, - ctx.exec_prc); - } -}; - -template <> -struct EltwiseEmitter { - void operator()(EltwiseEmitterContext& ctx) { - ctx.emitter = std::make_shared(ctx.host, - ctx.host_isa, - ctx.exec_prc, - ctx.opData.alpha, - ctx.opData.beta); - } -}; - -static void set_intersection(const std::set>& precisions1, - const std::set>& precisions2, - std::set>& intersection) { - std::map intersection_types; - - for (auto it1 = precisions1.begin(); it1 != precisions1.end(); ++it1) { - for (auto it2 = precisions2.begin(); it2 != precisions2.end(); ++it2) { - const auto& it1_precisions = *it1; - // all element types are equal - if (it1_precisions[0] == (*it2)[0]) { - // first precisions size is used - intersection_types.emplace(it1_precisions[0], it1_precisions.size()); - } - } - } - - for (auto it = intersection_types.begin(); it != intersection_types.end(); ++it) { - intersection.insert(std::vector(it->second, it->first)); - } -} - -ov::element::Type eltwise_precision_helper::get_precision(const size_t inputs_number, - const ov::element::Type (&src_prc)[MAX_ELTWISE_INPUTS], - const std::vector& eltwise_data) { - ov::element::Type exec_prc = ov::element::undefined; - - std::set> supported_precision_intersection = - get_supported_precisions(eltwise_data.front().algo); - - // for element-wise operations all inputs must to have the same precisions - auto has_same_precision = [](const std::vector& precisions) { - return std::all_of(precisions.begin(), precisions.end(), [&precisions](const element::Type precision) { - return precision == precisions[0]; - }); - }; - - assert(std::all_of(supported_precision_intersection.begin(), - supported_precision_intersection.end(), - has_same_precision)); - - for (size_t i = 1; i < eltwise_data.size(); ++i) { - std::set> prcs = get_supported_precisions(eltwise_data[i].algo); - std::set> prcs_intersect = {}; - - OPENVINO_ASSERT(std::all_of(prcs.begin(), prcs.end(), has_same_precision), - "for element-wise nodes all precisions have to be equal"); - - set_intersection(supported_precision_intersection, prcs, prcs_intersect); - - supported_precision_intersection = prcs_intersect; - } - - static const element::Type exec_precisions_priority[] = - {element::u8, element::i8, element::u16, element::i16, element::bf16, element::i32, element::f32}; - - for (const auto prc : exec_precisions_priority) { - if (std::any_of(supported_precision_intersection.begin(), - supported_precision_intersection.end(), - [&prc, &src_prc](const std::vector& precisions) { - return (std::find(precisions.begin(), precisions.end(), prc) != 
precisions.end()) && - (src_prc[0] == prc); - })) { - exec_prc = prc; - break; - } - } - - for (size_t i = 0; i < inputs_number; i++) { - if (src_prc[i] != exec_prc) { - exec_prc = ov::element::f32; - break; - } - } - - if (exec_prc == ov::element::undefined) { - OPENVINO_THROW("Eltwise jitter failed to specify execution precision for Eltwise node"); - } - - return exec_prc; -} - -std::set> eltwise_precision_helper::get_supported_precisions(const Algorithm& algo) { - std::set> precisions; - - OV_SWITCH(intel_cpu, - SupportedPrecisions, - precisions, - algo, - OV_CASE(Algorithm::EltwiseRelu, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseGeluErf, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseGeluTanh, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseElu, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseTanh, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseSigmoid, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseAbs, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseSqrt, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseSoftRelu, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseClamp, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseSwish, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseHswish, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseMish, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseHsigmoid, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseRoundHalfToEven, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseRoundHalfAwayFromZero, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseAdd, jit_add_emitter), - OV_CASE(Algorithm::EltwiseMulAdd, jit_mul_add_emitter), - OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), - OV_CASE(Algorithm::EltwiseMultiply, jit_multiply_emitter), - OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter), - OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter), - OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter), - OV_CASE(Algorithm::EltwiseNegative, jit_negative_emitter), - OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter), - OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter), - OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter), - OV_CASE(Algorithm::EltwiseMinimum, jit_minimum_emitter), - OV_CASE(Algorithm::EltwiseExp, jit_exp_emitter), - OV_CASE(Algorithm::EltwiseSquaredDifference, jit_squared_difference_emitter), - OV_CASE(Algorithm::EltwisePowerDynamic, jit_power_dynamic_emitter), - OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), - OV_CASE(Algorithm::EltwiseNotEqual, jit_not_equal_emitter), - OV_CASE(Algorithm::EltwiseGreater, jit_greater_emitter), - OV_CASE(Algorithm::EltwiseGreaterEqual, jit_greater_equal_emitter), - OV_CASE(Algorithm::EltwiseLess, jit_less_emitter), - OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter), - OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter), - OV_CASE(Algorithm::EltwiseLogicalOr, jit_logical_or_emitter), - OV_CASE(Algorithm::EltwiseLogicalXor, jit_logical_xor_emitter), - OV_CASE(Algorithm::EltwiseLogicalNot, jit_logical_not_emitter), - OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter), - OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter), - OV_CASE(Algorithm::EltwiseErf, jit_erf_emitter), - OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter), - OV_CASE(Algorithm::EltwiseIsFinite, jit_is_finite_emitter), - OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter), - OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter), - OV_CASE(Algorithm::EltwiseSelect, jit_select_emitter), - OV_CASE(Algorithm::EltwiseBitwiseAnd, 
jit_bitwise_and_emitter), - OV_CASE(Algorithm::EltwiseBitwiseNot, jit_bitwise_not_emitter), - OV_CASE(Algorithm::EltwiseBitwiseOr, jit_bitwise_or_emitter), - OV_CASE(Algorithm::EltwiseBitwiseXor, jit_bitwise_xor_emitter)); - - if (precisions.empty()) { - OPENVINO_THROW("Unsupported operation type for Eltwise emitter"); - } - - return precisions; -} - -template -struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_eltwise_generic) - - explicit jit_uni_eltwise_generic(const jit_eltwise_params& jep, - const std::vector& eltwise_data, - const std::vector& ops_list, - const dnnl::post_ops& post_ops) - : jit_uni_eltwise_kernel(jep), - jit_generator(jit_name()), - eltwise_data_(eltwise_data), - ops_list_(ops_list), - post_ops_(post_ops) {} - - void create_ker() override { - jit_generator::create_kernel(); - ker_ = (decltype(ker_))jit_ker(); - } - - void generate() override { - auto const exec_prc = eltwise_precision_helper::get_precision(jep_.inputs_number, jep_.src_prc, eltwise_data_); - - eltwise_emitter = create_eltwise_emitter(eltwise_data_.front(), exec_prc); - for (size_t i = 1; i < eltwise_data_.size(); ++i) { - post_op_emitters.push_back(create_eltwise_emitter(eltwise_data_[i], exec_prc)); - } - - const auto& p = post_ops_.get(); - for (int i = 0; i < post_ops_.len(); ++i) { - if (!p->entry_[i].is_quantization()) { - OPENVINO_THROW("Eltwise jitter error. Unsupported post op detected"); - } - quantization_injectors.push_back(std::make_shared>(this, - p->entry_[i], - vmm_d_weights, - vmm_d_bias, - reg_d_weights, - reg_d_bias)); - } - - if (mayiuse(avx512_core) || mayiuse(avx2_vnni_2)) { - auto const mode = jep_.do_output_saturation ? jit_uni_vcvtneps2bf16::conversion_mode::saturation_mode - : jit_uni_vcvtneps2bf16::conversion_mode::default_mode; - uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(this, isa, element::bf16, mode)); - } - - const auto& jep = jep_; - - this->preamble(); - - const int offset_count = jep.input_size - 1; - - // ptrs initializing - if (jep.use_runtime_ptrs) { - for (size_t i = 0; i < jep.inputs_number; i++) { - mov(start_to_offsets, ptr[reg_const_params + GET_OFF(src_offsets) + i * sizeof(size_t)]); - mov(get_src_reg(i), ptr[reg_const_params + GET_OFF(src_ptr[0]) + i * sizeof(size_t)]); - for (int j = 0; j < offset_count; j++) { - mov(reg_tmp_64, ptr[start_to_offsets + j * sizeof(size_t)]); - imul(reg_tmp_64, ptr[reg_indexes + j * sizeof(size_t)]); - add(get_src_reg(i), reg_tmp_64); - } - } - - mov(start_to_offsets, ptr[reg_const_params + GET_OFF(dst_offsets)]); - mov(reg_dst, ptr[reg_const_params + GET_OFF(dst_ptr)]); - for (int j = 0; j < offset_count; j++) { - mov(reg_tmp_64, ptr[start_to_offsets + j * sizeof(size_t)]); - imul(reg_tmp_64, ptr[reg_indexes + j * sizeof(size_t)]); - add(reg_dst, reg_tmp_64); - } - - xor_(reg_oc_off, reg_oc_off); - - mov(reg_work_amount, ptr[reg_const_params + GET_OFF(work_amount)]); - } else { - auto init_ptrs_with_offsets = [this, offset_count](Reg64 pointer, const std::vector& offsets) { - for (int j = 0; j < offset_count; j++) { - if (jep_.dims[j] != 1 && offsets[j] != 0) { - mov(reg_tmp_64, offsets[j]); - imul(reg_tmp_64, ptr[reg_indexes + j * sizeof(size_t)]); - add(pointer, reg_tmp_64); - } - } - }; - - for (size_t i = 0; i < jep.inputs_number; i++) { - mov(get_src_reg(i), ptr[reg_const_params + GET_OFF(src_ptr[0]) + i * sizeof(size_t)]); - init_ptrs_with_offsets(get_src_reg(i), jep.src_offsets[i]); - } - - mov(reg_dst, ptr[reg_const_params + 
GET_OFF(dst_ptr)]); - init_ptrs_with_offsets(reg_dst, jep.dst_offsets); - - xor_(reg_oc_off, reg_oc_off); - init_ptrs_with_offsets(reg_oc_off, jep.oc_offsets); - - mov(reg_work_amount, jep.work_amount); - } - - mov(reg_post_op_ptrs, ptr[reg_const_params + GET_OFF(post_op_data)]); - - Xbyak::Label unroll_loop_label; - Xbyak::Label unroll_loop_end_label; - Xbyak::Label main_loop_label; - Xbyak::Label main_loop_end_label; - Xbyak::Label tail_loop_label; - Xbyak::Label tail_loop_end_label; - - if (isa == x64::avx512_core) { - vpxord(vmm_zero, vmm_zero, vmm_zero); - } - - for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] == 1) { - load_vector(get_vmm_reg(i), ptr[get_src_reg(i)], jep.src_prc[i], exec_prc, true); - } - } - - size_t min_src_size = jep.dst_size; - for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) { - min_src_size = std::min(min_src_size, jep.src_size[i]); - } - } - if (jep_.oc_size > 1) { - min_src_size = std::min(min_src_size, jep_.oc_size); - } - - if (min_src_size != jep.dst_size) { - bool is_valid_configuration = true; - if (jep.dst_size % min_src_size != 0) { - is_valid_configuration = false; - } - - for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1 && jep.src_size[i] != min_src_size && jep.src_size[i] != jep.dst_size) { - is_valid_configuration = false; - } - } - - if (jep_.oc_size > 1 && jep_.oc_size != min_src_size && jep_.oc_size != jep.dst_size) { - is_valid_configuration = false; - } - - if (!is_valid_configuration) { - OPENVINO_THROW("Eltwise jitter has invalid configuration for Eltwise node"); - } - - L(unroll_loop_label); - { - size_t loop_step = min_src_size; - size_t vec_step = cpu_isa_traits::vlen / exec_prc.size(); - - cmp(reg_work_amount, loop_step); - jl(unroll_loop_end_label, T_NEAR); - - for (size_t j = 0; j < min_src_size / vec_step; j++) { - for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) { - load_vector(get_vmm_reg(i), - ptr[get_src_reg(i) + j * vec_step * jep.src_prc[i].size()], - jep.src_prc[i], - exec_prc, - false); - } - } - - compute_eltwise_op(); - - apply_post_ops(false, jep_.oc_size > 1 ? j * vec_step * sizeof(float) : 0); - - store_vector(ptr[reg_dst + j * vec_step * jep.dst_prc.size()], vmm_dst, exec_prc, jep.dst_prc); - } - - size_t tail_start = min_src_size - min_src_size % vec_step; - for (size_t j = tail_start; j < min_src_size; j++) { - for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) { - load_scalar(get_xmm_reg(i), - ptr[get_src_reg(i) + j * jep.src_prc[i].size()], - jep.src_prc[i], - exec_prc); - } - } - - compute_eltwise_op(); - - apply_post_ops(true, jep_.oc_size > 1 ? 
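// For orientation while reading the generator being removed here: it uses the
// common three-loop JIT layout. The unroll loop covers the case where some
// input is shorter than the destination (per-channel broadcasts) and steps by
// min_src_size; the main loop steps by a full vector width
// (vlen / exec_prc.size()); the tail loop finishes the remainder one scalar at
// a time. The relocated kernels/x64/jit_uni_eltwise_generic implementation
// presumably keeps the same structure.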
j * sizeof(float) : 0); - - store_scalar(ptr[reg_dst + j * jep.dst_prc.size()], - xmm_dst, - exec_prc, - jep.dst_prc, - jep.do_output_saturation); - } - - for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] == jep.dst_size) { - add(get_src_reg(i), jep.src_prc[i].size() * loop_step); - } - } - - add(reg_dst, jep.dst_prc.size() * loop_step); - sub(reg_work_amount, loop_step); - if (jep_.oc_size > 1 && jep_.oc_size != min_src_size) { - add(reg_oc_off, loop_step * sizeof(float)); - } - - jmp(unroll_loop_label, T_NEAR); - } - - L(unroll_loop_end_label); - } - - if (min_src_size == jep.dst_size) { - L(main_loop_label); - { - size_t loop_step = cpu_isa_traits::vlen / exec_prc.size(); - - cmp(reg_work_amount, loop_step); - jl(main_loop_end_label, T_NEAR); - - for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) { - load_vector(get_vmm_reg(i), ptr[get_src_reg(i)], jep.src_prc[i], exec_prc, false); - } - } - - compute_eltwise_op(); - - apply_post_ops(false); - - store_vector(ptr[reg_dst], vmm_dst, exec_prc, jep.dst_prc); - - for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) { - add(get_src_reg(i), jep.src_prc[i].size() * loop_step); - } - } - - add(reg_dst, jep.dst_prc.size() * loop_step); - sub(reg_work_amount, loop_step); - if (jep_.oc_size > 1) { - add(reg_oc_off, loop_step * sizeof(float)); - } - - jmp(main_loop_label, T_NEAR); - } - - L(main_loop_end_label); - } - - L(tail_loop_label); - { - size_t loop_step = 1; - - cmp(reg_work_amount, loop_step); - jl(tail_loop_end_label, T_NEAR); - - for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) { - load_scalar(get_xmm_reg(i), ptr[get_src_reg(i)], jep.src_prc[i], exec_prc); - } - } - - compute_eltwise_op(); - - apply_post_ops(true); - - store_scalar(ptr[reg_dst], xmm_dst, exec_prc, jep.dst_prc, jep.do_output_saturation); - - for (size_t i = 0; i < jep.inputs_number; i++) { - if (jep.src_size[i] != 1) { - add(get_src_reg(i), jep.src_prc[i].size() * loop_step); - } - } - - add(reg_dst, jep.dst_prc.size() * loop_step); - sub(reg_work_amount, loop_step); - if (jep_.oc_size > 1) { - add(reg_oc_off, loop_step * sizeof(float)); - } - - jmp(tail_loop_label, T_NEAR); - } - - L(tail_loop_end_label); - - this->postamble(); - - if (uni_vcvtneps2bf16) { - uni_vcvtneps2bf16->emit_data(); - } - - eltwise_emitter->emit_data(); - for (size_t i = 0; i < post_op_emitters.size(); i++) { - post_op_emitters[i]->emit_data(); - } - } - -private: - using Vmm = typename conditional3::type; - - Reg64 get_src_reg(int idx) { - return Reg64(r8.getIdx() + idx); - } - - Vmm get_vmm_reg(int idx) { - return Vmm(1 + idx); - } - - Vmm get_aux_vmm(int idx) { - return Vmm(10 + idx); - } - - Xmm get_xmm_reg(int idx) { - return Xmm(get_vmm_reg(idx).getIdx()); - } - - Reg64 reg_post_op_ptrs = rax; - Reg64 start_to_offsets = reg_post_op_ptrs; // rax - Reg64 reg_dst = rbx; - Reg64 reg_work_amount = rdx; - - Reg64 reg_oc_off = abi_not_param1; - Reg64 reg_const_params = abi_param1; - Reg64 reg_indexes = abi_param2; // reg_d_bias - - Reg8 reg_tmp_8 = Reg8(r15.getIdx()); - Reg16 reg_tmp_16 = Reg16(r15.getIdx()); - Reg32 reg_tmp_32 = Reg32(r15.getIdx()); - Reg64 reg_tmp_64 = Reg64(r15.getIdx()); - - Reg64 reg_d_weights = rbp; - Reg64 reg_d_bias = rsi; - - Vmm vmm_dst = Vmm(9); - Xmm xmm_dst = Xmm(9); - - Vmm vmm_d_weights = Vmm(12); - Vmm vmm_d_bias = Vmm(13); - Vmm vmm_zero = Vmm(15); - - std::shared_ptr uni_vcvtneps2bf16; - - std::shared_ptr eltwise_emitter = nullptr; - std::vector> post_op_emitters = 
{}; - - std::vector>> quantization_injectors = {}; - - const std::vector& eltwise_data_; - const std::vector& ops_list_; - const dnnl::post_ops& post_ops_; - - std::shared_ptr create_eltwise_emitter(const EltwiseData& data, ov::element::Type exec_prec) { - EltwiseEmitterContext ctx = {nullptr, this, isa, data, exec_prec}; - - OV_SWITCH(intel_cpu, - EltwiseEmitter, - ctx, - data.algo, - OV_CASE(Algorithm::EltwiseRelu, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseGeluErf, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseGeluTanh, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseElu, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseTanh, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseSigmoid, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseAbs, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseSqrt, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseSoftRelu, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseClamp, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseSwish, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseHswish, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseMish, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseHsigmoid, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseRoundHalfToEven, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseRoundHalfAwayFromZero, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseAdd, jit_add_emitter), - OV_CASE(Algorithm::EltwiseMulAdd, jit_mul_add_emitter), - OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), - OV_CASE(Algorithm::EltwiseMultiply, jit_multiply_emitter), - OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter), - OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter), - OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter), - OV_CASE(Algorithm::EltwiseNegative, jit_negative_emitter), - OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter), - OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter), - OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter), - OV_CASE(Algorithm::EltwiseMinimum, jit_minimum_emitter), - OV_CASE(Algorithm::EltwiseExp, jit_exp_emitter), - OV_CASE(Algorithm::EltwiseSquaredDifference, jit_squared_difference_emitter), - OV_CASE(Algorithm::EltwisePowerDynamic, jit_power_dynamic_emitter), - OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), - OV_CASE(Algorithm::EltwiseNotEqual, jit_not_equal_emitter), - OV_CASE(Algorithm::EltwiseGreater, jit_greater_emitter), - OV_CASE(Algorithm::EltwiseGreaterEqual, jit_greater_equal_emitter), - OV_CASE(Algorithm::EltwiseLess, jit_less_emitter), - OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter), - OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter), - OV_CASE(Algorithm::EltwiseLogicalOr, jit_logical_or_emitter), - OV_CASE(Algorithm::EltwiseLogicalXor, jit_logical_xor_emitter), - OV_CASE(Algorithm::EltwiseLogicalNot, jit_logical_not_emitter), - OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter), - OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter), - OV_CASE(Algorithm::EltwiseErf, jit_erf_emitter), - OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter), - OV_CASE(Algorithm::EltwiseIsFinite, jit_is_finite_emitter), - OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter), - OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter), - OV_CASE(Algorithm::EltwiseSelect, jit_select_emitter), - OV_CASE(Algorithm::EltwiseBitwiseAnd, jit_bitwise_and_emitter), - OV_CASE(Algorithm::EltwiseBitwiseNot, jit_bitwise_not_emitter), - OV_CASE(Algorithm::EltwiseBitwiseOr, jit_bitwise_or_emitter), - 
OV_CASE(Algorithm::EltwiseBitwiseXor, jit_bitwise_xor_emitter)); - - if (!ctx.emitter) { - OPENVINO_THROW("Unsupported operation type for Eltwise emitter"); - } - - return ctx.emitter; - } - - inline void compute_eltwise_op() { - std::vector in_idxs; - std::vector aux_idxs; - for (size_t i = 0; i < eltwise_emitter->get_inputs_num(); i++) { - in_idxs.push_back(get_vmm_reg(i).getIdx()); - } - for (size_t i = 0; i < eltwise_emitter->aux_vecs_count(); i++) { - aux_idxs.push_back(get_aux_vmm(i).getIdx()); - } - - std::vector out_idxs; - out_idxs.push_back(vmm_dst.getIdx()); - - eltwise_emitter->emit_code(in_idxs, out_idxs, aux_idxs); - } - - inline void apply_post_ops(bool is_scalar, int offset = 0) { - int input_idx = eltwise_emitter->get_inputs_num(); - int eltwise_post_op_idx = 0; - int quantization_post_op_idx = 0; - for (size_t i = 1; i < ops_list_.size(); i++) { - if (ops_list_[i] == ov::intel_cpu::Type::Eltwise) { - std::vector in_idxs; - std::vector aux_idxs; - in_idxs.push_back(vmm_dst.getIdx()); - for (size_t j = 1; j < post_op_emitters[eltwise_post_op_idx]->get_inputs_num(); j++) { - in_idxs.push_back(get_vmm_reg(input_idx++).getIdx()); - } - for (size_t j = 0; j < post_op_emitters[eltwise_post_op_idx]->aux_vecs_count(); j++) { - aux_idxs.push_back(get_aux_vmm(j).getIdx()); - } - - std::vector out_idxs; - out_idxs.push_back(vmm_dst.getIdx()); - - post_op_emitters[eltwise_post_op_idx]->emit_code(in_idxs, out_idxs, aux_idxs); - - eltwise_post_op_idx++; - } else if (ops_list_[i] == ov::intel_cpu::Type::FakeQuantize) { - auto& p = post_ops_.get()->entry_[quantization_post_op_idx]; - bool do_dequantization = p.quantization.alg == dnnl::impl::alg_kind::quantization_quantize_dequantize; - bool do_rounding = do_dequantization || jep_.dst_prc == ov::element::f32 || i != ops_list_.size() - 1; - int s_idx = vmm_dst.getIdx(); - - size_t ptrs_table_off = - quantization_post_op_idx * quantization_injectors[quantization_post_op_idx]->memoryStep(); - - quantization_injectors[quantization_post_op_idx]->init_crop_ptrs(reg_post_op_ptrs + ptrs_table_off, - reg_oc_off); - quantization_injectors[quantization_post_op_idx]->compute_crop(s_idx, - s_idx + 1, - offset, - is_scalar, - jep_.oc_size == 1); - - quantization_injectors[quantization_post_op_idx]->init_input_scale_shift_ptrs( - reg_post_op_ptrs + ptrs_table_off, - reg_oc_off); - quantization_injectors[quantization_post_op_idx] - ->compute_input_scale_shift(s_idx, s_idx + 1, offset, do_rounding, is_scalar, jep_.oc_size == 1); - - quantization_injectors[quantization_post_op_idx]->init_output_scale_shift_ptrs( - reg_post_op_ptrs + ptrs_table_off, - reg_oc_off); - quantization_injectors[quantization_post_op_idx]->compute_output_scale_shift(s_idx, - s_idx + 1, - offset, - is_scalar, - jep_.oc_size == 1); - - quantization_post_op_idx++; - } else { - OPENVINO_THROW("Unexpected: Eltwise jit kernel: unexpected operation type"); - } - } - } - - inline void load_vector(Vmm vmm_src, - const Xbyak::Address& op, - ov::element::Type src_prc, - ov::element::Type dst_prc, - bool broadcast) { - Xmm xmm_src = Xmm(vmm_src.getIdx()); - - if (src_prc == dst_prc) { - if (broadcast) { - load_scalar(xmm_src, op, src_prc, dst_prc); - uni_vbroadcastss(vmm_src, xmm_src); - } else { - uni_vmovups(vmm_src, op); - } - return; - } - - if (broadcast) { - load_scalar(xmm_src, op, src_prc, dst_prc); - uni_vbroadcastss(vmm_src, xmm_src); - } else { - switch (src_prc) { - case ov::element::f32: - case ov::element::i32: - uni_vmovups(vmm_src, op); - break; - case ov::element::bf16: - 
vpmovzxwd(vmm_src, op); - uni_vpslld(vmm_src, vmm_src, 16); - break; - case ov::element::f16: - vcvtph2ps(vmm_src, op); - break; - case ov::element::u16: - uni_vpmovzxwd(vmm_src, op); - break; - case ov::element::i16: - uni_vpmovsxwd(vmm_src, op); - break; - case ov::element::i8: - uni_vpmovsxbd(vmm_src, op); - break; - case ov::element::u8: - uni_vpmovzxbd(vmm_src, op); - break; - default: - OPENVINO_THROW("unknown src_prc"); - } - - switch (dst_prc) { - case ov::element::f32: - if (!src_prc.is_real()) { - uni_vcvtdq2ps(vmm_src, vmm_src); - } - break; - case ov::element::i32: - if (src_prc.is_real()) { - uni_vcvtps2dq(vmm_src, vmm_src); - } - break; - default: - OPENVINO_THROW("unknown dst_prc"); - } - } - } - - inline void load_scalar(Xmm xmm_src, - const Xbyak::Address& op, - ov::element::Type src_prc, - ov::element::Type dst_prc) { - if (src_prc == dst_prc) { - switch (src_prc.size()) { - case 4: - uni_vmovss(xmm_src, op); - break; - case 1: - mov(reg_tmp_8, op); - movzx(reg_tmp_32, reg_tmp_8); - uni_vmovd(xmm_src, reg_tmp_32); - break; - default: - OPENVINO_THROW("unknown prc"); - } - return; - } - - switch (src_prc) { - case ov::element::f32: - case ov::element::i32: - uni_vmovss(xmm_src, op); - break; - case ov::element::bf16: - if (isa == x64::avx2_vnni_2) { - vbcstnebf162ps(xmm_src, op); - } else { - uni_vpinsrw(xmm_src, xmm_src, op, 0); - uni_vpslld(xmm_src, xmm_src, 16); - } - break; - case ov::element::f16: - if (isa == x64::avx2_vnni_2) { - vbcstnesh2ps(xmm_src, op); - } else { - vcvtph2ps(xmm_src, op); - } - break; - case ov::element::i16: - uni_vpinsrw(xmm_src, xmm_src, op, 0); - uni_vpmovsxwd(xmm_src, op); - break; - case ov::element::u16: - uni_vpinsrw(xmm_src, xmm_src, op, 0); - uni_vpmovzxwd(xmm_src, op); - break; - case ov::element::i8: - movsx(reg_tmp_32, op); - uni_vmovq(xmm_src, reg_tmp_64); - break; - case ov::element::u8: - movzx(reg_tmp_32, op); - uni_vmovq(xmm_src, reg_tmp_64); - break; - default: - OPENVINO_THROW("unknown src_prc"); - } - - switch (dst_prc) { - case ov::element::f32: - if (!src_prc.is_real()) { - uni_vcvtdq2ps(xmm_src, xmm_src); - } - break; - case ov::element::i32: - if (src_prc.is_real()) { - uni_vcvtps2dq(xmm_src, xmm_src); - } - break; - default: - OPENVINO_THROW("unknown dst_prc"); - } - } - - inline void store_vector(const Xbyak::Address& op, - Vmm vmm_dst, - ov::element::Type src_prc, - ov::element::Type dst_prc) { - Xmm xmm_dst = Xmm(vmm_dst.getIdx()); - Ymm ymm_dst = Ymm(vmm_dst.getIdx()); - - if (src_prc == dst_prc) { - uni_vmovups(op, vmm_dst); - return; - } - - switch (src_prc) { - case ov::element::f32: - if (!dst_prc.is_real()) { - uni_vcvtps2dq(vmm_dst, vmm_dst); - } - break; - case ov::element::i32: - if (dst_prc.is_real()) { - uni_vcvtdq2ps(vmm_dst, vmm_dst); - } - break; - default: - OPENVINO_THROW("unknown src_prc"); - } - - switch (dst_prc) { - case ov::element::f32: - case ov::element::i32: - uni_vmovups(op, vmm_dst); - break; - case ov::element::bf16: - if (isa == x64::avx512_core) { - uni_vcvtneps2bf16->emit_code({static_cast(vmm_dst.getIdx())}, - {static_cast(ymm_dst.getIdx())}); - vmovdqu16(op, ymm_dst); - } else { - uni_vcvtneps2bf16->emit_code({static_cast(vmm_dst.getIdx())}, - {static_cast(xmm_dst.getIdx())}); - uni_vmovdqu(op, xmm_dst); - } - break; - case ov::element::f16: - vcvtps2ph(op, vmm_dst, 0x4); - break; - case ov::element::i16: - if (isa == x64::avx512_core) { - vpmovsdw(op, vmm_dst); - } else { - uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41) { - vpermq(ymm_dst, ymm_dst, 0x08); - 
uni_vmovdqu(op, xmm_dst); - } else { - movq(op, xmm_dst); - } - } - break; - case ov::element::u16: - if (isa == x64::avx512_core) { - vpmaxsd(vmm_dst, vmm_zero, vmm_dst); - vpmovusdw(op, vmm_dst); - } else { - uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41) { - vpermq(ymm_dst, ymm_dst, 0x08); - uni_vmovdqu(op, xmm_dst); - } else { - movq(op, xmm_dst); - } - } - break; - case ov::element::i8: - if (isa == x64::avx512_core) { - vpmovsdb(op, vmm_dst); - } else { - uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41) { - vpermq(ymm_dst, ymm_dst, 0x08); - } - uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41) { - vmovq(op, xmm_dst); - } else { - movd(op, xmm_dst); - } - } - break; - case ov::element::u8: - if (isa == x64::avx512_core) { - vpmaxsd(vmm_dst, vmm_zero, vmm_dst); - vpmovusdb(op, vmm_dst); - } else { - uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41) { - vpermq(ymm_dst, ymm_dst, 0x08); - } - uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst); - if (isa != x64::sse41) { - vmovq(op, xmm_dst); - } else { - movd(op, xmm_dst); - } - } - break; - default: - OPENVINO_THROW("unknown dst_prc"); - } - } - - inline void store_scalar(const Xbyak::Address& op, - Xmm xmm_dst, - ov::element::Type src_prc, - ov::element::Type dst_prc, - const bool do_output_saturation) { - if (src_prc == dst_prc) { - switch (src_prc.size()) { - case 4: - uni_vmovss(op, xmm_dst); - break; - case 1: - movq(reg_tmp_64, xmm_dst); - mov(op, reg_tmp_8); - break; - default: - OPENVINO_THROW("unknown prc"); - } - return; - } - - switch (src_prc) { - case ov::element::f32: - if (!dst_prc.is_real()) { - uni_vcvtps2dq(xmm_dst, xmm_dst); - } - break; - case ov::element::i32: - if (dst_prc.is_real()) { - uni_vcvtdq2ps(xmm_dst, xmm_dst); - } - break; - default: - OPENVINO_THROW("unknown src_prc"); - } - - switch (dst_prc) { - case ov::element::f32: - case ov::element::i32: - uni_vmovss(op, xmm_dst); - break; - case ov::element::bf16: - if (do_output_saturation) { - uni_vpsrld(xmm_dst, xmm_dst, 16); - } else { - uni_vcvtneps2bf16->emit_code({static_cast(xmm_dst.getIdx())}, - {static_cast(xmm_dst.getIdx())}); - } - uni_vpextrw(op, xmm_dst, 0x0); - break; - case ov::element::f16: - vcvtps2ph(xmm_dst, xmm_dst, 0x4); - movq(reg_tmp_64, xmm_dst); - mov(op, reg_tmp_16); - break; - case ov::element::i16: - uni_vpackssdw(xmm_dst, xmm_dst, xmm_dst); - movq(reg_tmp_64, xmm_dst); - mov(op, reg_tmp_16); - break; - case ov::element::u16: - uni_vpackusdw(xmm_dst, xmm_dst, xmm_dst); - movq(reg_tmp_64, xmm_dst); - mov(op, reg_tmp_16); - break; - case ov::element::i8: - uni_vpackssdw(xmm_dst, xmm_dst, xmm_dst); - uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst); - movq(reg_tmp_64, xmm_dst); - mov(op, reg_tmp_8); - break; - case ov::element::u8: - uni_vpackusdw(xmm_dst, xmm_dst, xmm_dst); - uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst); - movq(reg_tmp_64, xmm_dst); - mov(op, reg_tmp_8); - break; - default: - OPENVINO_THROW("unknown dst_prc"); - } - } -}; - -#endif // OPENVINO_ARCH_X86_64 - Eltwise::BroadcastingPolicy Eltwise::determineBroadcastingPolicy(const std::shared_ptr& op) { const auto const1 = ov::as_type_ptr(op->get_input_node_shared_ptr(0)); const auto const2 = ov::as_type_ptr(op->get_input_node_shared_ptr(1)); @@ -1718,12 +666,15 @@ class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { }); #if defined(OPENVINO_ARCH_X86_64) - if (mayiuse(x64::avx512_core)) { - _pKernel.reset(new jit_uni_eltwise_generic(jep, eltwise_data, ops_list, post_ops)); - } else if (mayiuse(x64::avx2)) { - 
_pKernel.reset(new jit_uni_eltwise_generic<x64::avx2>(jep, eltwise_data, ops_list, post_ops));
-        } else if (mayiuse(x64::sse41)) {
-            _pKernel.reset(new jit_uni_eltwise_generic<x64::sse41>(jep, eltwise_data, ops_list, post_ops));
+        if (mayiuse(dnnl::impl::cpu::x64::avx512_core)) {
+            _pKernel.reset(
+                new jit_uni_eltwise_generic<dnnl::impl::cpu::x64::avx512_core>(jep, eltwise_data, ops_list, post_ops));
+        } else if (mayiuse(dnnl::impl::cpu::x64::avx2)) {
+            _pKernel.reset(
+                new jit_uni_eltwise_generic<dnnl::impl::cpu::x64::avx2>(jep, eltwise_data, ops_list, post_ops));
+        } else if (mayiuse(dnnl::impl::cpu::x64::sse41)) {
+            _pKernel.reset(
+                new jit_uni_eltwise_generic<dnnl::impl::cpu::x64::sse41>(jep, eltwise_data, ops_list, post_ops));
         } else {
             OPENVINO_THROW("Can't create jit eltwise kernel");
         }
@@ -2404,8 +1355,9 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
     bool canUseOptimizedImpl = mayiuse(dnnl::impl::cpu::aarch64::asimd) &&
                                (getInputShapeAtPort(0).getRank() <= MAX_ELTWISE_DIM_RANK);
     bool canUseOptimizedShapeAgnosticImpl = isDynamicNode() && canUseOptimizedImpl;
-#else
-    bool canUseOptimizedImpl = mayiuse(x64::sse41) && getInputShapeAtPort(0).getRank() <= MAX_ELTWISE_DIM_RANK;
+#elif defined(OPENVINO_ARCH_X86_64)
+    bool canUseOptimizedImpl =
+        mayiuse(dnnl::impl::cpu::x64::sse41) && getInputShapeAtPort(0).getRank() <= MAX_ELTWISE_DIM_RANK;
     // TODO: Add EltwiseLog algorithm support for JIT implementation
     canUseOptimizedImpl &= !one_of(getAlgorithm(),
                                    Algorithm::EltwiseLog,
@@ -2413,6 +1365,9 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
                                    Algorithm::EltwiseBitwiseRightShift);
     bool canUseOptimizedShapeAgnosticImpl = isDynamicNode() && canUseOptimizedImpl;
+#else
+    bool canUseOptimizedImpl = false;
+    bool canUseOptimizedShapeAgnosticImpl = false;
 #endif

     if (!canUseOptimizedImpl && !fusedWith.empty()) {
@@ -2633,7 +1588,11 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
         // same for disabled collapse dims
     } else if (lt == Blocked && shape.getRank() != 1 &&
                (shape.getMinDims()[1] != Shape::UNDEFINED_DIM && shape.getMinDims()[1] > 1)) {
-        size_t blockSize = dnnl::impl::cpu::x64::mayiuse(x64::avx512_core) ? 16 : 8;
+#if defined(OPENVINO_ARCH_X86_64)
+        size_t blockSize = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) ?
16 : 8; +#else + size_t blockSize = 1; +#endif VectorDims blocks = dims; VectorDims order(blocks.size()); std::iota(order.begin(), order.end(), 0); @@ -2721,12 +1680,12 @@ void Eltwise::initSupportedPrimitiveDescriptors() { } else { OPENVINO_THROW("not supported architecture"); } -#else - if (mayiuse(x64::avx512_core)) { +#elif defined(OPENVINO_ARCH_X86_64) + if (mayiuse(dnnl::impl::cpu::x64::avx512_core)) { impl_type = impl_desc_type::jit_avx512; - } else if (mayiuse(x64::avx2)) { + } else if (mayiuse(dnnl::impl::cpu::x64::avx2)) { impl_type = impl_desc_type::jit_avx2; - } else if (mayiuse(x64::sse41)) { + } else if (mayiuse(dnnl::impl::cpu::x64::sse41)) { impl_type = impl_desc_type::jit_sse42; } #endif @@ -3392,10 +2351,12 @@ bool Eltwise::canFuse(const NodePtr& node) const { (!jitIsSupported(eltwise, eltwise->getAlpha(), eltwise->getBeta(), eltwise->getGamma()))) { return false; } -#else - if (!mayiuse(x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK) { +#elif defined(OPENVINO_ARCH_X86_64) + if (!mayiuse(dnnl::impl::cpu::x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK) { return false; } +#else + return false; #endif // TODO: EltwiseLog is supported only via reference executor diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.h b/src/plugins/intel_cpu/src/nodes/eltwise.h index b8e544eb49fd88..5175ca61cbf246 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.h +++ b/src/plugins/intel_cpu/src/nodes/eltwise.h @@ -14,65 +14,13 @@ #include "dnnl_postops_composer_legacy.h" #include "executors/eltwise_list.hpp" #include "nodes/executors/eltwise.hpp" -#include "nodes/kernels/jit_eltwise_call_args_ptrs.hpp" - -#if defined(OPENVINO_ARCH_ARM64) -# include "kernels/aarch64/jit_uni_eltwise_generic.hpp" -#endif +#include "nodes/kernels/jit_eltwise_common.hpp" namespace ov { namespace intel_cpu { namespace node { -#ifndef OPENVINO_ARCH_ARM64 - -struct jit_eltwise_params { - size_t inputs_number; - size_t input_size; - - ov::element::Type src_prc[MAX_ELTWISE_INPUTS]; - ov::element::Type dst_prc; - - VectorDims dims; - VectorDims src_offsets[MAX_ELTWISE_INPUTS]; - VectorDims dst_offsets; - VectorDims oc_offsets; - - size_t src_size[MAX_ELTWISE_INPUTS]; - size_t dst_size; - size_t oc_size; - - size_t work_amount; - bool use_runtime_ptrs; - bool do_output_saturation; -}; - -struct jit_eltwise_call_args_indexes { - size_t indexes[MAX_ELTWISE_DIM_RANK]; -}; - -class Eltwise; - -struct jit_uni_eltwise_kernel { - void (*ker_)(const jit_eltwise_call_args_ptrs*, const jit_eltwise_call_args_indexes*); - - void operator()(const jit_eltwise_call_args_ptrs* const_args, const jit_eltwise_call_args_indexes* indexes) { - assert(ker_); - ker_(const_args, indexes); - } - - explicit jit_uni_eltwise_kernel(jit_eltwise_params jep) : ker_(nullptr), jep_(std::move(jep)) {} - virtual ~jit_uni_eltwise_kernel() {} - - virtual void create_ker() = 0; - - jit_eltwise_params jep_; -}; - -#endif - enum class EltwiseImplType { reference = 0, optimized = 1, optimizedShapeAgnostic = 2 }; - class Eltwise : public Node { public: class IEltwiseExecutor { @@ -218,16 +166,6 @@ class Eltwise : public Node { std::shared_ptr eltwiseExecPtr = nullptr; }; -class eltwise_precision_helper { -public: - static ov::element::Type get_precision(const size_t inputs_number, - const ov::element::Type (&src_prc)[MAX_ELTWISE_INPUTS], - const std::vector& eltwise_data); - -private: - static std::set> get_supported_precisions(const Algorithm& algo); -}; - } // namespace node } // namespace intel_cpu } // 
namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp index fdb08704647851..f12f2ead7a141f 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp @@ -55,8 +55,22 @@ bool AclPoolingExecutor::isSupported(const TensorInfo& srcTensorInfo, DEBUG_LOG("NCHW + CEIL gives an accuracy problem in ACL AvgPool. ACL executor will not be created."); return false; } - DimensionRoundingType round = - (poolingAttrs.rounding == op::RoundingType::CEIL) ? DimensionRoundingType::CEIL : DimensionRoundingType::FLOOR; + DimensionRoundingType round; + switch (poolingAttrs.rounding) { + case op::RoundingType::FLOOR: + round = DimensionRoundingType::FLOOR; + break; + case op::RoundingType::CEIL: + round = DimensionRoundingType::CEIL; + break; + // CEIL_TORCH type is mapped to ACL CEIL type + case op::RoundingType::CEIL_TORCH: + round = DimensionRoundingType::CEIL; + break; + default: + DEBUG_LOG("Unknown rounding type: ", poolingAttrs.rounding); + return false; + } if (srcDimsSize == 5) { if (dstDescsSize > 1) { @@ -89,7 +103,12 @@ bool AclPoolingExecutor::isSupported(const TensorInfo& srcTensorInfo, pool_info->pool_type = pool_type; pool_info->exclude_padding = exclude_padding; if (dstDescsSize > 1) { - TensorInfo indTensorInfo = TensorInfo(shapeCast(*indDims), 1, arm_compute::DataType::U32, dataLayout); + auto indShape = shapeCast(*indDims); + if (dataLayout == arm_compute::DataLayout::NHWC) { + changeLayoutToNH_C({&indShape}); + } + // U32 is specified since this is the only data type supported by ACL + TensorInfo indTensorInfo = TensorInfo(indShape, 1, arm_compute::DataType::U32, dataLayout); arm_compute::Status s = arm_compute::NEPoolingLayer::validate(&srcTensorInfo, &dstTensorInfo, *pool_info, &indTensorInfo); if (!s) { @@ -178,10 +197,13 @@ bool AclPoolingExecutor::init(const PoolingAttrs& poolingAttrs, return false; } auto indDims = dstDescs[1]->getShape().getStaticDims(); - TensorInfo indTensorInfo = TensorInfo(shapeCast(indDims), - 1, - precisionToAclDataType(dstDescs[1]->getPrecision()), - getAclDataLayoutByMemoryDesc(dstDescs[1])); + auto indShape = shapeCast(indDims); + if (dstTensorInfo.data_layout() == arm_compute::DataLayout::NHWC) { + changeLayoutToNH_C({&indShape}); + } + // U32 is specified since this is the only data type supported by ACL + TensorInfo indTensorInfo = + TensorInfo(indShape, 1, arm_compute::DataType::U32, getAclDataLayoutByMemoryDesc(dstDescs[1])); indTensor.allocator()->init(indTensorInfo); exec_func = [this, pool_info]() -> std::unique_ptr { auto acl_op = std::make_unique(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp index 75b3d28eecf4aa..ea2c09516e9305 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.hpp @@ -79,7 +79,7 @@ class AclPoolingExecutorBuilder : public PoolingExecutorBuilder { return false; } - if (dstDescs.size() == 2u && dstDescs[1]->getPrecision() != ov::element::u32) { + if (dstDescs.size() == 2u && !one_of(dstDescs[1]->getPrecision(), ov::element::u32, ov::element::i32)) { DEBUG_LOG("AclPoolingExecutor supports U32 as indices precisions only. 
", "Passed indices precision: ", dstDescs[1]->getPrecision()); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index 0462e728917a18..6501b334097472 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -14,12 +14,6 @@ using namespace Xbyak_aarch64; using namespace dnnl::impl::cpu; using namespace dnnl::impl::cpu::aarch64; -void jit_uni_eltwise_kernel::operator()(const node::jit_eltwise_call_args_ptrs* const_args, - const jit_eltwise_call_args_indexes* indexes) { - assert(ker_); - ker_(const_args, indexes); -} - template jit_uni_eltwise_generic::jit_uni_eltwise_generic(jit_eltwise_params jep, std::vector eltwise_data, @@ -35,7 +29,8 @@ template void jit_uni_eltwise_generic::generate() { preamble(); - auto const exec_prc = eltwise_precision_helper::get_precision(jep_.inputs_number, jep_.src_prc, eltwise_data_); + static const std::vector exec_precisions_priority = {element::f16, element::f32}; + auto const exec_prc = eltwise_precision_helper::get_precision(jep_.inputs_number, jep_.src_prc, eltwise_data_, exec_precisions_priority); eltwise_emitter = create_eltwise_emitter(eltwise_data_.front(), exec_prc); for (size_t i = 1; i < eltwise_data_.size(); ++i) { @@ -52,11 +47,11 @@ void jit_uni_eltwise_generic::generate() { for (size_t i = 0; i < jep.inputs_number; i++) { ldr(start_to_offsets, ptr(reg_const_params, - static_cast(offsetof(node::jit_eltwise_call_args_ptrs, src_offsets) + + static_cast(offsetof(jit_eltwise_call_args_ptrs, src_offsets) + i * sizeof(size_t)))); ldr(get_src_reg(i), ptr(reg_const_params, - static_cast(offsetof(node::jit_eltwise_call_args_ptrs, src_ptr[0]) + i * sizeof(size_t)))); + static_cast(offsetof(jit_eltwise_call_args_ptrs, src_ptr[0]) + i * sizeof(size_t)))); XReg offset_reg = get_aux_gpr(0); // X_TMP_0; XReg index_reg = get_aux_gpr(1); // X_TMP_1; for (int j = 0; j < offset_count; j++) { @@ -67,8 +62,8 @@ void jit_uni_eltwise_generic::generate() { } ldr(start_to_offsets, - ptr(reg_const_params, static_cast(offsetof(node::jit_eltwise_call_args_ptrs, dst_offsets)))); - ldr(reg_dst, ptr(reg_const_params, static_cast(offsetof(node::jit_eltwise_call_args_ptrs, dst_ptr)))); + ptr(reg_const_params, static_cast(offsetof(jit_eltwise_call_args_ptrs, dst_offsets)))); + ldr(reg_dst, ptr(reg_const_params, static_cast(offsetof(jit_eltwise_call_args_ptrs, dst_ptr)))); XReg offset_reg = get_aux_gpr(0); // X_TMP_0; XReg index_reg = get_aux_gpr(1); // X_TMP_1; for (int j = 0; j < offset_count; j++) { @@ -80,7 +75,7 @@ void jit_uni_eltwise_generic::generate() { mov(reg_oc_off, 0); ldr(reg_work_amount, - ptr(reg_const_params, static_cast(offsetof(node::jit_eltwise_call_args_ptrs, work_amount)))); + ptr(reg_const_params, static_cast(offsetof(jit_eltwise_call_args_ptrs, work_amount)))); } else { auto init_ptrs_with_offsets = [this, offset_count, param2](XReg pointer, const std::vector& offsets) { for (int j = 0; j < offset_count; j++) { @@ -98,11 +93,11 @@ void jit_uni_eltwise_generic::generate() { for (size_t i = 0; i < jep.inputs_number; i++) { ldr(get_src_reg(i), ptr(param1, - static_cast(offsetof(node::jit_eltwise_call_args_ptrs, src_ptr) + i * sizeof(size_t)))); + static_cast(offsetof(jit_eltwise_call_args_ptrs, src_ptr) + i * sizeof(size_t)))); init_ptrs_with_offsets(get_src_reg(i), jep.src_offsets[i]); } - ldr(reg_dst, ptr(reg_const_params, 
static_cast(offsetof(node::jit_eltwise_call_args_ptrs, dst_ptr)))); + ldr(reg_dst, ptr(reg_const_params, static_cast(offsetof(jit_eltwise_call_args_ptrs, dst_ptr)))); init_ptrs_with_offsets(reg_dst, jep.dst_offsets); mov(reg_oc_off, 0); @@ -778,80 +773,21 @@ void jit_uni_eltwise_generic::apply_post_ops() { } } -namespace { +template struct jit_uni_eltwise_generic; +} // namespace aarch64 + +namespace { template struct SupportedPrecisions { void operator()(std::set>& precisions) { precisions = T::get_supported_precisions(); } }; - -static void set_intersection(const std::set>& precisions1, - const std::set>& precisions2, - std::set>& intersection) { - std::map intersection_types; - - for (auto it1 = precisions1.begin(); it1 != precisions1.end(); ++it1) { - for (auto it2 = precisions2.begin(); it2 != precisions2.end(); ++it2) { - const auto& it1_precisions = *it1; - // all element types are equal - if (it1_precisions[0] == (*it2)[0]) { - // first precisions size is used - intersection_types.emplace(it1_precisions[0], it1_precisions.size()); - } - } - } - - for (auto it = intersection_types.begin(); it != intersection_types.end(); ++it) { - intersection.insert(std::vector(it->second, it->first)); - } -} } // namespace -ov::element::Type eltwise_precision_helper::get_precision(const size_t inputs_number, - const ov::element::Type (&src_prc)[MAX_ELTWISE_INPUTS], - const std::vector& eltwise_data) { - ov::element::Type exec_prc = ov::element::undefined; - - const auto algorithm = eltwise_data.front().algo; - std::set> supported_precision_intersection = get_supported_precisions(algorithm); - for (size_t i = 1; i < eltwise_data.size(); ++i) { - std::set> prcs = get_supported_precisions(eltwise_data[i].algo); - std::set> prcs_intersect = {}; - - set_intersection(supported_precision_intersection, prcs, prcs_intersect); - - supported_precision_intersection = prcs_intersect; - } - - static const element::Type exec_precisions_priority[] = {element::f16, element::f32}; - - for (const auto prc : exec_precisions_priority) { - if (std::any_of(supported_precision_intersection.begin(), - supported_precision_intersection.end(), - [&prc](const std::vector& precisions) { - return std::find(precisions.begin(), precisions.end(), prc) != precisions.end(); - })) { - exec_prc = prc; - break; - } - } - - for (size_t i = 0; i < inputs_number; i++) { - if (src_prc[i] != exec_prc) { - exec_prc = ov::element::f32; - break; - } - } - - if (exec_prc == ov::element::undefined) { - OPENVINO_THROW("Eltwise jitter failed to specify execution precision for Eltwise node"); - } - - return exec_prc; -} +using namespace aarch64; std::set> eltwise_precision_helper::get_supported_precisions(const Algorithm& algo) { std::set> precisions; @@ -911,8 +847,5 @@ std::set> eltwise_precision_helper::get_supported_pre return precisions; } -template struct jit_uni_eltwise_generic; - -} // namespace aarch64 } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp index 107495a693431b..93958b7cad4755 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.hpp @@ -28,7 +28,7 @@ #include "emitters/plugin/aarch64/jit_eltwise_emitters.hpp" #include "emitters/plugin/aarch64/jit_emitter.hpp" -#include "nodes/kernels/jit_eltwise_call_args_ptrs.hpp" +#include 
"nodes/kernels/jit_eltwise_common.hpp" #include "utils/cpu_utils.hpp" #include "utils/general_utils.h" @@ -40,45 +40,6 @@ using namespace Xbyak_aarch64; using namespace dnnl::impl::cpu; using namespace dnnl::impl::cpu::aarch64; -struct jit_eltwise_params { - size_t inputs_number; - size_t input_size; - - ov::element::Type src_prc[MAX_ELTWISE_INPUTS]; - ov::element::Type dst_prc; - - VectorDims dims; - VectorDims src_offsets[MAX_ELTWISE_INPUTS]; - VectorDims dst_offsets; - VectorDims oc_offsets; - - size_t src_size[MAX_ELTWISE_INPUTS]; - size_t dst_size; - size_t oc_size; - - size_t work_amount; - bool use_runtime_ptrs; - bool do_output_saturation; -}; - -struct jit_eltwise_call_args_indexes { - size_t indexes[MAX_ELTWISE_DIM_RANK]; -}; - -struct jit_uni_eltwise_kernel { - void (*ker_)(const node::jit_eltwise_call_args_ptrs*, const jit_eltwise_call_args_indexes*); - - void operator()(const node::jit_eltwise_call_args_ptrs* const_args, const jit_eltwise_call_args_indexes* indexes); - - jit_uni_eltwise_kernel() {} - jit_uni_eltwise_kernel(jit_eltwise_params jep) : ker_(nullptr), jep_(std::move(jep)) {} - virtual ~jit_uni_eltwise_kernel() {} - - virtual void create_ker() = 0; - - jit_eltwise_params jep_; -}; - template struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, jit_generator { public: @@ -89,8 +50,6 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, jit_generator { std::vector ops_list, dnnl::post_ops post_ops); - jit_uni_eltwise_generic() {} - void create_ker() override { jit_generator::create_kernel(); ker_ = (decltype(ker_))jit_ker(); @@ -255,16 +214,6 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, jit_generator { std::vector> post_op_emitters; }; -class eltwise_precision_helper { -public: - static ov::element::Type get_precision(const size_t inputs_number, - const ov::element::Type (&src_prc)[MAX_ELTWISE_INPUTS], - const std::vector& eltwise_data); - -private: - static std::set> get_supported_precisions(const Algorithm& algo); -}; - } // namespace aarch64 } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/jit_eltwise_call_args_ptrs.hpp b/src/plugins/intel_cpu/src/nodes/kernels/jit_eltwise_call_args_ptrs.hpp deleted file mode 100644 index 66f119ee839b14..00000000000000 --- a/src/plugins/intel_cpu/src/nodes/kernels/jit_eltwise_call_args_ptrs.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once -#include - -namespace ov { -namespace intel_cpu { -namespace node { - -#define MAX_ELTWISE_INPUTS 7 -#define MAX_ELTWISE_DIM_RANK 12 - -struct jit_eltwise_call_args_ptrs { - const void* src_ptr[MAX_ELTWISE_INPUTS]; - void* dst_ptr; - // ptr to array of post op inputs pointers (flat list) - const void** post_op_data; - - // shape agnostic kernel - size_t work_amount; - const void* src_offsets[MAX_ELTWISE_INPUTS]; - const void* dst_offsets; -}; - -} // namespace node -} // namespace intel_cpu -} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/nodes/kernels/jit_eltwise_common.cpp b/src/plugins/intel_cpu/src/nodes/kernels/jit_eltwise_common.cpp new file mode 100644 index 00000000000000..0977a559fb132a --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/kernels/jit_eltwise_common.cpp @@ -0,0 +1,90 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "jit_eltwise_common.hpp" + +namespace ov { +namespace intel_cpu { + +static void 
set_intersection(const std::set<std::vector<element::Type>>& precisions1,
+                             const std::set<std::vector<element::Type>>& precisions2,
+                             std::set<std::vector<element::Type>>& intersection) {
+    std::map<element::Type, size_t> intersection_types;
+
+    for (auto it1 = precisions1.begin(); it1 != precisions1.end(); ++it1) {
+        for (auto it2 = precisions2.begin(); it2 != precisions2.end(); ++it2) {
+            const auto& it1_precisions = *it1;
+            // all element types are equal
+            if (it1_precisions[0] == (*it2)[0]) {
+                // first precisions size is used
+                intersection_types.emplace(it1_precisions[0], it1_precisions.size());
+            }
+        }
+    }
+
+    for (auto it = intersection_types.begin(); it != intersection_types.end(); ++it) {
+        intersection.insert(std::vector<element::Type>(it->second, it->first));
+    }
+}
+
+ov::element::Type eltwise_precision_helper::get_precision(const size_t inputs_number,
+                                                          const ov::element::Type (&src_prc)[MAX_ELTWISE_INPUTS],
+                                                          const std::vector<EltwiseData>& eltwise_data,
+                                                          const std::vector<element::Type>& exec_precisions_priority) {
+    ov::element::Type exec_prc = ov::element::undefined;
+
+    std::set<std::vector<element::Type>> supported_precision_intersection =
+        get_supported_precisions(eltwise_data.front().algo);
+
+    // for element-wise operations all inputs must have the same precisions
+    auto has_same_precision = [](const std::vector<element::Type>& precisions) {
+        return std::all_of(precisions.begin(), precisions.end(), [&precisions](const element::Type precision) {
+            return precision == precisions[0];
+        });
+    };
+
+    assert(std::all_of(supported_precision_intersection.begin(),
+                       supported_precision_intersection.end(),
+                       has_same_precision));
+
+    for (size_t i = 1; i < eltwise_data.size(); ++i) {
+        std::set<std::vector<element::Type>> prcs = get_supported_precisions(eltwise_data[i].algo);
+        std::set<std::vector<element::Type>> prcs_intersect = {};
+
+        OPENVINO_ASSERT(std::all_of(prcs.begin(), prcs.end(), has_same_precision),
+                        "for element-wise nodes all precisions have to be equal");
+
+        set_intersection(supported_precision_intersection, prcs, prcs_intersect);
+
+        supported_precision_intersection = prcs_intersect;
+    }
+
+    for (const auto prc : exec_precisions_priority) {
+        if (std::any_of(supported_precision_intersection.begin(),
+                        supported_precision_intersection.end(),
+                        [&prc, &src_prc](const std::vector<element::Type>& precisions) {
+                            return (std::find(precisions.begin(), precisions.end(), prc) != precisions.end()) &&
+                                   (src_prc[0] == prc);
+                        })) {
+            exec_prc = prc;
+            break;
+        }
+    }
+
+    for (size_t i = 0; i < inputs_number; i++) {
+        if (src_prc[i] != exec_prc) {
+            exec_prc = ov::element::f32;
+            break;
+        }
+    }
+
+    if (exec_prc == ov::element::undefined) {
+        OPENVINO_THROW("Eltwise jitter failed to specify execution precision for Eltwise node");
+    }
+
+    return exec_prc;
+}
+
+} // namespace intel_cpu
+} // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/jit_eltwise_common.hpp b/src/plugins/intel_cpu/src/nodes/kernels/jit_eltwise_common.hpp
new file mode 100644
index 00000000000000..9e6ffa54e815cf
--- /dev/null
+++ b/src/plugins/intel_cpu/src/nodes/kernels/jit_eltwise_common.hpp
@@ -0,0 +1,85 @@
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cassert>
+#include <set>
+
+#include "cpu_types.h"
+#include "nodes/executors/eltwise.hpp"
+#include "openvino/core/type/element_type.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+#define MAX_ELTWISE_INPUTS 7
+#define MAX_ELTWISE_DIM_RANK 12
+
+struct jit_eltwise_call_args_ptrs {
+    const void* src_ptr[MAX_ELTWISE_INPUTS];
+    void* dst_ptr;
+    // ptr to array of post op inputs pointers (flat list)
+    const void** post_op_data;
+
+    // shape agnostic kernel
+    size_t work_amount;
+    const void* src_offsets[MAX_ELTWISE_INPUTS];
+    const void* dst_offsets;
+};
+
+struct jit_eltwise_params {
+    size_t inputs_number;
+    size_t input_size;
+
+    ov::element::Type src_prc[MAX_ELTWISE_INPUTS];
+    ov::element::Type dst_prc;
+
+    VectorDims dims;
+    VectorDims src_offsets[MAX_ELTWISE_INPUTS];
+    VectorDims dst_offsets;
+    VectorDims oc_offsets;
+
+    size_t src_size[MAX_ELTWISE_INPUTS];
+    size_t dst_size;
+    size_t oc_size;
+
+    size_t work_amount;
+    bool use_runtime_ptrs;
+    bool do_output_saturation;
+};
+
+struct jit_eltwise_call_args_indexes {
+    size_t indexes[MAX_ELTWISE_DIM_RANK];
+};
+
+struct jit_uni_eltwise_kernel {
+    void (*ker_)(const jit_eltwise_call_args_ptrs*, const jit_eltwise_call_args_indexes*);
+
+    void operator()(const jit_eltwise_call_args_ptrs* const_args, const jit_eltwise_call_args_indexes* indexes) {
+        assert(ker_);
+        ker_(const_args, indexes);
+    }
+
+    explicit jit_uni_eltwise_kernel(jit_eltwise_params jep) : ker_(nullptr), jep_(std::move(jep)) {}
+    virtual ~jit_uni_eltwise_kernel() {}
+
+    virtual void create_ker() = 0;
+
+    jit_eltwise_params jep_;
+};
+
+class eltwise_precision_helper {
+public:
+    static ov::element::Type get_precision(const size_t inputs_number,
+                                           const ov::element::Type (&src_prc)[MAX_ELTWISE_INPUTS],
+                                           const std::vector<EltwiseData>& eltwise_data,
+                                           const std::vector<element::Type>& exec_precisions_priority);
+
+private:
+    static std::set<std::vector<element::Type>> get_supported_precisions(const Algorithm& algo);
+};
+
+} // namespace intel_cpu
+} // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_uni_eltwise_generic.cpp
new file mode 100644
index 00000000000000..9f8080f96b0468
--- /dev/null
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_uni_eltwise_generic.cpp
@@ -0,0 +1,947 @@
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "jit_uni_eltwise_generic.hpp"
+
+#include <memory>
+#include <vector>
+
+#include "emitters/plugin/x64/jit_dnnl_emitters.hpp"
+#include "emitters/plugin/x64/jit_eltwise_emitters.hpp"
+#include "nodes/eltwise.h"
+
+namespace ov {
+namespace intel_cpu {
+namespace x64 {
+
+using namespace Xbyak;
+using namespace dnnl::impl::cpu;
+using namespace dnnl::impl::cpu::x64;
+
+#define GET_OFF(field) offsetof(jit_eltwise_call_args_ptrs, field)
+
+template <cpu_isa_t isa>
+jit_uni_eltwise_generic<isa>::jit_uni_eltwise_generic(const jit_eltwise_params& jep,
+                                                      const std::vector<EltwiseData>& eltwise_data,
+                                                      const std::vector<ov::intel_cpu::Type>& ops_list,
+                                                      const dnnl::post_ops& post_ops)
+    : jit_uni_eltwise_kernel(jep),
+      jit_generator(jit_name()),
+      eltwise_data_(eltwise_data),
+      ops_list_(ops_list),
+      post_ops_(post_ops) {}
+
+template <cpu_isa_t isa>
+void jit_uni_eltwise_generic<isa>::generate() {
+    static const std::vector<element::Type> exec_precisions_priority =
+        {element::u8, element::i8, element::u16, element::i16, element::bf16, element::i32, element::f32};
+    auto const exec_prc = eltwise_precision_helper::get_precision(jep_.inputs_number,
+                                                                  jep_.src_prc,
+                                                                  eltwise_data_,
+                                                                  exec_precisions_priority);
+
+    eltwise_emitter = create_eltwise_emitter(eltwise_data_.front(), exec_prc);
+    for (size_t i = 1; i < eltwise_data_.size(); ++i) {
+        post_op_emitters.push_back(create_eltwise_emitter(eltwise_data_[i], exec_prc));
+    }
+
+    const auto& p = post_ops_.get();
+    for (int i = 0; i < post_ops_.len(); ++i) {
+        if (!p->entry_[i].is_quantization()) {
+            OPENVINO_THROW("Eltwise jitter error. 
Unsupported post op detected"); + } + quantization_injectors.push_back(std::make_shared>(this, + p->entry_[i], + vmm_d_weights, + vmm_d_bias, + reg_d_weights, + reg_d_bias)); + } + + if (mayiuse(avx512_core) || mayiuse(avx2_vnni_2)) { + auto const mode = jep_.do_output_saturation ? jit_uni_vcvtneps2bf16::conversion_mode::saturation_mode + : jit_uni_vcvtneps2bf16::conversion_mode::default_mode; + uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(this, isa, element::bf16, mode)); + } + + const auto& jep = jep_; + + this->preamble(); + + const int offset_count = jep.input_size - 1; + + // ptrs initializing + if (jep.use_runtime_ptrs) { + for (size_t i = 0; i < jep.inputs_number; i++) { + mov(start_to_offsets, ptr[reg_const_params + GET_OFF(src_offsets) + i * sizeof(size_t)]); + mov(get_src_reg(i), ptr[reg_const_params + GET_OFF(src_ptr[0]) + i * sizeof(size_t)]); + for (int j = 0; j < offset_count; j++) { + mov(reg_tmp_64, ptr[start_to_offsets + j * sizeof(size_t)]); + imul(reg_tmp_64, ptr[reg_indexes + j * sizeof(size_t)]); + add(get_src_reg(i), reg_tmp_64); + } + } + + mov(start_to_offsets, ptr[reg_const_params + GET_OFF(dst_offsets)]); + mov(reg_dst, ptr[reg_const_params + GET_OFF(dst_ptr)]); + for (int j = 0; j < offset_count; j++) { + mov(reg_tmp_64, ptr[start_to_offsets + j * sizeof(size_t)]); + imul(reg_tmp_64, ptr[reg_indexes + j * sizeof(size_t)]); + add(reg_dst, reg_tmp_64); + } + + xor_(reg_oc_off, reg_oc_off); + + mov(reg_work_amount, ptr[reg_const_params + GET_OFF(work_amount)]); + } else { + auto init_ptrs_with_offsets = [this, offset_count](Reg64 pointer, const std::vector& offsets) { + for (int j = 0; j < offset_count; j++) { + if (jep_.dims[j] != 1 && offsets[j] != 0) { + mov(reg_tmp_64, offsets[j]); + imul(reg_tmp_64, ptr[reg_indexes + j * sizeof(size_t)]); + add(pointer, reg_tmp_64); + } + } + }; + + for (size_t i = 0; i < jep.inputs_number; i++) { + mov(get_src_reg(i), ptr[reg_const_params + GET_OFF(src_ptr[0]) + i * sizeof(size_t)]); + init_ptrs_with_offsets(get_src_reg(i), jep.src_offsets[i]); + } + + mov(reg_dst, ptr[reg_const_params + GET_OFF(dst_ptr)]); + init_ptrs_with_offsets(reg_dst, jep.dst_offsets); + + xor_(reg_oc_off, reg_oc_off); + init_ptrs_with_offsets(reg_oc_off, jep.oc_offsets); + + mov(reg_work_amount, jep.work_amount); + } + + mov(reg_post_op_ptrs, ptr[reg_const_params + GET_OFF(post_op_data)]); + + Xbyak::Label unroll_loop_label; + Xbyak::Label unroll_loop_end_label; + Xbyak::Label main_loop_label; + Xbyak::Label main_loop_end_label; + Xbyak::Label tail_loop_label; + Xbyak::Label tail_loop_end_label; + + if (isa == x64::avx512_core) { + vpxord(vmm_zero, vmm_zero, vmm_zero); + } + + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] == 1) { + load_vector(get_vmm_reg(i), ptr[get_src_reg(i)], jep.src_prc[i], exec_prc, true); + } + } + + size_t min_src_size = jep.dst_size; + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] != 1) { + min_src_size = std::min(min_src_size, jep.src_size[i]); + } + } + if (jep_.oc_size > 1) { + min_src_size = std::min(min_src_size, jep_.oc_size); + } + + if (min_src_size != jep.dst_size) { + bool is_valid_configuration = true; + if (jep.dst_size % min_src_size != 0) { + is_valid_configuration = false; + } + + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] != 1 && jep.src_size[i] != min_src_size && jep.src_size[i] != jep.dst_size) { + is_valid_configuration = false; + } + } + + if (jep_.oc_size > 1 && jep_.oc_size != min_src_size && jep_.oc_size != jep.dst_size) { 
+ is_valid_configuration = false; + } + + if (!is_valid_configuration) { + OPENVINO_THROW("Eltwise jitter has invalid configuration for Eltwise node"); + } + + L(unroll_loop_label); + { + size_t loop_step = min_src_size; + size_t vec_step = cpu_isa_traits::vlen / exec_prc.size(); + + cmp(reg_work_amount, loop_step); + jl(unroll_loop_end_label, T_NEAR); + + for (size_t j = 0; j < min_src_size / vec_step; j++) { + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] != 1) { + load_vector(get_vmm_reg(i), + ptr[get_src_reg(i) + j * vec_step * jep.src_prc[i].size()], + jep.src_prc[i], + exec_prc, + false); + } + } + + compute_eltwise_op(); + + apply_post_ops(false, jep_.oc_size > 1 ? j * vec_step * sizeof(float) : 0); + + store_vector(ptr[reg_dst + j * vec_step * jep.dst_prc.size()], vmm_dst, exec_prc, jep.dst_prc); + } + + size_t tail_start = min_src_size - min_src_size % vec_step; + for (size_t j = tail_start; j < min_src_size; j++) { + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] != 1) { + load_scalar(get_xmm_reg(i), + ptr[get_src_reg(i) + j * jep.src_prc[i].size()], + jep.src_prc[i], + exec_prc); + } + } + + compute_eltwise_op(); + + apply_post_ops(true, jep_.oc_size > 1 ? j * sizeof(float) : 0); + + store_scalar(ptr[reg_dst + j * jep.dst_prc.size()], + xmm_dst, + exec_prc, + jep.dst_prc, + jep.do_output_saturation); + } + + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] == jep.dst_size) { + add(get_src_reg(i), jep.src_prc[i].size() * loop_step); + } + } + + add(reg_dst, jep.dst_prc.size() * loop_step); + sub(reg_work_amount, loop_step); + if (jep_.oc_size > 1 && jep_.oc_size != min_src_size) { + add(reg_oc_off, loop_step * sizeof(float)); + } + + jmp(unroll_loop_label, T_NEAR); + } + + L(unroll_loop_end_label); + } + + if (min_src_size == jep.dst_size) { + L(main_loop_label); + { + size_t loop_step = cpu_isa_traits::vlen / exec_prc.size(); + + cmp(reg_work_amount, loop_step); + jl(main_loop_end_label, T_NEAR); + + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] != 1) { + load_vector(get_vmm_reg(i), ptr[get_src_reg(i)], jep.src_prc[i], exec_prc, false); + } + } + + compute_eltwise_op(); + + apply_post_ops(false); + + store_vector(ptr[reg_dst], vmm_dst, exec_prc, jep.dst_prc); + + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] != 1) { + add(get_src_reg(i), jep.src_prc[i].size() * loop_step); + } + } + + add(reg_dst, jep.dst_prc.size() * loop_step); + sub(reg_work_amount, loop_step); + if (jep_.oc_size > 1) { + add(reg_oc_off, loop_step * sizeof(float)); + } + + jmp(main_loop_label, T_NEAR); + } + + L(main_loop_end_label); + } + + L(tail_loop_label); + { + size_t loop_step = 1; + + cmp(reg_work_amount, loop_step); + jl(tail_loop_end_label, T_NEAR); + + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] != 1) { + load_scalar(get_xmm_reg(i), ptr[get_src_reg(i)], jep.src_prc[i], exec_prc); + } + } + + compute_eltwise_op(); + + apply_post_ops(true); + + store_scalar(ptr[reg_dst], xmm_dst, exec_prc, jep.dst_prc, jep.do_output_saturation); + + for (size_t i = 0; i < jep.inputs_number; i++) { + if (jep.src_size[i] != 1) { + add(get_src_reg(i), jep.src_prc[i].size() * loop_step); + } + } + + add(reg_dst, jep.dst_prc.size() * loop_step); + sub(reg_work_amount, loop_step); + if (jep_.oc_size > 1) { + add(reg_oc_off, loop_step * sizeof(float)); + } + + jmp(tail_loop_label, T_NEAR); + } + + L(tail_loop_end_label); + + this->postamble(); + + if (uni_vcvtneps2bf16) { + 
uni_vcvtneps2bf16->emit_data(); + } + + eltwise_emitter->emit_data(); + for (size_t i = 0; i < post_op_emitters.size(); i++) { + post_op_emitters[i]->emit_data(); + } +} + +namespace { +struct EltwiseEmitterContext { + std::shared_ptr emitter; + jit_generator* host; + cpu_isa_t host_isa; + const EltwiseData& opData; + ov::element::Type exec_prc; +}; + +template +struct EltwiseEmitter { + void operator()(EltwiseEmitterContext& ctx) { + ctx.emitter = std::make_shared(ctx.host, ctx.host_isa, ctx.exec_prc); + } +}; + +template <> +struct EltwiseEmitter { + void operator()(EltwiseEmitterContext& ctx) { + auto algKind = static_cast(ctx.opData.onednnAlgorithm); + ctx.emitter = std::make_shared(ctx.host, + ctx.host_isa, + algKind, + ctx.opData.alpha, + ctx.opData.beta, + ctx.exec_prc); + } +}; + +template <> +struct EltwiseEmitter { + void operator()(EltwiseEmitterContext& ctx) { + ctx.emitter = std::make_shared(ctx.host, + ctx.host_isa, + ctx.opData.alpha, + ctx.opData.beta, + ctx.opData.gamma, + ctx.exec_prc); + } +}; + +template <> +struct EltwiseEmitter { + void operator()(EltwiseEmitterContext& ctx) { + ctx.emitter = std::make_shared(ctx.host, + ctx.host_isa, + ctx.exec_prc, + ctx.opData.alpha, + ctx.opData.beta); + } +}; +} // namespace + +template +std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitter(const EltwiseData& data, + ov::element::Type exec_prec) { + EltwiseEmitterContext ctx = {nullptr, this, isa, data, exec_prec}; + + OV_SWITCH(intel_cpu, + EltwiseEmitter, + ctx, + data.algo, + OV_CASE(Algorithm::EltwiseRelu, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseGeluErf, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseGeluTanh, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseElu, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseTanh, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseSigmoid, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseAbs, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseSqrt, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseSoftRelu, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseClamp, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseSwish, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseHswish, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseMish, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseHsigmoid, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseRoundHalfToEven, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseRoundHalfAwayFromZero, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseAdd, jit_add_emitter), + OV_CASE(Algorithm::EltwiseMulAdd, jit_mul_add_emitter), + OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), + OV_CASE(Algorithm::EltwiseMultiply, jit_multiply_emitter), + OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter), + OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter), + OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter), + OV_CASE(Algorithm::EltwiseNegative, jit_negative_emitter), + OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter), + OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter), + OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter), + OV_CASE(Algorithm::EltwiseMinimum, jit_minimum_emitter), + OV_CASE(Algorithm::EltwiseExp, jit_exp_emitter), + OV_CASE(Algorithm::EltwiseSquaredDifference, jit_squared_difference_emitter), + OV_CASE(Algorithm::EltwisePowerDynamic, jit_power_dynamic_emitter), + OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), + OV_CASE(Algorithm::EltwiseNotEqual, jit_not_equal_emitter), + OV_CASE(Algorithm::EltwiseGreater, jit_greater_emitter), + 
OV_CASE(Algorithm::EltwiseGreaterEqual, jit_greater_equal_emitter), + OV_CASE(Algorithm::EltwiseLess, jit_less_emitter), + OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter), + OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter), + OV_CASE(Algorithm::EltwiseLogicalOr, jit_logical_or_emitter), + OV_CASE(Algorithm::EltwiseLogicalXor, jit_logical_xor_emitter), + OV_CASE(Algorithm::EltwiseLogicalNot, jit_logical_not_emitter), + OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter), + OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter), + OV_CASE(Algorithm::EltwiseErf, jit_erf_emitter), + OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter), + OV_CASE(Algorithm::EltwiseIsFinite, jit_is_finite_emitter), + OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter), + OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter), + OV_CASE(Algorithm::EltwiseSelect, jit_select_emitter), + OV_CASE(Algorithm::EltwiseBitwiseAnd, jit_bitwise_and_emitter), + OV_CASE(Algorithm::EltwiseBitwiseNot, jit_bitwise_not_emitter), + OV_CASE(Algorithm::EltwiseBitwiseOr, jit_bitwise_or_emitter), + OV_CASE(Algorithm::EltwiseBitwiseXor, jit_bitwise_xor_emitter)); + + if (!ctx.emitter) { + OPENVINO_THROW("Unsupported operation type for Eltwise emitter"); + } + + return ctx.emitter; +} + +template +void jit_uni_eltwise_generic::compute_eltwise_op() { + std::vector in_idxs; + std::vector aux_idxs; + for (size_t i = 0; i < eltwise_emitter->get_inputs_num(); i++) { + in_idxs.push_back(get_vmm_reg(i).getIdx()); + } + for (size_t i = 0; i < eltwise_emitter->aux_vecs_count(); i++) { + aux_idxs.push_back(get_aux_vmm(i).getIdx()); + } + + std::vector out_idxs; + out_idxs.push_back(vmm_dst.getIdx()); + + eltwise_emitter->emit_code(in_idxs, out_idxs, aux_idxs); +} + +template +void jit_uni_eltwise_generic::apply_post_ops(bool is_scalar, int offset) { + int input_idx = eltwise_emitter->get_inputs_num(); + int eltwise_post_op_idx = 0; + int quantization_post_op_idx = 0; + for (size_t i = 1; i < ops_list_.size(); i++) { + if (ops_list_[i] == ov::intel_cpu::Type::Eltwise) { + std::vector in_idxs; + std::vector aux_idxs; + in_idxs.push_back(vmm_dst.getIdx()); + for (size_t j = 1; j < post_op_emitters[eltwise_post_op_idx]->get_inputs_num(); j++) { + in_idxs.push_back(get_vmm_reg(input_idx++).getIdx()); + } + for (size_t j = 0; j < post_op_emitters[eltwise_post_op_idx]->aux_vecs_count(); j++) { + aux_idxs.push_back(get_aux_vmm(j).getIdx()); + } + + std::vector out_idxs; + out_idxs.push_back(vmm_dst.getIdx()); + + post_op_emitters[eltwise_post_op_idx]->emit_code(in_idxs, out_idxs, aux_idxs); + + eltwise_post_op_idx++; + } else if (ops_list_[i] == ov::intel_cpu::Type::FakeQuantize) { + auto& p = post_ops_.get()->entry_[quantization_post_op_idx]; + bool do_dequantization = p.quantization.alg == dnnl::impl::alg_kind::quantization_quantize_dequantize; + bool do_rounding = do_dequantization || jep_.dst_prc == ov::element::f32 || i != ops_list_.size() - 1; + int s_idx = vmm_dst.getIdx(); + + size_t ptrs_table_off = + quantization_post_op_idx * quantization_injectors[quantization_post_op_idx]->memoryStep(); + + quantization_injectors[quantization_post_op_idx]->init_crop_ptrs(reg_post_op_ptrs + ptrs_table_off, + reg_oc_off); + quantization_injectors[quantization_post_op_idx]->compute_crop(s_idx, + s_idx + 1, + offset, + is_scalar, + jep_.oc_size == 1); + + quantization_injectors[quantization_post_op_idx]->init_input_scale_shift_ptrs( + reg_post_op_ptrs + ptrs_table_off, + reg_oc_off); + 
quantization_injectors[quantization_post_op_idx] + ->compute_input_scale_shift(s_idx, s_idx + 1, offset, do_rounding, is_scalar, jep_.oc_size == 1); + + quantization_injectors[quantization_post_op_idx]->init_output_scale_shift_ptrs( + reg_post_op_ptrs + ptrs_table_off, + reg_oc_off); + quantization_injectors[quantization_post_op_idx]->compute_output_scale_shift(s_idx, + s_idx + 1, + offset, + is_scalar, + jep_.oc_size == 1); + + quantization_post_op_idx++; + } else { + OPENVINO_THROW("Unexpected: Eltwise jit kernel: unexpected operation type"); + } + } +} + +template +void jit_uni_eltwise_generic::load_vector(Vmm vmm_src, + const Xbyak::Address& op, + ov::element::Type src_prc, + ov::element::Type dst_prc, + bool broadcast) { + Xmm xmm_src = Xmm(vmm_src.getIdx()); + + if (src_prc == dst_prc) { + if (broadcast) { + load_scalar(xmm_src, op, src_prc, dst_prc); + uni_vbroadcastss(vmm_src, xmm_src); + } else { + uni_vmovups(vmm_src, op); + } + return; + } + + if (broadcast) { + load_scalar(xmm_src, op, src_prc, dst_prc); + uni_vbroadcastss(vmm_src, xmm_src); + } else { + switch (src_prc) { + case ov::element::f32: + case ov::element::i32: + uni_vmovups(vmm_src, op); + break; + case ov::element::bf16: + vpmovzxwd(vmm_src, op); + uni_vpslld(vmm_src, vmm_src, 16); + break; + case ov::element::f16: + vcvtph2ps(vmm_src, op); + break; + case ov::element::u16: + uni_vpmovzxwd(vmm_src, op); + break; + case ov::element::i16: + uni_vpmovsxwd(vmm_src, op); + break; + case ov::element::i8: + uni_vpmovsxbd(vmm_src, op); + break; + case ov::element::u8: + uni_vpmovzxbd(vmm_src, op); + break; + default: + OPENVINO_THROW("unknown src_prc"); + } + + switch (dst_prc) { + case ov::element::f32: + if (!src_prc.is_real()) { + uni_vcvtdq2ps(vmm_src, vmm_src); + } + break; + case ov::element::i32: + if (src_prc.is_real()) { + uni_vcvtps2dq(vmm_src, vmm_src); + } + break; + default: + OPENVINO_THROW("unknown dst_prc"); + } + } +} + +template +void jit_uni_eltwise_generic::load_scalar(Xmm xmm_src, + const Xbyak::Address& op, + ov::element::Type src_prc, + ov::element::Type dst_prc) { + if (src_prc == dst_prc) { + switch (src_prc.size()) { + case 4: + uni_vmovss(xmm_src, op); + break; + case 1: + mov(reg_tmp_8, op); + movzx(reg_tmp_32, reg_tmp_8); + uni_vmovd(xmm_src, reg_tmp_32); + break; + default: + OPENVINO_THROW("unknown prc"); + } + return; + } + + switch (src_prc) { + case ov::element::f32: + case ov::element::i32: + uni_vmovss(xmm_src, op); + break; + case ov::element::bf16: + if (isa == x64::avx2_vnni_2) { + vbcstnebf162ps(xmm_src, op); + } else { + uni_vpinsrw(xmm_src, xmm_src, op, 0); + uni_vpslld(xmm_src, xmm_src, 16); + } + break; + case ov::element::f16: + if (isa == x64::avx2_vnni_2) { + vbcstnesh2ps(xmm_src, op); + } else { + vcvtph2ps(xmm_src, op); + } + break; + case ov::element::i16: + uni_vpinsrw(xmm_src, xmm_src, op, 0); + uni_vpmovsxwd(xmm_src, op); + break; + case ov::element::u16: + uni_vpinsrw(xmm_src, xmm_src, op, 0); + uni_vpmovzxwd(xmm_src, op); + break; + case ov::element::i8: + movsx(reg_tmp_32, op); + uni_vmovq(xmm_src, reg_tmp_64); + break; + case ov::element::u8: + movzx(reg_tmp_32, op); + uni_vmovq(xmm_src, reg_tmp_64); + break; + default: + OPENVINO_THROW("unknown src_prc"); + } + + switch (dst_prc) { + case ov::element::f32: + if (!src_prc.is_real()) { + uni_vcvtdq2ps(xmm_src, xmm_src); + } + break; + case ov::element::i32: + if (src_prc.is_real()) { + uni_vcvtps2dq(xmm_src, xmm_src); + } + break; + default: + OPENVINO_THROW("unknown dst_prc"); + } +} + +template +void 
jit_uni_eltwise_generic::store_vector(const Xbyak::Address& op, + Vmm vmm_dst, + ov::element::Type src_prc, + ov::element::Type dst_prc) { + Xmm xmm_dst = Xmm(vmm_dst.getIdx()); + Ymm ymm_dst = Ymm(vmm_dst.getIdx()); + + if (src_prc == dst_prc) { + uni_vmovups(op, vmm_dst); + return; + } + + switch (src_prc) { + case ov::element::f32: + if (!dst_prc.is_real()) { + uni_vcvtps2dq(vmm_dst, vmm_dst); + } + break; + case ov::element::i32: + if (dst_prc.is_real()) { + uni_vcvtdq2ps(vmm_dst, vmm_dst); + } + break; + default: + OPENVINO_THROW("unknown src_prc"); + } + + switch (dst_prc) { + case ov::element::f32: + case ov::element::i32: + uni_vmovups(op, vmm_dst); + break; + case ov::element::bf16: + if (isa == x64::avx512_core) { + uni_vcvtneps2bf16->emit_code({static_cast(vmm_dst.getIdx())}, + {static_cast(ymm_dst.getIdx())}); + vmovdqu16(op, ymm_dst); + } else { + uni_vcvtneps2bf16->emit_code({static_cast(vmm_dst.getIdx())}, + {static_cast(xmm_dst.getIdx())}); + uni_vmovdqu(op, xmm_dst); + } + break; + case ov::element::f16: + vcvtps2ph(op, vmm_dst, 0x4); + break; + case ov::element::i16: + if (isa == x64::avx512_core) { + vpmovsdw(op, vmm_dst); + } else { + uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); + if (isa != x64::sse41) { + vpermq(ymm_dst, ymm_dst, 0x08); + uni_vmovdqu(op, xmm_dst); + } else { + movq(op, xmm_dst); + } + } + break; + case ov::element::u16: + if (isa == x64::avx512_core) { + vpmaxsd(vmm_dst, vmm_zero, vmm_dst); + vpmovusdw(op, vmm_dst); + } else { + uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); + if (isa != x64::sse41) { + vpermq(ymm_dst, ymm_dst, 0x08); + uni_vmovdqu(op, xmm_dst); + } else { + movq(op, xmm_dst); + } + } + break; + case ov::element::i8: + if (isa == x64::avx512_core) { + vpmovsdb(op, vmm_dst); + } else { + uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst); + if (isa != x64::sse41) { + vpermq(ymm_dst, ymm_dst, 0x08); + } + uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst); + if (isa != x64::sse41) { + vmovq(op, xmm_dst); + } else { + movd(op, xmm_dst); + } + } + break; + case ov::element::u8: + if (isa == x64::avx512_core) { + vpmaxsd(vmm_dst, vmm_zero, vmm_dst); + vpmovusdb(op, vmm_dst); + } else { + uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst); + if (isa != x64::sse41) { + vpermq(ymm_dst, ymm_dst, 0x08); + } + uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst); + if (isa != x64::sse41) { + vmovq(op, xmm_dst); + } else { + movd(op, xmm_dst); + } + } + break; + default: + OPENVINO_THROW("unknown dst_prc"); + } +} + +template +void jit_uni_eltwise_generic::store_scalar(const Xbyak::Address& op, + Xmm xmm_dst, + ov::element::Type src_prc, + ov::element::Type dst_prc, + const bool do_output_saturation) { + if (src_prc == dst_prc) { + switch (src_prc.size()) { + case 4: + uni_vmovss(op, xmm_dst); + break; + case 1: + movq(reg_tmp_64, xmm_dst); + mov(op, reg_tmp_8); + break; + default: + OPENVINO_THROW("unknown prc"); + } + return; + } + + switch (src_prc) { + case ov::element::f32: + if (!dst_prc.is_real()) { + uni_vcvtps2dq(xmm_dst, xmm_dst); + } + break; + case ov::element::i32: + if (dst_prc.is_real()) { + uni_vcvtdq2ps(xmm_dst, xmm_dst); + } + break; + default: + OPENVINO_THROW("unknown src_prc"); + } + + switch (dst_prc) { + case ov::element::f32: + case ov::element::i32: + uni_vmovss(op, xmm_dst); + break; + case ov::element::bf16: + if (do_output_saturation) { + uni_vpsrld(xmm_dst, xmm_dst, 16); + } else { + uni_vcvtneps2bf16->emit_code({static_cast(xmm_dst.getIdx())}, + {static_cast(xmm_dst.getIdx())}); + } + uni_vpextrw(op, xmm_dst, 0x0); + break; + case ov::element::f16: + 
vcvtps2ph(xmm_dst, xmm_dst, 0x4); + movq(reg_tmp_64, xmm_dst); + mov(op, reg_tmp_16); + break; + case ov::element::i16: + uni_vpackssdw(xmm_dst, xmm_dst, xmm_dst); + movq(reg_tmp_64, xmm_dst); + mov(op, reg_tmp_16); + break; + case ov::element::u16: + uni_vpackusdw(xmm_dst, xmm_dst, xmm_dst); + movq(reg_tmp_64, xmm_dst); + mov(op, reg_tmp_16); + break; + case ov::element::i8: + uni_vpackssdw(xmm_dst, xmm_dst, xmm_dst); + uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst); + movq(reg_tmp_64, xmm_dst); + mov(op, reg_tmp_8); + break; + case ov::element::u8: + uni_vpackusdw(xmm_dst, xmm_dst, xmm_dst); + uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst); + movq(reg_tmp_64, xmm_dst); + mov(op, reg_tmp_8); + break; + default: + OPENVINO_THROW("unknown dst_prc"); + } +} + +template struct jit_uni_eltwise_generic; +template struct jit_uni_eltwise_generic; +template struct jit_uni_eltwise_generic; + +} // namespace x64 + +namespace { +template +struct SupportedPrecisions { + void operator()(std::set>& precisions) { + precisions = T::get_supported_precisions(); + } +}; +} // namespace + +std::set> eltwise_precision_helper::get_supported_precisions(const Algorithm& algo) { + std::set> precisions; + + OV_SWITCH(intel_cpu, + SupportedPrecisions, + precisions, + algo, + OV_CASE(Algorithm::EltwiseRelu, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseGeluErf, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseGeluTanh, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseElu, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseTanh, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseSigmoid, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseAbs, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseSqrt, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseSoftRelu, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseClamp, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseSwish, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseHswish, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseMish, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseHsigmoid, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseRoundHalfToEven, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseRoundHalfAwayFromZero, jit_dnnl_aux_emitter), + OV_CASE(Algorithm::EltwiseAdd, jit_add_emitter), + OV_CASE(Algorithm::EltwiseMulAdd, jit_mul_add_emitter), + OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), + OV_CASE(Algorithm::EltwiseMultiply, jit_multiply_emitter), + OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter), + OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter), + OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter), + OV_CASE(Algorithm::EltwiseNegative, jit_negative_emitter), + OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter), + OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter), + OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter), + OV_CASE(Algorithm::EltwiseMinimum, jit_minimum_emitter), + OV_CASE(Algorithm::EltwiseExp, jit_exp_emitter), + OV_CASE(Algorithm::EltwiseSquaredDifference, jit_squared_difference_emitter), + OV_CASE(Algorithm::EltwisePowerDynamic, jit_power_dynamic_emitter), + OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), + OV_CASE(Algorithm::EltwiseNotEqual, jit_not_equal_emitter), + OV_CASE(Algorithm::EltwiseGreater, jit_greater_emitter), + OV_CASE(Algorithm::EltwiseGreaterEqual, jit_greater_equal_emitter), + OV_CASE(Algorithm::EltwiseLess, jit_less_emitter), + OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter), + OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter), + 
OV_CASE(Algorithm::EltwiseLogicalOr, jit_logical_or_emitter),
+             OV_CASE(Algorithm::EltwiseLogicalXor, jit_logical_xor_emitter),
+             OV_CASE(Algorithm::EltwiseLogicalNot, jit_logical_not_emitter),
+             OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter),
+             OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter),
+             OV_CASE(Algorithm::EltwiseErf, jit_erf_emitter),
+             OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter),
+             OV_CASE(Algorithm::EltwiseIsFinite, jit_is_finite_emitter),
+             OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter),
+             OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter),
+             OV_CASE(Algorithm::EltwiseSelect, jit_select_emitter),
+             OV_CASE(Algorithm::EltwiseBitwiseAnd, jit_bitwise_and_emitter),
+             OV_CASE(Algorithm::EltwiseBitwiseNot, jit_bitwise_not_emitter),
+             OV_CASE(Algorithm::EltwiseBitwiseOr, jit_bitwise_or_emitter),
+             OV_CASE(Algorithm::EltwiseBitwiseXor, jit_bitwise_xor_emitter));
+
+    if (precisions.empty()) {
+        OPENVINO_THROW("Unsupported operation type for Eltwise emitter");
+    }
+
+    return precisions;
+}
+
+} // namespace intel_cpu
+} // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_uni_eltwise_generic.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_uni_eltwise_generic.hpp
new file mode 100644
index 00000000000000..e7cb6066815d9a
--- /dev/null
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_uni_eltwise_generic.hpp
@@ -0,0 +1,127 @@
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "cpu/x64/cpu_isa_traits.hpp"
+#include "cpu/x64/injectors/jit_uni_quantization_injector.hpp"
+#include "cpu/x64/jit_generator.hpp"
+#include "emitters/plugin/x64/jit_bf16_emitters.hpp"
+#include "emitters/plugin/x64/jit_emitter.hpp"
+#include "nodes/executors/eltwise.hpp"
+#include "nodes/kernels/jit_eltwise_common.hpp"
+#include "onednn/dnnl.h"
+#include "utils/cpu_utils.hpp"
+#include "utils/general_utils.h"
+
+namespace ov {
+namespace intel_cpu {
+namespace x64 {
+
+template <dnnl::impl::cpu::x64::cpu_isa_t isa>
+struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public dnnl::impl::cpu::x64::jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_eltwise_generic)
+
+    jit_uni_eltwise_generic(const jit_eltwise_params& jep,
+                            const std::vector<EltwiseData>& eltwise_data,
+                            const std::vector<ov::intel_cpu::Type>& ops_list,
+                            const dnnl::post_ops& post_ops);
+
+    void create_ker() override {
+        jit_generator::create_kernel();
+        ker_ = (decltype(ker_))jit_ker();
+    }
+
+    void generate() override;
+
+private:
+    using Vmm = typename dnnl::impl::utils::conditional3<isa == dnnl::impl::cpu::x64::sse41, Xbyak::Xmm, isa == dnnl::impl::cpu::x64::avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
+
+    inline Xbyak::Reg64 get_src_reg(int idx) {
+        return Xbyak::Reg64(r8.getIdx() + idx);
+    }
+
+    inline Vmm get_vmm_reg(int idx) {
+        return Vmm(1 + idx);
+    }
+
+    inline Vmm get_aux_vmm(int idx) {
+        return Vmm(10 + idx);
+    }
+
+    inline Xbyak::Xmm get_xmm_reg(int idx) {
+        return Xbyak::Xmm(get_vmm_reg(idx).getIdx());
+    }
+
+    Xbyak::Reg64 reg_post_op_ptrs = rax;
+    Xbyak::Reg64 start_to_offsets = reg_post_op_ptrs;  // rax
+    Xbyak::Reg64 reg_dst = rbx;
+    Xbyak::Reg64 reg_work_amount = rdx;
+
+    static constexpr auto abi_param_regs = dnnl::impl::cpu::x64::abi_param_regs;
+    static constexpr auto abi_not_param_reg = dnnl::impl::cpu::x64::abi_not_param_reg;
+    Xbyak::Reg64 reg_oc_off = abi_not_param1;
+    Xbyak::Reg64 reg_const_params = abi_param1;
+    Xbyak::Reg64 reg_indexes = abi_param2;  // reg_d_bias
+
+    Xbyak::Reg8 reg_tmp_8 = Xbyak::Reg8(r15.getIdx());
+    Xbyak::Reg16 reg_tmp_16 = Xbyak::Reg16(r15.getIdx());
+    Xbyak::Reg32 reg_tmp_32 = Xbyak::Reg32(r15.getIdx());
+    Xbyak::Reg64 reg_post_op_ptrs = rax;
+    Xbyak::Reg64 start_to_offsets = reg_post_op_ptrs;  // rax
+    Xbyak::Reg64 reg_dst = rbx;
+    Xbyak::Reg64 reg_work_amount = rdx;
+
+    static constexpr auto abi_param_regs = dnnl::impl::cpu::x64::abi_param_regs;
+    static constexpr auto abi_not_param_reg = dnnl::impl::cpu::x64::abi_not_param_reg;
+    Xbyak::Reg64 reg_oc_off = abi_not_param1;
+    Xbyak::Reg64 reg_const_params = abi_param1;
+    Xbyak::Reg64 reg_indexes = abi_param2;  // reg_d_bias
+
+    Xbyak::Reg8 reg_tmp_8 = Xbyak::Reg8(r15.getIdx());
+    Xbyak::Reg16 reg_tmp_16 = Xbyak::Reg16(r15.getIdx());
+    Xbyak::Reg32 reg_tmp_32 = Xbyak::Reg32(r15.getIdx());
+    Xbyak::Reg64 reg_tmp_64 = Xbyak::Reg64(r15.getIdx());
+
+    Xbyak::Reg64 reg_d_weights = rbp;
+    Xbyak::Reg64 reg_d_bias = rsi;
+
+    Vmm vmm_dst = Vmm(9);
+    Xbyak::Xmm xmm_dst = Xbyak::Xmm(9);
+
+    Vmm vmm_d_weights = Vmm(12);
+    Vmm vmm_d_bias = Vmm(13);
+    Vmm vmm_zero = Vmm(15);
+
+    std::shared_ptr<jit_uni_vcvtneps2bf16> uni_vcvtneps2bf16;
+
+    std::shared_ptr<jit_emitter> eltwise_emitter = nullptr;
+    std::vector<std::shared_ptr<jit_emitter>> post_op_emitters = {};
+
+    std::vector<std::shared_ptr<dnnl::impl::cpu::x64::jit_uni_quantization_injector_f32<isa>>> quantization_injectors =
+        {};
+
+    const std::vector<EltwiseData>& eltwise_data_;
+    const std::vector<ov::intel_cpu::Type>& ops_list_;
+    const dnnl::post_ops& post_ops_;
+
+    std::shared_ptr<jit_emitter> create_eltwise_emitter(const EltwiseData& data, ov::element::Type exec_prec);
+
+    void compute_eltwise_op();
+    void apply_post_ops(bool is_scalar, int offset = 0);
+
+    void load_vector(Vmm vmm_src,
+                     const Xbyak::Address& op,
+                     ov::element::Type src_prc,
+                     ov::element::Type dst_prc,
+                     bool broadcast);
+    void load_scalar(Xbyak::Xmm xmm_src,
+                     const Xbyak::Address& op,
+                     ov::element::Type src_prc,
+                     ov::element::Type dst_prc);
+
+    void store_vector(const Xbyak::Address& op, Vmm vmm_dst, ov::element::Type src_prc, ov::element::Type dst_prc);
+    void store_scalar(const Xbyak::Address& op,
+                      Xbyak::Xmm xmm_dst,
+                      ov::element::Type src_prc,
+                      ov::element::Type dst_prc,
+                      const bool do_output_saturation);
+};
+
+}  // namespace x64
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp
index 28ca59bc6bc9e2..bff8d1193a15fc 100644
--- a/src/plugins/intel_cpu/src/nodes/pooling.cpp
+++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp
@@ -158,6 +158,15 @@ bool Pooling::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, st
             errorMessage = "Supported ops are MaxPool-1, MaxPool-8, MaxPool-14, AvgPool-1 and AvgPool-14";
             return false;
         }
+#if defined(OV_CPU_WITH_ACL)
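+        // MaxPool-8/14 may be asked to return indices in source-tensor coordinates, which the
+        // ACL pooling primitive appears to support only for a 2x2 kernel; other kernel sizes
+        // are rejected so the node falls back to the reference implementation.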
+        if (ov::as_type_ptr<const ov::op::v8::MaxPool>(op) ||
+            ov::as_type_ptr<const ov::op::v14::MaxPool>(op)) {
+            if (ov::as_type_ptr<const ov::op::util::MaxPoolBase>(op)->get_kernel() != ov::Shape(2, 2)) {
+                errorMessage = "Pooling indices returning source tensor coordinates is only supported for pool size 2x2";
+                return false;
+            }
+        }
+#endif
     } catch (...) {
         return false;
     }
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index e1386ccf79b326..cb683e27490e24 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -1120,17 +1120,18 @@ void Transformations::MainSnippets(void) {
                     ov::is_type(n) || ov::is_type(n) ||
                     ov::is_type(n) || ov::is_type(n) ||
                     ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n) ||
-                    ov::is_type(n) || ov::is_type(n));
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n) || ov::is_type(n) ||
+                    ov::is_type(n));
 #else
         // CPU Plugin support Swish in Subgraph via conversion to SwichCPU which assumes second input to be constant,
        // and CPU Plugin does not support Mish for x64
diff --git a/src/plugins/intel_cpu/src/utils/verbose.cpp b/src/plugins/intel_cpu/src/utils/verbose.cpp
index 53779ce516e580..dcc9e0b60b4e37 100644
--- a/src/plugins/intel_cpu/src/utils/verbose.cpp
+++ b/src/plugins/intel_cpu/src/utils/verbose.cpp
@@ -105,36 +105,48 @@ void Verbose::printInfo() {
         written_total += size;
     };
 
-    auto formatMemDesc = [&](const dnnl_memory_desc_t& desc, std::string& prefix) {
+    auto formatMemDesc = [&](const MemoryDescPtr& desc, std::string& prefix) {
         prefix = colorize(BLUE, prefix);
         written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, " ");
         shift(written);
         written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", prefix.c_str());
         shift(written);
-        std::string fmt_str = dnnl::impl::md2fmt_str("", desc, dnnl::impl::format_kind_t::dnnl_format_kind_undef);
+        std::string fmt_str = {};
+        std::string dim_str = {};
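+        // md2fmt_str()/md2dim_str() only understand oneDNN data types; for precisions with no
+        // dnnl counterpart, fall back below to printing the precision name and raw dims.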
+        if (DnnlExtensionUtils::ElementTypeToDataType(desc->getPrecision(), DnnlExtensionUtils::nothrow_tag{})) {
+            if (auto dnnl_desc = MemoryDescUtils::convertToDnnlMemoryDesc(desc)->getDnnlDesc()) {
+                fmt_str = dnnl::impl::md2fmt_str("", dnnl_desc.get(), dnnl::impl::format_kind_t::dnnl_format_kind_undef);
+                dim_str = dnnl::impl::md2dim_str(dnnl_desc.get());
+            } else {
+                fmt_str = "empty";
+            }
+        } else {
+            fmt_str = desc->getPrecision().to_string();
+            if (const auto& dims = desc->getShape().getDims(); !dims.empty()) {
+                dim_str = dim2str(dims.front());
+                std::for_each(++(dims.begin()), dims.end(), [&dim_str](size_t dim) {
+                    dim_str.append("x" + std::to_string(dim));
+                });
+            }
+        }
         written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", fmt_str.c_str());
         shift(written);
         written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, ":");
         shift(written);
-        std::string dim_str = dnnl::impl::md2dim_str(desc);
         written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", dim_str.c_str());
         shift(written);
     };
 
     for (size_t i = 0; i < node->getParentEdges().size(); i++) {
         std::string prefix("src:" + std::to_string(i) + ':');
-        formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(node->getParentEdgeAt(i)->getMemory().getDesc().clone())
-                          ->getDnnlDesc()
-                          .get(),
-                      prefix);
+        const auto& desc = node->getParentEdgeAt(i)->getMemory().getDescPtr();
+        formatMemDesc(desc, prefix);
     }
 
     for (size_t i = 0; i < node->getChildEdges().size(); i++) {
         std::string prefix("dst:" + std::to_string(i) + ':');
-        formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(node->getChildEdgeAt(i)->getMemory().getDesc().clone())
-                          ->getDnnlDesc()
-                          .get(),
-                      prefix);
+        const auto& desc = node->getChildEdgeAt(i)->getMemory().getDescPtr();
+        formatMemDesc(desc, prefix);
     }
 
     std::string post_ops;
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/pooling.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/pooling.cpp
new file mode 100644
index 00000000000000..1e760d74e8a8a4
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/pooling.cpp
@@ -0,0 +1,52 @@
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "custom/single_layer_tests/classes/pooling.hpp"
+#include "utils/cpu_test_utils.hpp"
+#include "utils/fusing_test_utils.hpp"
+#include "utils/filter_cpu_info.hpp"
+
+using namespace CPUTestUtils;
+
+namespace ov {
+namespace test {
+namespace Pooling {
+
+const std::vector<maxPoolV8SpecificParams>& paramsMaxV144D_2x2kernel = {
+    maxPoolV8SpecificParams{ {2, 2}, {2, 2}, {1, 1}, {0, 0}, {0, 0},
+                             ov::element::Type_t::i32, 0,
+                             ov::op::RoundingType::CEIL_TORCH, ov::op::PadType::SAME_UPPER },
+    maxPoolV8SpecificParams{ {2, 2}, {2, 2}, {1, 1}, {0, 0}, {0, 0},
+                             ov::element::Type_t::i32, 0,
+                             ov::op::RoundingType::CEIL_TORCH, ov::op::PadType::SAME_LOWER }
+};
+
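+// 2x2 is the only kernel size the ACL pooling path accepts when MaxPool-14 returns indices
+// (see the check in Pooling::isSupportedOperation), so these cases stay on ACL; the
+// non-2x2 cases below exercise the ref_any fallback instead.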
+INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV14_CPU_4D_2x2Kernel, MaxPoolingV14LayerCPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(paramsMaxV144D_2x2kernel),
+                             ::testing::ValuesIn(inputShapes4D()),
+                             ::testing::ValuesIn((inpOutPrecision())),
+                             ::testing::ValuesIn(filterCPUInfo(vecCpuConfigsFusing_4D())),
+                             ::testing::Values(CPUTestUtils::empty_plugin_config)),
+                         MaxPoolingV14LayerCPUTest::getTestCaseName);
+
+const std::vector<maxPoolV8SpecificParams>& paramsMaxV144D_non2x2kernel = {
+    maxPoolV8SpecificParams{ {11, 7}, {2, 2}, {1, 1}, {2, 2}, {2, 2},
+                             ov::element::Type_t::i32, 0,
+                             ov::op::RoundingType::CEIL_TORCH, ov::op::PadType::EXPLICIT },
+};
+
+// The test checks that fallback to the reference implementation works for ACL-unsupported cases
+INSTANTIATE_TEST_SUITE_P(smoke_MaxPoolV14_CPU_4D_non2x2Kernel_ref, MaxPoolingV14LayerCPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(paramsMaxV144D_non2x2kernel),
+                             ::testing::ValuesIn(inputShapes4D()),
+                             ::testing::ValuesIn((inpOutPrecision())),
+                             ::testing::Values(CPUSpecificParams{{}, {}, {"ref_any"}, "ref_any"}),
+                             ::testing::Values(CPUTestUtils::empty_plugin_config)),
+                         MaxPoolingV14LayerCPUTest::getTestCaseName);
+
+}  // namespace Pooling
+}  // namespace test
+}  // namespace ov
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
index 11c40d5067cedf..fe027ba8bbc664 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -460,7 +460,7 @@ std::vector<std::string> disabledTestPatterns() {
     retVector.emplace_back(R"(smoke_Snippets.*\[.*\?.*\].*)");
     retVector.emplace_back(R"(smoke_Snippets_Eltwise.*\[1.1..10.1..8.1..4\].*)");
     // smoke_Snippets test cases are not supported on arm64 platforms, except for smoke_Snippets_Eltwise
-    retVector.emplace_back(R"(smoke_Snippets(?!_Eltwise|_Convert).*)");
+    retVector.emplace_back(R"(smoke_Snippets(?!_Eltwise|_Convert|_FQDecomposition_).*)");
     // arm snippets doesn't support sve_128 that required by dnnl injector jit_uni_eltwise_injector_f32 yet
     retVector.emplace_back(R"(smoke_Snippets_Eltwise_TwoResults.*)");
     retVector.emplace_back(R"(smoke_Snippets_Eltwise/TwoInputsAndOutputs.*)");
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fake_quantize_decomposition_test.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fake_quantize_decomposition_test.cpp
index 4e2cba4cebc6b8..8c3dbc0f7ed108 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fake_quantize_decomposition_test.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/fake_quantize_decomposition_test.cpp
@@ -73,8 +73,7 @@ INSTANTIATE_TEST_SUITE_P(
     ::testing::Combine(
         ::testing::ValuesIn(testValuesDecompositionScalars),
         ::testing::ValuesIn(operations),
-        // reorder (nChw[16|8]c) + MaxPool + Subgraph + reorder(nchw)
-        ::testing::Values(std::pair<size_t, size_t>{4, 1}),
+        ::testing::Values(std::pair<size_t, size_t>{1, 1}),
         ::testing::Values(ov::test::utils::DEVICE_CPU)),
     FakeQuantizeDecompositionTest::getTestCaseName);
 
@@ -84,8 +83,7 @@ INSTANTIATE_TEST_SUITE_P(
     ::testing::Combine(
         ::testing::ValuesIn(testValuesDecompositionPerChannel),
         ::testing::ValuesIn(operations),
-        // reorder (nChw[16|8]c) + MaxPool + reorder(nChw[16|8]c) x6 + Subgraph + reorder(nchw)
-        ::testing::Values(std::pair<size_t, size_t>{10, 1}),
+        ::testing::Values(std::pair<size_t, size_t>{1, 1}),
         ::testing::Values(ov::test::utils::DEVICE_CPU)),
     FakeQuantizeDecompositionTest::getTestCaseName);
 
@@ -95,13 +93,12 @@ INSTANTIATE_TEST_SUITE_P(
     ::testing::Combine(
         ::testing::ValuesIn(testValuesDecompositionPerChannelInput),
         ::testing::ValuesIn(operations),
-        // reorder (nChw[16|8]c) + MaxPool + reorder(nChw[16|8]c) x4 + Subgraph + reorder(nchw)
-        ::testing::Values(std::pair<size_t, size_t>{8, 1}),
+        ::testing::Values(std::pair<size_t, size_t>{1, 1}),
         ::testing::Values(ov::test::utils::DEVICE_CPU)),
     FakeQuantizeDecompositionTest::getTestCaseName);
 }  // namespace decompositionInSubgraph
-
+#ifdef OPENVINO_ARCH_X86_64
 namespace legacyFuse {
 const std::vector testValuesLegacyFuse = {
     {
@@ -144,11 +141,11 @@ INSTANTIATE_TEST_SUITE_P(
     ::testing::Combine(
         ::testing::ValuesIn(testValuesLegacyFuse),
         ::testing::ValuesIn(operations),
-        // reorder (nChw[16|8]c) + MaxPool + reorder(nhwc) + Convolution(with internal weight reordering) + reorder(nchw)
-        ::testing::Values(std::pair<size_t, size_t>{5, 0}),
+        // reorder (nChw[16|8]c) + Convolution(with internal weight reordering) + reorder(nchw)
+        ::testing::Values(std::pair<size_t, size_t>{3, 0}),
         ::testing::Values(ov::test::utils::DEVICE_CPU)),
     FakeQuantizeDecompositionTest::getTestCaseName);
-
 }  // namespace legacyFuse
+#endif
 
 }  // namespace
diff --git a/src/plugins/intel_cpu/tests/unit/nodes/eltwise_node_test.cpp b/src/plugins/intel_cpu/tests/unit/nodes/eltwise_node_test.cpp
index 091f9e0d1bcc2f..60b81c38f1e87c 100644
--- a/src/plugins/intel_cpu/tests/unit/nodes/eltwise_node_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/nodes/eltwise_node_test.cpp
@@ -10,6 +10,11 @@ using namespace ov::intel_cpu;
 
 class EltwisePrecisionHelperTest : public testing::Test {};
 
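+// get_precision() now receives the candidate output precisions as an explicit, ordered
+// argument; the tests pin the priority list they rely on here.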
+namespace EltwisePrecisionHelper {
+static const std::vector<ov::element::Type> exec_precisions_priority =
+    {ov::element::u8, ov::element::bf16, ov::element::i32, ov::element::f32};
+}  // namespace EltwisePrecisionHelper
+
 TEST(EltwisePrecisionHelperTest, get_precision_mixed) {
     ov::element::Type src_prc[MAX_ELTWISE_INPUTS];
     const size_t inputs_size = 4ull;
@@ -22,7 +27,8 @@ TEST(EltwisePrecisionHelperTest, get_precision_mixed) {
         {Algorithm::EltwiseMulAdd}
     };
 
-    const auto precision = ov::intel_cpu::node::eltwise_precision_helper::get_precision(inputs_size, src_prc, eltwise_data);
+    const auto precision =
+        ov::intel_cpu::eltwise_precision_helper::get_precision(inputs_size, src_prc, eltwise_data, EltwisePrecisionHelper::exec_precisions_priority);
     ASSERT_EQ(ov::element::i32, precision);
 }
 
@@ -38,6 +44,7 @@ TEST(EltwisePrecisionHelperTest, get_precision_single) {
         {Algorithm::EltwiseMod}
     };
 
-    const auto precision = ov::intel_cpu::node::eltwise_precision_helper::get_precision(inputs_size, src_prc, eltwise_data);
+    const auto precision =
+        ov::intel_cpu::eltwise_precision_helper::get_precision(inputs_size, src_prc, eltwise_data, EltwisePrecisionHelper::exec_precisions_priority);
     ASSERT_EQ(ov::element::f32, precision);
 }
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index bcf468cea97d03..04d9a573ec1227 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -2672,6 +2672,7 @@ bool primitive_inst::is_valid_fusion() const {
         const auto& outer_dep = _deps[outer_dep_idx];
 
         const auto& outer_dep_pshape = outer_dep.first->_impl_params->get_output_layout().get_partial_shape();
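+        // Element count of the fused dependency when its shape is static (0 = dynamic, unknown).
+        // A count of 1 marks a per-tensor operand that is always broadcastable, so the stricter
+        // shape checks below are skipped for it.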
+        size_t outer_dep_pshape_count = outer_dep_pshape.is_static() ? ov::shape_size(outer_dep_pshape.to_shape()) : 0;
         auto merged_shape = out_pshape;
         bool can_broadcast = true;
         if (fd.is_type<eltwise>())
@@ -2679,7 +2680,8 @@
             // Check if broadcast happens more than single axis.
             // Current gemm_tiled_opt kernel FUSED_OP_LOAD macro cannot support broadcast on dynamic dimension.
-            if (_node->is_type<gemm>() && can_broadcast == true && merged_shape.rank().get_length() >= outer_dep_pshape.rank().get_length()) {
+            if (_node->is_type<gemm>() && can_broadcast == true && merged_shape.rank().get_length() >= outer_dep_pshape.rank().get_length() &&
+                outer_dep_pshape_count != 1) {
                 uint8_t broadcast_more_than_single_axis = 0;
                 auto updated_outer_dep_pshape = ov::PartialShape(outer_dep_pshape);
@@ -2715,7 +2717,7 @@ bool primitive_inst::is_valid_fusion() const {
                                                        cldnn::format::dimension(data_layout.format),
                                                        false);
 
-            if (gemm_dims[0] != data_dims[0])
+            if (gemm_dims[0] != data_dims[0] && outer_dep_pshape_count != 1)
                 return false;
         } else if (_node->is_type<fully_connected>() && _node->get_preferred_impl_type() == impl_types::onednn) {
             const auto& fc_layout = _impl_params->get_output_layout();
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 425e7fc6265ee0..579331aa149d92 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -990,7 +990,6 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         pass_config->disable();
         pass_config->disable();
         pass_config->disable();
-        pass_config->disable();
 
         pass_config->set_callback(
             [](const std::shared_ptr &node) -> bool {
@@ -1009,7 +1008,6 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         auto params = LayerTransformation::Params(false, infer_precision, {infer_precision}, true, true);
         auto lpt_pass = manager.register_pass(supportedPrecisions, perTensorQuantization, params);
         lpt_pass->add_main();
-        lpt_pass->add_main();
         lpt_pass->add_main();
 
         // Move up remained scalar-multiply layers
diff --git a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp
index 1fa303656f80a5..6cec2bf8666633 100644
--- a/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/fusions/gemm_fusion_test.cpp
@@ -432,6 +432,40 @@ TEST_P(gemm_2in_add, eltwise_postop_scalar) {
     execute(p, false, true);
 }
 
+TEST_P(gemm_2in_add, eltwise_postop_scalar_dynamic) {
+    auto p = GetParam();
+
+    if (engine.get_device_info().supports_immad) {
+        ov::intel_gpu::ImplementationDesc gemmv_impl = { cldnn::format::type::any, "", impl_types::onednn };
+        cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm_prim", gemmv_impl } }));
+        cfg_fused.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true));
+    }
+
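+    // Shape the fused add input as a per-tensor operand: same rank as the gemm output with
+    // every dimension set to 1, exercising the scalar-broadcast path that is_valid_fusion()
+    // now permits for dynamic gemms.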
input_info("add_prim"), p.default_format, data_types::f32) + ); + + tolerance = default_tolerance(p.default_type); + execute(p, true, true); +} + INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_add, ::testing::ValuesIn(std::vector{ // gemm_test_params{ CASE_GEMM_2IN_FP16_3, 3, 4, "", broadcast_kinds::none, eltwise_mode::sum }, // TODO: check why failed in eltwise_postop_dynamic gemm_test_params{ CASE_GEMM_2IN_FP16_4, 3, 4, "", broadcast_kinds::none, eltwise_mode::sum }, diff --git a/src/tests/functional/plugin/shared/src/snippets/fake_quantize_decomposition_test.cpp b/src/tests/functional/plugin/shared/src/snippets/fake_quantize_decomposition_test.cpp index 6d738751767405..43668ee49cdae5 100644 --- a/src/tests/functional/plugin/shared/src/snippets/fake_quantize_decomposition_test.cpp +++ b/src/tests/functional/plugin/shared/src/snippets/fake_quantize_decomposition_test.cpp @@ -61,7 +61,7 @@ void FakeQuantizeDecompositionTest::SetUp() { values.inputType, values.fakeQuantizeShapes, values.zeroPoint, - ov::test::snippets::FunctionHelper::makePrerequisitesOriginal(), + {}, op); }