diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index 999be845caac31..89cae91851104b 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -45,7 +45,7 @@ jobs: repo_token: ${{ secrets.GITHUB_TOKEN }} skip_when_only_listed_labels_set: 'docs' skip_when_only_listed_files_changed: '*.md,*.rst,*.png,*.jpg,*.svg,*/layer_tests_summary/*,*/conformance/*' - + - name: Get target branch id: set_target_branch run: | @@ -192,7 +192,7 @@ jobs: sparse-checkout: | src/bindings/js path: 'openvino' - + - name: Download OpenVINO artifacts (JS) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: @@ -223,7 +223,7 @@ jobs: run: call npm test - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v2 + uses: microsoft/setup-msbuild@6fb02220983dee41ce7ae257b6f4d8f9bf5ed4ce # v2 - name: E2E of openvino-node package working-directory: ${{ env.OPENVINO_JS_DIR }}/node diff --git a/.github/workflows/workflows_scans.yml b/.github/workflows/workflows_scans.yml index 1a3d091544e784..ace73b18751606 100644 --- a/.github/workflows/workflows_scans.yml +++ b/.github/workflows/workflows_scans.yml @@ -18,6 +18,37 @@ concurrency: permissions: read-all jobs: + codeql: + name: github_actions_workflows_scan/codeql + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners + # Consider using larger runners for possible analysis time improvements. + runs-on: ubuntu-22.04 + timeout-minutes: 60 + permissions: + security-events: write + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + timeout-minutes: 15 + with: + submodules: 'false' + sparse-checkout: .github/workflows + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@df409f7d9260372bd5f19e5b04e83cb3c43714ae # v3.27.9 + with: + languages: "actions" + build-mode: "none" + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@df409f7d9260372bd5f19e5b04e83cb3c43714ae # v3.27.9 + with: + category: "/language:actions" + semgrep: name: github_actions_workflows_scan/semgrep runs-on: ubuntu-latest diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index f898ddaf42ba03..70af3ce17566e6 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -105,7 +105,7 @@ Deprecation And Support Using deprecated features and components is not advised. They are available to enable a smooth transition to new solutions and will be discontinued in the future. To keep using discontinued features, you will have to revert to the last LTS OpenVINO version supporting them. -For more details, refer to the `OpenVINO Legacy Features and Components __` +For more details, refer to the `OpenVINO Legacy Features and Components __` page. 
diff --git a/docs/articles_en/get-started/learn-openvino/openvino-samples/get-started-demos.rst b/docs/articles_en/get-started/learn-openvino/openvino-samples/get-started-demos.rst index f61ccf5cacd2f3..32dde2fd2a324b 100644 --- a/docs/articles_en/get-started/learn-openvino/openvino-samples/get-started-demos.rst +++ b/docs/articles_en/get-started/learn-openvino/openvino-samples/get-started-demos.rst @@ -262,7 +262,7 @@ You need a model that is specific for your inference task. You can get it from o Convert the Model -------------------- -If Your model requires conversion, check the `article `__ for information how to do it. +If Your model requires conversion, check the :doc:`article <../../../openvino-workflow/model-preparation>` for information how to do it. .. _download-media: diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst index 232e0f2c2a66b9..80c98e1b857522 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst @@ -5,8 +5,9 @@ LLM Weight Compression :maxdepth: 1 :hidden: - weight-compression/microscaling-quantization weight-compression/4-bit-weight-quantization + weight-compression/microscaling-quantization + Weight compression enhances the efficiency of models by reducing their memory footprint, @@ -16,14 +17,13 @@ Unlike full model quantization, where both weights and activations are quantized only targets weights, keeping activations as floating-point numbers. This means preserving most of the model's accuracy while improving its speed and reducing its size. The reduction in size is especially noticeable with larger models. -For instance the 7 billion parameter Llama 2 model can be reduced -from about 25GB to 4GB using 4-bit weight compression. +For instance the 8 billion parameter Llama 3 model can be reduced +from about 16.1 GB to 4.8 GB using 4-bit weight quantization on top of bfloat16 model. .. note:: - With smaller language models (i.e. less than 1B parameters), weight + With smaller language models (i.e. less than 1B parameters), low-bit weight compression may result in more accuracy reduction than with larger models. - Therefore, weight compression is recommended for use with LLMs only. LLMs and other GenAI models that require extensive memory to store the weights during inference can benefit @@ -36,7 +36,7 @@ from weight compression as it: * improves inference speed by reducing the latency of memory access when computing the operations with weights, for example, Linear layers. The weights are smaller and thus faster to load from memory; -* unlike quantization, does not require sample data to calibrate the range of +* unlike full static quantization, does not require sample data to calibrate the range of activation values. Currently, `NNCF `__ @@ -64,7 +64,7 @@ by running the following command: pip install optimum[openvino] **8-bit weight quantization** offers a good balance between reducing the size and lowering the -accuracy of a model. It usually results in significant improvements for transformer-based models +accuracy of a model. It usually results in significant improvements for Transformer-based models and guarantees good model performance for a vast majority of supported CPU and GPU platforms. By default, weights are compressed asymmetrically to "INT8_ASYM" mode. @@ -223,17 +223,6 @@ depending on the model. 
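As a quick illustration of the default behaviour described above, the sketch below applies 8-bit weight compression to a model that has already been converted to OpenVINO IR. It is a minimal example with placeholder file names; without extra arguments, ``nncf.compress_weights`` uses the INT8_ASYM mode mentioned in this section.

.. code-block:: python

    import nncf
    import openvino as ov

    core = ov.Core()
    model = core.read_model("model.xml")  # placeholder path to an OpenVINO IR model

    # Compress only the weights; activations stay in floating point.
    # With no extra arguments NNCF defaults to the INT8_ASYM mode.
    compressed_model = nncf.compress_weights(model)

    ov.save_model(compressed_model, "model_int8.xml")

No calibration data is needed for this default, data-free mode, which is what distinguishes weight-only compression from full static quantization.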
For more details, refer to the article on how to :doc:`infer LLMs using Optimum Intel <../../../openvino-workflow-generative/inference-with-optimum-intel>`. -The code snippet below shows how to do 4-bit quantization of the model weights represented -in OpenVINO IR using NNCF: - -.. tab-set:: - - .. tab-item:: OpenVINO - :sync: openvino - - .. doxygensnippet:: docs/optimization_guide/nncf/code/weight_compression_openvino.py - :language: python - :fragment: [compression_4bit] Refer to the article about :doc:`4-bit weight quantization <./weight-compression/4-bit-weight-quantization>` diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst index ae9bc7d7b8b4a3..3994e5550c4e2f 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression/4-bit-weight-quantization.rst @@ -133,7 +133,12 @@ trade-offs after optimization: There are three modes: INT8_ASYM, INT8_SYM, and NONE, which retains the original floating-point precision of the model weights (``INT8_ASYM`` is default value). -| + + +.. tip:: + + NNCF allows stacking the supported optimization methods. For example, AWQ, Scale Estimation + and GPTQ methods can be enabled all together to achieve better accuracy. 4-bit Weight Quantization with GPTQ ################################### diff --git a/docs/articles_en/openvino-workflow/model-optimization.rst b/docs/articles_en/openvino-workflow/model-optimization.rst index f5a5f97341e960..e44cf556329bd1 100644 --- a/docs/articles_en/openvino-workflow/model-optimization.rst +++ b/docs/articles_en/openvino-workflow/model-optimization.rst @@ -21,24 +21,24 @@ In OpenVINO, the default optimization tool is NNCF (Neural Network Compression F It is a `set of compression algorithms `__, organized as a Python package, that make your models smaller and faster. Note that NNCF is **not part of the OpenVINO package**, so it needs to be installed separately. It supports -models in **PyTorch**, **TensorFlow** , **ONNX**, and **OpenVINO IR** formats, offering +models in **OpenVINO IR**, **PyTorch**, **ONNX**, and **TensorFlow** formats, offering the following main optimizations: .. image:: ../assets/images/WHAT_TO_USE.svg | :doc:`Weight Compression `: -| an easy-to-use method for Large Language Model footprint reduction and inference +| An easy-to-use method for Large Language Model footprint reduction and inference acceleration. | :doc:`Post-training Quantization `: -| designed to optimize deep learning models by applying 8-bit integer quantization. Being +| Designed to optimize deep learning models by applying 8-bit integer quantization. Being the easiest way to optimize a model it does not require its retraining or fine-tuning but may result in a drop in accuracy. If the accuracy-performance tradeoff is not acceptable, Training-time Optimization may be a better option. | :doc:`Training-time Optimization `: -| involves a suite of advanced methods such as Structured or Unstructured Pruning, as well +| Involves a suite of advanced methods such as Structured or Unstructured Pruning, as well as Quantization-aware Training. This kind of optimization requires the use of the model's original framework, for NNCF, it is either PyTorch or TensorFlow. @@ -54,13 +54,7 @@ Recommended workflows 3. 
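To make the stacking tip above more concrete, here is a minimal, data-free sketch of 4-bit weight compression with NNCF; the file names are placeholders. The data-aware methods named in the tip (AWQ, Scale Estimation, GPTQ) are only indicated in a comment because they additionally require a calibration dataset.

.. code-block:: python

    import nncf
    import openvino as ov

    core = ov.Core()
    model = core.read_model("model.xml")  # placeholder path to an OpenVINO IR model

    compressed_model = nncf.compress_weights(
        model,
        mode=nncf.CompressWeightsMode.INT4_SYM,
        ratio=0.8,       # roughly 80% of weights go to 4-bit, the rest stay 8-bit
        group_size=128,  # group-wise quantization granularity
    )

    # The stacked, data-aware variant from the tip passes dataset=nncf.Dataset(...)
    # together with awq=True, scale_estimation=True (and optionally gptq=True)
    # to the same call.

    ov.save_model(compressed_model, "model_int4.xml")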
If the accuracy drop is unacceptable, use quantization-aware training instead. It will give you the same level of performance boost, with a smaller impact on accuracy. -* **Weight compression** works **only with LLMs**. Do not try to use it with other models. -* For **visual-multimodal** use cases, the encoder / decoder split approach may be recommended. - - - - - +* **Weight compression** works with **LLMs**, **VLMs** and other Transformer-based models. diff --git a/docs/notebooks/convert-to-openvino-with-output.rst b/docs/notebooks/convert-to-openvino-with-output.rst index 8cf851bfcd9c16..23e93612b2cad5 100644 --- a/docs/notebooks/convert-to-openvino-with-output.rst +++ b/docs/notebooks/convert-to-openvino-with-output.rst @@ -54,7 +54,7 @@ OpenVINO IR format OpenVINO `Intermediate Representation -(IR) `__ +(IR) `__ is the proprietary model format of OpenVINO. It is produced after converting a model with model conversion API. Model conversion API translates the frequently used deep learning operations to their diff --git a/docs/notebooks/cross-lingual-books-alignment-with-output.rst b/docs/notebooks/cross-lingual-books-alignment-with-output.rst index b9f4024dcb8f0f..047c9d4a733552 100644 --- a/docs/notebooks/cross-lingual-books-alignment-with-output.rst +++ b/docs/notebooks/cross-lingual-books-alignment-with-output.rst @@ -941,7 +941,7 @@ advance and fill it in as the inference requests are executed. Let’s compare the models and plot the results. **Note**: To get a more accurate benchmark, use the `Benchmark Python - Tool `__ + Tool `__ .. code:: ipython3 diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst index 13da68e4db5001..265707b944aa95 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst @@ -623,7 +623,7 @@ Compare Performance of the FP32 IR Model and Quantized Models To measure the inference performance of the ``FP32`` and ``INT8`` models, we use `Benchmark -Tool `__ +Tool `__ - OpenVINO’s inference performance measurement tool. Benchmark tool is a command line application, part of OpenVINO development tools, that can be run in the notebook with ``! benchmark_app`` or diff --git a/docs/notebooks/ddcolor-image-colorization-with-output.rst b/docs/notebooks/ddcolor-image-colorization-with-output.rst index 6215f42113cacd..ef848cc586e016 100644 --- a/docs/notebooks/ddcolor-image-colorization-with-output.rst +++ b/docs/notebooks/ddcolor-image-colorization-with-output.rst @@ -499,7 +499,7 @@ Compare inference time of the FP16 and INT8 models To measure the inference performance of OpenVINO FP16 and INT8 models, use `Benchmark -Tool `__. +Tool `__. **NOTE**: For the most accurate performance estimation, it is recommended to run ``benchmark_app`` in a terminal/command prompt diff --git a/docs/notebooks/depth-anything-v2-with-output.rst b/docs/notebooks/depth-anything-v2-with-output.rst index e5106f8b8781db..f45fe775883264 100644 --- a/docs/notebooks/depth-anything-v2-with-output.rst +++ b/docs/notebooks/depth-anything-v2-with-output.rst @@ -977,7 +977,7 @@ Compare inference time of the FP16 and INT8 models To measure the inference performance of OpenVINO FP16 and INT8 models, use `Benchmark -Tool `__. +Tool `__. 
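``benchmark_app`` remains the recommended way to measure performance, but for a rough in-notebook comparison the OpenVINO Python API can be used directly. The sketch below assumes static input shapes and placeholder IR paths for the FP16 and INT8 models; it only reports an average synchronous latency.

.. code-block:: python

    import time

    import numpy as np
    import openvino as ov

    core = ov.Core()


    def average_latency_ms(model_path: str, device: str = "CPU", runs: int = 50) -> float:
        # Compile the model and time synchronous inference on random data.
        compiled = core.compile_model(model_path, device)
        input_port = compiled.input(0)
        data = np.random.rand(*input_port.shape).astype(np.float32)
        request = compiled.create_infer_request()
        request.infer({input_port: data})  # warm-up run
        start = time.perf_counter()
        for _ in range(runs):
            request.infer({input_port: data})
        return (time.perf_counter() - start) / runs * 1000


    # Placeholder paths for the FP16 and INT8 IR files produced earlier.
    print(f"FP16: {average_latency_ms('model_fp16.xml'):.2f} ms")
    print(f"INT8: {average_latency_ms('model_int8.xml'):.2f} ms")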
**NOTE**: For the most accurate performance estimation, it is recommended to run ``benchmark_app`` in a terminal/command prompt diff --git a/docs/notebooks/depth-anything-with-output.rst b/docs/notebooks/depth-anything-with-output.rst index 49f15f430f9746..144a54d6e6f4ac 100644 --- a/docs/notebooks/depth-anything-with-output.rst +++ b/docs/notebooks/depth-anything-with-output.rst @@ -940,7 +940,7 @@ Compare inference time of the FP16 and INT8 models To measure the inference performance of OpenVINO FP16 and INT8 models, use `Benchmark -Tool `__. +Tool `__. **NOTE**: For the most accurate performance estimation, it is recommended to run ``benchmark_app`` in a terminal/command prompt diff --git a/docs/notebooks/fast-segment-anything-with-output.rst b/docs/notebooks/fast-segment-anything-with-output.rst index 0071e2dca60e74..32915c9a16dc95 100644 --- a/docs/notebooks/fast-segment-anything-with-output.rst +++ b/docs/notebooks/fast-segment-anything-with-output.rst @@ -100,29 +100,29 @@ Imports import ipywidgets as widgets from pathlib import Path - + import openvino as ov import torch from PIL import Image from ultralytics import FastSAM - + # Fetch skip_kernel_extension module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", ) open("skip_kernel_extension.py", "w").write(r.text) # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - + open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file, device_widget - + %load_ext skip_kernel_extension FastSAM in Ultralytics @@ -142,7 +142,7 @@ model and generate a segmentation map. model_name = "FastSAM-x" model = FastSAM(model_name) - + # Run inference on an image image_uri = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg" image_uri = download_file(image_uri) @@ -169,7 +169,7 @@ model and generate a segmentation map. .. parsed-literal:: - + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 638.3ms Speed: 3.4ms preprocess, 638.3ms inference, 500.4ms postprocess per image at shape (1, 3, 768, 1024) @@ -210,16 +210,16 @@ tracing. The FastSAM model itself is based on YOLOv8 model. .. parsed-literal:: Ultralytics YOLOv8.2.24 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) - + PyTorch: starting from 'FastSAM-x.pt' with input shape (1, 3, 1024, 1024) BCHW and output shape(s) ((1, 37, 21504), (1, 32, 256, 256)) (138.3 MB) - + OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... 
OpenVINO: export success ✅ 6.2s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) - + Export complete (9.2s) Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything - Predict: yolo predict task=segment model=FastSAM-x_openvino_model imgsz=1024 - Validate: yolo val task=segment model=FastSAM-x_openvino_model imgsz=1024 data=ultralytics/datasets/sa.yaml + Predict: yolo predict task=segment model=FastSAM-x_openvino_model imgsz=1024 + Validate: yolo val task=segment model=FastSAM-x_openvino_model imgsz=1024 data=ultralytics/datasets/sa.yaml Visualize: https://netron.app @@ -230,7 +230,7 @@ Embedding the converted models into the original pipeline OpenVINO™ Runtime Python API is used to compile the model in OpenVINO IR format. The -`Core `__ +`Core `__ class provides access to the OpenVINO Runtime API. The ``core`` object, which is an instance of the ``Core`` class represents the API and it is used to compile the model. @@ -250,7 +250,7 @@ from the dropdown list: .. code:: ipython3 device = device_widget() - + device @@ -288,12 +288,12 @@ object, so we need to redefine the magic ``__call__`` method. def __init__(self, ov_model, device="CPU", stride=32, ov_config=None) -> None: ov_config = ov_config or {} self.model = core.compile_model(ov_model, device, ov_config) - + self.stride = stride self.pt = False self.fp16 = False self.names = {0: "object"} - + def __call__(self, im, **_): result = self.model(im) return torch.from_numpy(result[0]), torch.from_numpy(result[1]) @@ -306,7 +306,7 @@ pipeline. ov_config = {} if "GPU" in device.value or ("AUTO" in device.value and "GPU" in core.available_devices): ov_config = {"GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"} - + wrapped_model = OVWrapper( ov_model_path, device=device.value, @@ -314,13 +314,13 @@ pipeline. ov_config=ov_config, ) model.predictor.model = wrapped_model - + ov_results = model(image_uri, device=device.value, retina_masks=True, imgsz=1024, conf=0.6, iou=0.9) .. parsed-literal:: - + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 498.5ms Speed: 6.1ms preprocess, 498.5ms inference, 31.6ms postprocess per image at shape (1, 3, 1024, 1024) @@ -363,7 +363,7 @@ The optimization process contains the following steps: description="Quantization", disabled=False, ) - + do_quantize @@ -399,20 +399,20 @@ repo <-with-output.html>`__. .. code:: ipython3 %%skip not $do_quantize.value - + import pickle from contextlib import contextmanager from zipfile import ZipFile - + import cv2 from tqdm.autonotebook import tqdm - + import nncf - - + + COLLECT_CALIBRATION_DATA = False calibration_data = [] - + @contextmanager def calibration_data_collection(): global COLLECT_CALIBRATION_DATA @@ -421,58 +421,58 @@ repo <-with-output.html>`__. 
yield finally: COLLECT_CALIBRATION_DATA = False - - + + class NNCFWrapper: def __init__(self, ov_model, stride=32) -> None: self.model = core.read_model(ov_model) self.compiled_model = core.compile_model(self.model, device_name="CPU") - + self.stride = stride self.pt = False self.fp16 = False self.names = {0: "object"} - + def __call__(self, im, **_): if COLLECT_CALIBRATION_DATA: calibration_data.append(im) - + result = self.compiled_model(im) return torch.from_numpy(result[0]), torch.from_numpy(result[1]) - + # Fetch data from the web and descibe a dataloader DATA_URL = "https://ultralytics.com/assets/coco128.zip" OUT_DIR = Path('.') - + download_file(DATA_URL, directory=OUT_DIR, show_progress=True) - + if not (OUT_DIR / "coco128/images/train2017").exists(): with ZipFile('coco128.zip', "r") as zip_ref: zip_ref.extractall(OUT_DIR) - + class COCOLoader(torch.utils.data.Dataset): def __init__(self, images_path): self.images = list(Path(images_path).iterdir()) - + def __getitem__(self, index): if isinstance(index, slice): return [self.read_image(image_path) for image_path in self.images[index]] return self.read_image(self.images[index]) - + def read_image(self, image_path): image = cv2.imread(str(image_path)) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) return image - + def __len__(self): return len(self.images) - - + + def collect_calibration_data_for_decoder(model, calibration_dataset_size: int, calibration_cache_path: Path): global calibration_data - - + + if not calibration_cache_path.exists(): coco_dataset = COCOLoader(OUT_DIR / 'coco128/images/train2017') with calibration_data_collection(): @@ -484,10 +484,10 @@ repo <-with-output.html>`__. else: with open(calibration_cache_path, "rb") as f: calibration_data = pickle.load(f) - + return calibration_data - - + + def quantize(model, save_model_path: Path, calibration_cache_path: Path, calibration_dataset_size: int, preset: nncf.QuantizationPreset): calibration_data = collect_calibration_data_for_decoder( @@ -508,10 +508,10 @@ repo <-with-output.html>`__. ) ) ov.save_model(quantized_ov_decoder, save_model_path) - + wrapped_model = NNCFWrapper(ov_model_path, stride=model.predictor.model.stride) model.predictor.model = wrapped_model - + calibration_dataset_size = 128 quantized_model_path = Path(f"{model_name}_quantized") / "FastSAM-x.xml" calibration_cache_path = Path(f"calibration_data/coco{calibration_dataset_size}.pkl") @@ -553,7 +553,7 @@ repo <-with-output.html>`__. INFO:nncf:Not adding activation input quantizer for operation: 347 __module.model.22/aten::add/Add INFO:nncf:Not adding activation input quantizer for operation: 359 __module.model.22/aten::add/Add_1 371 __module.model.22/aten::div/Divide - + INFO:nncf:Not adding activation input quantizer for operation: 360 __module.model.22/aten::sub/Subtract_1 INFO:nncf:Not adding activation input quantizer for operation: 382 __module.model.22/aten::mul/Multiply @@ -595,15 +595,15 @@ calibration dataset to measure the performance. .. code:: ipython3 %%skip not $do_quantize.value - + import datetime - + coco_dataset = COCOLoader(OUT_DIR / 'coco128/images/train2017') calibration_dataset_size = 128 - + wrapped_model = OVWrapper(ov_model_path, device=device.value, stride=model.predictor.model.stride) model.predictor.model = wrapped_model - + start_time = datetime.datetime.now() for image in tqdm(coco_dataset, desc="Measuring inference time"): model(image, retina_masks=True, imgsz=1024, conf=0.6, iou=0.9, verbose=False) @@ -627,10 +627,10 @@ calibration dataset to measure the performance. .. 
code:: ipython3 %%skip not $do_quantize.value - + quantized_wrapped_model = OVWrapper(quantized_model_path, device=device.value, stride=model.predictor.model.stride) model.predictor.model = quantized_wrapped_model - + start_time = datetime.datetime.now() for image in tqdm(coco_dataset, desc="Measuring inference time"): model(image, retina_masks=True, imgsz=1024, conf=0.6, iou=0.9, verbose=False) @@ -670,8 +670,8 @@ bounding boxes on input image. import cv2 import numpy as np import matplotlib.pyplot as plt - - + + def fast_process( annotations, image, @@ -684,12 +684,12 @@ bounding boxes on input image. ): original_h = image.height original_w = image.width - + if better_quality: for i, mask in enumerate(annotations): mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8)) annotations[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8)) - + inner_mask = fast_show_mask( annotations, plt.gca(), @@ -699,7 +699,7 @@ bounding boxes on input image. target_height=original_h, target_width=original_w, ) - + if with_contours: contour_all = [] temp = np.zeros((original_h, original_w, 1)) @@ -717,18 +717,18 @@ bounding boxes on input image. cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2 // scale) color = np.array([0 / 255, 0 / 255, 255 / 255, 0.9]) contour_mask = temp / 255 * color.reshape(1, 1, -1) - + image = image.convert("RGBA") overlay_inner = Image.fromarray((inner_mask * 255).astype(np.uint8), "RGBA") image.paste(overlay_inner, (0, 0), overlay_inner) - + if with_contours: overlay_contour = Image.fromarray((contour_mask * 255).astype(np.uint8), "RGBA") image.paste(overlay_contour, (0, 0), overlay_contour) - + return image - - + + # CPU post process def fast_show_mask( annotation, @@ -746,7 +746,7 @@ bounding boxes on input image. areas = np.sum(annotation, axis=(1, 2)) sorted_indices = np.argsort(areas)[::1] annotation = annotation[sorted_indices] - + index = (annotation != 0).argmax(axis=0) if random_color: color = np.random.random((mask_sum, 1, 1, 3)) @@ -755,20 +755,20 @@ bounding boxes on input image. transparency = np.ones((mask_sum, 1, 1, 1)) * 0.6 visual = np.concatenate([color, transparency], axis=-1) mask_image = np.expand_dims(annotation, -1) * visual - + mask = np.zeros((height, weight, 4)) - + h_indices, w_indices = np.meshgrid(np.arange(height), np.arange(weight), indexing="ij") indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None)) - + mask[h_indices, w_indices, :] = mask_image[indices] if bbox is not None: x1, y1, x2, y2 = bbox ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor="b", linewidth=1)) - + if not retinamask: mask = cv2.resize(mask, (target_width, target_height), interpolation=cv2.INTER_NEAREST) - + return mask This is the main callback function that is called to segment an image @@ -779,8 +779,8 @@ based on user input. object_points = [] background_points = [] bbox_points = [] - - + + def segment( image, model_type, @@ -796,14 +796,14 @@ based on user input. model.predictor.model = quantized_wrapped_model else: model.predictor.model = wrapped_model - + input_size = int(input_size) w, h = image.size scale = input_size / max(w, h) new_w = int(w * scale) new_h = int(h * scale) image = image.resize((new_w, new_h)) - + results = model( image, retina_masks=use_retina, @@ -811,14 +811,14 @@ based on user input. 
conf=conf_threshold, imgsz=input_size, ) - + masks = results[0].masks.data # Calculate annotations if not (object_points or bbox_points): annotations = masks.cpu().numpy() else: annotations = [] - + if object_points: all_points = object_points + background_points labels = [1] * len(object_points) + [0] * len(background_points) @@ -841,20 +841,20 @@ based on user input. x = max(min(x, new_w), 0) y = max(min(y, new_h), 0) scaled_bbox_points.append((x, y)) - + for i in range(0, len(scaled_bbox_points) - 1, 2): x0, y0, x1, y1 = *scaled_bbox_points[i], *scaled_bbox_points[i + 1] - + intersection_area = torch.sum(masks[:, y0:y1, x0:x1], dim=(1, 2)) masks_area = torch.sum(masks, dim=(1, 2)) bbox_area = (y1 - y0) * (x1 - x0) - + union = bbox_area + masks_area - intersection_area iou = intersection_area / union max_iou_index = torch.argmax(iou) - + annotations.append(masks[max_iou_index].cpu().numpy()) - + return fast_process( annotations=np.array(annotations), image=image, @@ -871,11 +871,11 @@ based on user input. if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/fast-segment-anything/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(fn=segment, quantized=do_quantize.value) - + try: demo.queue().launch(debug=False) except Exception: @@ -888,7 +888,7 @@ based on user input. .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - + To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/gpu-device-with-output.rst b/docs/notebooks/gpu-device-with-output.rst index b06b55c8af8919..3e10cf4ab7c8bf 100644 --- a/docs/notebooks/gpu-device-with-output.rst +++ b/docs/notebooks/gpu-device-with-output.rst @@ -99,10 +99,10 @@ cards `__. To get started, first `install -OpenVINO `__ +OpenVINO `__ on a system equipped with one or more Intel GPUs. Follow the `GPU configuration -instructions `__ +instructions `__ to configure OpenVINO to work with your GPU. Then, read on to learn how to accelerate inference with GPUs in OpenVINO! @@ -160,7 +160,7 @@ section. If the GPUs are installed correctly on the system and still do not appear in the list, follow the steps described -`here `__ +`here `__ to configure your GPU drivers to work with OpenVINO. Once we have the GPUs working with OpenVINO, we can proceed with the next sections. @@ -509,7 +509,7 @@ hints.PerformanceMode.CUMULATIVE_THROUGHPUT}) \` how to set up an asynchronous pipeline that takes advantage of parallelism to increase throughput.** To learn more, see `Asynchronous - Inferencing `__ + Inferencing `__ in OpenVINO as well as the `Asynchronous Inference notebook `__. @@ -535,7 +535,7 @@ Note that benchmark_app only requires the model path to run but both the device and hint arguments will be useful to us. For more advanced usages, the tool itself has other options that can be checked by running ``benchmark_app -h`` or reading the -`docs `__. +`docs `__. 
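The asynchronous inferencing pages referenced above boil down to a simple pattern: compile with a throughput-oriented performance hint and keep several inference requests in flight with ``AsyncInferQueue``. The sketch below illustrates that pattern with a placeholder model path and random input data.

.. code-block:: python

    import numpy as np
    import openvino as ov
    import openvino.properties.hint as hints

    core = ov.Core()
    compiled = core.compile_model(
        "model.xml",  # placeholder IR path
        "AUTO",       # or "GPU", "MULTI:GPU.0,GPU.1", ...
        {hints.performance_mode(): hints.PerformanceMode.THROUGHPUT},
    )

    results = {}


    def on_done(request, frame_id):
        # Store the first output of each finished request.
        results[frame_id] = request.get_output_tensor(0).data.copy()


    # With no explicit job count, the runtime picks the optimal number of requests.
    queue = ov.AsyncInferQueue(compiled)
    queue.set_callback(on_done)

    frames = [np.random.rand(*compiled.input(0).shape).astype(np.float32) for _ in range(8)]
    for i, frame in enumerate(frames):
        queue.start_async({compiled.input(0): frame}, userdata=i)
    queue.wait_all()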
The following example shows how to benchmark a simple model, using a GPU with a latency focus: @@ -1439,8 +1439,8 @@ corresponding documentation: - `Query Device Properties `__ - `Configurations for GPUs with - OpenVINO `__ + OpenVINO `__ - `Benchmark Python - Tool `__ + Tool `__ - `Asynchronous - Inferencing `__ + Inferencing `__ diff --git a/docs/notebooks/hello-npu-with-output.rst b/docs/notebooks/hello-npu-with-output.rst index 0160e5f23c534c..109f78a14f1b28 100644 --- a/docs/notebooks/hello-npu-with-output.rst +++ b/docs/notebooks/hello-npu-with-output.rst @@ -77,7 +77,7 @@ other devices, for more streamlined resource management. Note that the NPU plugin is included in PIP installation of OpenVINO™ and you need to `install a proper NPU -driver `__ +driver `__ to use it successfully. | **Supported Platforms**: @@ -483,7 +483,7 @@ Note that benchmark_app only requires the model path to run but both device and hint arguments will be useful to us. For more advanced usages, the tool itself has other options that can be checked by running ``benchmark_app -h`` or reading the -`docs `__. +`docs `__. The following example shows us to benchmark a simple model, using a NPU with latency focus: diff --git a/docs/notebooks/image-classification-quantization-with-output.rst b/docs/notebooks/image-classification-quantization-with-output.rst index fb3efbdbe9ef1f..0dc9f5dbc766b3 100644 --- a/docs/notebooks/image-classification-quantization-with-output.rst +++ b/docs/notebooks/image-classification-quantization-with-output.rst @@ -398,7 +398,7 @@ Compare Performance of the Original and Quantized Models Finally, measure the inference performance of the ``FP32`` and ``INT8`` models, using `Benchmark -Tool `__ +Tool `__ - an inference performance measurement tool in OpenVINO. **NOTE**: For more accurate performance, it is recommended to run diff --git a/docs/notebooks/knowledge-graphs-conve-with-output.rst b/docs/notebooks/knowledge-graphs-conve-with-output.rst index 55c32a97c97e93..c8a642e7fc4107 100644 --- a/docs/notebooks/knowledge-graphs-conve-with-output.rst +++ b/docs/notebooks/knowledge-graphs-conve-with-output.rst @@ -570,7 +570,7 @@ Benchmark the converted OpenVINO model using benchmark app The OpenVINO toolkit provides a benchmarking application to gauge the platform specific runtime performance that can be obtained under optimal configuration parameters for a given model. For more details refer to: -https://docs.openvino.ai/2024/learn-openvino/openvino-samples/benchmark-tool.html +https://docs.openvino.ai/2025/learn-openvino/openvino-samples/benchmark-tool.html Here, we use the benchmark application to obtain performance estimates under optimal configuration for the knowledge graph model inference. We diff --git a/docs/notebooks/language-quantize-bert-with-output.rst b/docs/notebooks/language-quantize-bert-with-output.rst index 65564423fa67a3..8da52d1cf9230c 100644 --- a/docs/notebooks/language-quantize-bert-with-output.rst +++ b/docs/notebooks/language-quantize-bert-with-output.rst @@ -496,7 +496,7 @@ Frames Per Second (FPS) for images. Finally, measure the inference performance of OpenVINO ``FP32`` and ``INT8`` models. For this purpose, use `Benchmark -Tool `__ +Tool `__ in OpenVINO. 
**Note**: The ``benchmark_app`` tool is able to measure the diff --git a/docs/notebooks/latent-consistency-models-image-generation-with-output.rst b/docs/notebooks/latent-consistency-models-image-generation-with-output.rst index 37dd96826cd038..fc66a8274d3623 100644 --- a/docs/notebooks/latent-consistency-models-image-generation-with-output.rst +++ b/docs/notebooks/latent-consistency-models-image-generation-with-output.rst @@ -95,11 +95,11 @@ Prerequisites from pathlib import Path import requests - + utility_files = [Path("notebook_utils.py"), Path("skip_kernel_extension.py"), Path("cmd_helper.py")] - + base_utils_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/" - + for utility_file in utility_files: if not utility_file.exists(): r = requests.get(base_utils_url + utility_file.name) @@ -119,7 +119,7 @@ fine-tune of `Stable-Diffusion v1-5 `__ using Latent Consistency Distillation (LCD) approach discussed above. This model is also integrated into -`Diffusers `__ library. +`Diffusers `__ library. Diffusers is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. This allows us to compare running original Stable Diffusion @@ -129,16 +129,16 @@ and distilled using LCD. The distillation approach efficiently converts a pre-trained guided diffusion model into a latent consistency model by solving an augmented PF-ODE. -For simplifying model export we will utilize Optimum Intel library. +For simplifying model export we will utilize Optimum Intel library. `Optimum Intel `__ is -the interface between the +the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. It provides ease-to-use `interface `__ for exporting models to `OpenVINO Intermediate Representation -(IR) `__ +(IR) `__ format. The command bellow demonstrates basic command for model export with @@ -165,10 +165,10 @@ this step we will use fp16 as base model export precision. .. code:: ipython3 from cmd_helper import optimum_cli - + model_id = "SimianLuo/LCM_Dreamshaper_v7" model_path = Path(model_id.split("/")[-1] + "_ov") - + if not model_path.exists(): optimum_cli(model_id, model_path, additional_args={"weight-format": "fp16"}) @@ -227,9 +227,9 @@ inference. Select desired inference device from dropdown list bellow. .. code:: ipython3 from notebook_utils import device_widget - + device = device_widget() - + device @@ -244,7 +244,7 @@ inference. Select desired inference device from dropdown list bellow. .. code:: ipython3 from optimum.intel.openvino import OVDiffusionPipeline - + ov_pipe = OVDiffusionPipeline.from_pretrained(model_path, device=device.value) @@ -277,10 +277,10 @@ Now, let’s see model in action .. code:: ipython3 import torch - + prompt = "a beautiful pink unicorn, 8k" num_inference_steps = 4 - + images = ov_pipe( prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0, height=512, width=512, generator=torch.Generator().manual_seed(1234567) ).images @@ -308,7 +308,7 @@ Nice. As you can see, the picture has quite a high definition 🔥. .. code:: ipython3 import gc - + del ov_pipe gc.collect(); @@ -344,11 +344,11 @@ improve model inference speed. .. 
code:: ipython3 from notebook_utils import quantization_widget - + skip_for_device = "GPU" in device.value to_quantize = quantization_widget(not skip_for_device) int8_model_path = model_path.parent / (model_path.name + "_int8") - + to_quantize @@ -380,36 +380,36 @@ model inputs for calibration we should customize ``CompiledModel``. .. code:: ipython3 %%skip not $to_quantize.value - + import datasets from tqdm.notebook import tqdm from transformers import set_seed from typing import Any, Dict, List import openvino as ov import numpy as np - + set_seed(1) - + class CompiledModelDecorator(ov.CompiledModel): def __init__(self, compiled_model, prob: float, data_cache: List[Any] = None): super().__init__(compiled_model) self.data_cache = data_cache if data_cache else [] self.prob = np.clip(prob, 0, 1) - + def __call__(self, *args, **kwargs): if np.random.rand() >= self.prob: self.data_cache.append(*args) return super().__call__(*args, **kwargs) - + def collect_calibration_data(lcm_pipeline, subset_size: int) -> List[Dict]: original_unet = lcm_pipeline.unet.request lcm_pipeline.unet.request = CompiledModelDecorator(original_unet, prob=0.3) - + dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", split="train", trust_remote_code=True).shuffle(seed=42) lcm_pipeline.set_progress_bar_config(disable=True) safety_checker = lcm_pipeline.safety_checker lcm_pipeline.safety_checker = None - + # Run inference for data collection pbar = tqdm(total=subset_size) diff = 0 @@ -430,7 +430,7 @@ model inputs for calibration we should customize ``CompiledModel``. break pbar.update(collected_subset_size - diff) diff = collected_subset_size - + calibration_dataset = lcm_pipeline.unet.request.data_cache lcm_pipeline.set_progress_bar_config(disable=False) lcm_pipeline.unet.request = original_unet @@ -440,11 +440,11 @@ model inputs for calibration we should customize ``CompiledModel``. .. code:: ipython3 %%skip not $to_quantize.value - + import logging logging.basicConfig(level=logging.WARNING) logger = logging.getLogger(__name__) - + if not int8_model_path.exists(): subset_size = 200 ov_pipe = OVDiffusionPipeline.from_pretrained(model_path, device=device.value) @@ -472,12 +472,12 @@ Create a quantized model from the pre-trained converted OpenVINO model. .. code:: ipython3 %%skip not $to_quantize.value - + import nncf from nncf.scopes import IgnoredScope import shutil core = ov.Core() - + if not int8_model_path.exists(): unet = core.read_model(model_path / "unet/openvino_model.xml") quantized_unet = nncf.quantize( @@ -546,7 +546,7 @@ Create a quantized model from the pre-trained converted OpenVINO model. .. code:: ipython3 %%skip not $to_quantize.value - + int8_pipe = OVDiffusionPipeline.from_pretrained(int8_model_path, device=device.value) Let us check predictions with the quantized UNet using the same input @@ -555,12 +555,12 @@ data. .. code:: ipython3 %%skip not $to_quantize.value - + from IPython.display import display - + prompt = "a beautiful pink unicorn, 8k" num_inference_steps = 4 - + images = int8_pipe( prompt=prompt, num_inference_steps=num_inference_steps, @@ -569,7 +569,7 @@ data. width=512, generator=torch.Generator().manual_seed(1234567) ).images - + display(images[0]) @@ -598,9 +598,9 @@ pipelines, we use median inference time on calibration subset. .. 
code:: ipython3 %%skip not $to_quantize.value - + import time - + validation_size = 10 calibration_dataset = datasets.load_dataset("google-research-datasets/conceptual_captions", split="train", trust_remote_code=True) validation_data = [] @@ -609,7 +609,7 @@ pipelines, we use median inference time on calibration subset. break prompt = batch["caption"] validation_data.append(prompt) - + def calculate_inference_time(pipeline, calibration_dataset): inference_time = [] pipeline.set_progress_bar_config(disable=True) @@ -632,14 +632,14 @@ pipelines, we use median inference time on calibration subset. .. code:: ipython3 %%skip not $to_quantize.value - + int8_latency = calculate_inference_time(int8_pipe, validation_data) del int8_pipe gc.collect() ov_pipe = OVDiffusionPipeline.from_pretrained(model_path, device=device.value) fp_latency = calculate_inference_time(ov_pipe, validation_data) print(f"Performance speed up: {fp_latency / int8_latency:.3f}") - + del ov_pipe gc.collect(); @@ -658,11 +658,11 @@ Compare UNet file size UNET_OV_PATH = model_path / "unet/openvino_model.xml" UNET_INT8_OV_PATH = int8_model_path / "unet/openvino_model.xml" - + if UNET_INT8_OV_PATH.exists(): fp16_ir_model_size = UNET_OV_PATH.with_suffix(".bin").stat().st_size / 1024 quantized_model_size = UNET_INT8_OV_PATH.with_suffix(".bin").stat().st_size / 1024 - + print(f"FP16 model size: {fp16_ir_model_size:.2f} KB") print(f"INT8 model size: {quantized_model_size:.2f} KB") print(f"Model compression rate: {fp16_ir_model_size / quantized_model_size:.3f}") @@ -722,10 +722,10 @@ generation process. .. code:: ipython3 import ipywidgets as widgets - + int8_can_be_used = int8_model_path.exists() and "GPU" not in device.value use_quantized_model = widgets.Checkbox(value=int8_can_be_used, description="Use INT8 model", disabled=not int8_can_be_used) - + use_quantized_model @@ -740,9 +740,9 @@ generation process. .. code:: ipython3 import openvino_genai as ov_genai - + used_model_path = model_path if not use_quantized_model.value else int8_model_path - + pipe = ov_genai.Text2ImagePipeline(used_model_path, device.value) .. code:: ipython3 @@ -750,30 +750,30 @@ generation process. 
from PIL import Image import torch import openvino as ov - - + + class Generator(ov_genai.Generator): def __init__(self, seed): ov_genai.Generator.__init__(self) self.generator = torch.Generator(device="cpu").manual_seed(seed) - + def next(self): return torch.randn(1, generator=self.generator, dtype=torch.float32).item() - + def randn_tensor(self, shape: ov.Shape): torch_tensor = torch.randn(list(shape), generator=self.generator, dtype=torch.float32) return ov.Tensor(torch_tensor.numpy()) - - + + prompt = "a beautiful pink unicorn, 8k" num_inference_steps = 4 - + random_generator = Generator(1234567) - + image_tensor = pipe.generate(prompt, width=512, height=512, num_inference_steps=4, num_images_per_prompt=1, generator=random_generator) - + image = Image.fromarray(image_tensor.data[0]) - + image @@ -793,16 +793,16 @@ Interactive demo import random import gradio as gr import numpy as np - + MAX_SEED = np.iinfo(np.int32).max - - + + def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: if randomize_seed: seed = random.randint(0, MAX_SEED) return seed - - + + def generate( prompt: str, seed: int = 0, @@ -828,11 +828,11 @@ Interactive demo url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/latent-consistency-models-image-generation/gradio_helper.py" ) open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo_lcm - + demo = make_demo_lcm(fn=generate) - + try: demo.queue().launch(debug=False) except Exception: diff --git a/docs/notebooks/llm-chatbot-generate-api-with-output.rst b/docs/notebooks/llm-chatbot-generate-api-with-output.rst index 4a57c2fbbed178..fc343a5f306e50 100644 --- a/docs/notebooks/llm-chatbot-generate-api-with-output.rst +++ b/docs/notebooks/llm-chatbot-generate-api-with-output.rst @@ -682,7 +682,7 @@ is the interface between the and OpenVINO to accelerate end-to-end pipelines on Intel architectures. It provides ease-to-use cli interface for exporting models to `OpenVINO Intermediate Representation -(IR) `__ +(IR) `__ format. .. raw:: html diff --git a/docs/notebooks/llm-chatbot-with-output.rst b/docs/notebooks/llm-chatbot-with-output.rst index e3c7ff97af919a..09a2f0e1a572d6 100644 --- a/docs/notebooks/llm-chatbot-with-output.rst +++ b/docs/notebooks/llm-chatbot-with-output.rst @@ -609,7 +609,7 @@ the interface between the and OpenVINO to accelerate end-to-end pipelines on Intel architectures. It provides ease-to-use cli interface for exporting models to `OpenVINO Intermediate Representation -(IR) `__ +(IR) `__ format. 
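Besides the ``optimum-cli`` command discussed in these notebooks, the same export can be driven from Python through Optimum Intel. The sketch below is illustrative only: the checkpoint name is a placeholder and the 4-bit settings roughly mirror the compression options used elsewhere in this notebook.

.. code-block:: python

    from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

    model_id = "HuggingFaceH4/zephyr-7b-beta"  # placeholder; any supported causal-LM checkpoint

    # export=True converts the checkpoint to OpenVINO IR on the fly;
    # the quantization config compresses the weights to 4 bit during export.
    ov_model = OVModelForCausalLM.from_pretrained(
        model_id,
        export=True,
        quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=128, ratio=0.8),
    )

    ov_model.save_pretrained("zephyr-7b-beta-int4-ov")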
The command bellow demonstrates basic command for model export with diff --git a/docs/notebooks/llm-rag-langchain-with-output.rst b/docs/notebooks/llm-rag-langchain-with-output.rst index 1dec9cb2fb6659..bc5c54abbf2880 100644 --- a/docs/notebooks/llm-rag-langchain-with-output.rst +++ b/docs/notebooks/llm-rag-langchain-with-output.rst @@ -97,22 +97,22 @@ Install required dependencies import os import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) with open("notebook_utils.py", "w") as f: f.write(r.text) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", ) open("pip_helper.py", "w").write(r.text) - + from pip_helper import pip_install - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + pip_install("--pre", "-U", "openvino>=2024.2.0", "--extra-index-url", "https://storage.openvinotoolkit.org/simple/wheels/nightly") pip_install("--pre", "-U", "openvino-tokenizers[transformers]", "--extra-index-url", "https://storage.openvinotoolkit.org/simple/wheels/nightly") pip_install( @@ -147,16 +147,16 @@ Install required dependencies import requests import shutil import io - + # fetch model configuration - + config_shared_path = Path("../../utils/llm_config.py") config_dst_path = Path("llm_config.py") text_example_en_path = Path("text_example_en.pdf") text_example_cn_path = Path("text_example_cn.pdf") text_example_en = "https://github.com/openvinotoolkit/openvino_notebooks/files/15039728/Platform.Brief_Intel.vPro.with.Intel.Core.Ultra_Final.pdf" text_example_cn = "https://github.com/openvinotoolkit/openvino_notebooks/files/15039713/Platform.Brief_Intel.vPro.with.Intel.Core.Ultra_Final_CH.pdf" - + if not config_dst_path.exists(): if config_shared_path.exists(): try: @@ -175,13 +175,13 @@ Install required dependencies r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/llm_config.py") with open("llm_config.py", "w", encoding="utf-8") as f: f.write(r.text) - + if not text_example_en_path.exists(): r = requests.get(url=text_example_en) content = io.BytesIO(r.content) with open("text_example_en.pdf", "wb") as f: f.write(content.read()) - + if not text_example_cn_path.exists(): r = requests.get(url=text_example_cn) content = io.BytesIO(r.content) @@ -262,16 +262,16 @@ quality. SUPPORTED_RERANK_MODELS, SUPPORTED_LLM_MODELS, ) - + model_languages = list(SUPPORTED_LLM_MODELS) - + model_language = widgets.Dropdown( options=model_languages, value=model_languages[0], description="Model Language:", disabled=False, ) - + model_language @@ -286,14 +286,14 @@ quality. .. code:: ipython3 llm_model_ids = [model_id for model_id, model_config in SUPPORTED_LLM_MODELS[model_language.value].items() if model_config.get("rag_prompt_template")] - + llm_model_id = widgets.Dropdown( options=llm_model_ids, value=llm_model_ids[-1], description="Model:", disabled=False, ) - + llm_model_id @@ -317,13 +317,13 @@ quality. `Optimum Intel `__ is -the interface between the +the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. It provides ease-to-use cli interface for exporting models to `OpenVINO Intermediate Representation -(IR) `__ +(IR) `__ format. The command bellow demonstrates basic command for model export with @@ -374,7 +374,7 @@ sacrifice of the model size and inference latency. .. 
code:: ipython3 from IPython.display import Markdown, display - + prepare_int4_model = widgets.Checkbox( value=True, description="Prepare INT4 model", @@ -390,7 +390,7 @@ sacrifice of the model size and inference latency. description="Prepare FP16 model", disabled=False, ) - + display(prepare_int4_model) display(prepare_int8_model) display(prepare_fp16_model) @@ -461,8 +461,8 @@ with INT4 precision. fp16_model_dir = Path(llm_model_id.value) / "FP16" int8_model_dir = Path(llm_model_id.value) / "INT8_compressed_weights" int4_model_dir = Path(llm_model_id.value) / "INT4_compressed_weights" - - + + def convert_to_fp16(): if (fp16_model_dir / "openvino_model.xml").exists(): return @@ -474,8 +474,8 @@ with INT4 precision. display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + def convert_to_int8(): if (int8_model_dir / "openvino_model.xml").exists(): return @@ -488,8 +488,8 @@ with INT4 precision. display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + def convert_to_int4(): compression_configs = { "zephyr-7b-beta": { @@ -559,7 +559,7 @@ with INT4 precision. "ratio": 0.8, }, } - + model_compression_params = compression_configs.get(llm_model_id.value, compression_configs["default"]) if (int4_model_dir / "openvino_model.xml").exists(): return @@ -577,8 +577,8 @@ with INT4 precision. display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + if prepare_fp16_model.value: convert_to_fp16() if prepare_int8_model.value: @@ -593,7 +593,7 @@ Let’s compare model size for different compression types fp16_weights = fp16_model_dir / "openvino_model.bin" int8_weights = int8_model_dir / "openvino_model.bin" int4_weights = int4_model_dir / "openvino_model.bin" - + if fp16_weights.exists(): print(f"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB") for precision, compressed_weights in zip([8, 4], [int8_weights, int4_weights]): @@ -619,14 +619,14 @@ filter them out according the LLM you selected. .. code:: ipython3 embedding_model_id = list(SUPPORTED_EMBEDDING_MODELS[model_language.value]) - + embedding_model_id = widgets.Dropdown( options=embedding_model_id, value=embedding_model_id[0], description="Embedding Model:", disabled=False, ) - + embedding_model_id @@ -656,7 +656,7 @@ OpenVINO embedding model and tokenizer can be exported by export_command_base = "optimum-cli export openvino --model {} --task feature-extraction".format(embedding_model_configuration["model_id"]) export_command = export_command_base + " " + str(embedding_model_id.value) - + if not Path(embedding_model_id.value).exists(): ! $export_command @@ -668,14 +668,14 @@ Convert rerank model using Optimum-CLI .. code:: ipython3 rerank_model_id = list(SUPPORTED_RERANK_MODELS) - + rerank_model_id = widgets.Dropdown( options=rerank_model_id, value=rerank_model_id[0], description="Rerank Model:", disabled=False, ) - + rerank_model_id @@ -706,7 +706,7 @@ task with ``optimum-cli``. export_command_base = "optimum-cli export openvino --model {} --task text-classification".format(rerank_model_configuration["model_id"]) export_command = export_command_base + " " + str(rerank_model_id.value) - + if not Path(rerank_model_id.value).exists(): ! $export_command @@ -726,9 +726,9 @@ Select device for embedding model inference .. code:: ipython3 from notebook_utils import device_widget - + embedding_device = device_widget() - + embedding_device @@ -761,9 +761,9 @@ model to NPU device. .. 
code:: ipython3 from notebook_utils import optimize_bge_embedding - + USING_NPU = embedding_device.value == "NPU" - + npu_embedding_dir = embedding_model_id.value + "-npu" npu_embedding_path = Path(npu_embedding_dir) / "openvino_model.xml" if USING_NPU and not Path(npu_embedding_dir).exists(): @@ -778,7 +778,7 @@ Select device for rerank model inference .. code:: ipython3 rerank_device = device_widget() - + rerank_device @@ -813,9 +813,9 @@ Select device for LLM model inference .. code:: ipython3 from notebook_utils import device_widget - + llm_device = device_widget("CPU", exclude=["NPU"]) - + llm_device @@ -861,7 +861,7 @@ of LangChain. .. code:: ipython3 from langchain_community.embeddings import OpenVINOBgeEmbeddings - + embedding_model_name = npu_embedding_dir if USING_NPU else embedding_model_id.value batch_size = 1 if USING_NPU else 4 embedding_model_kwargs = {"device": embedding_device.value, "compile": False} @@ -870,7 +870,7 @@ of LangChain. "normalize_embeddings": embedding_model_configuration["normalize_embeddings"], "batch_size": batch_size, } - + embedding = OpenVINOBgeEmbeddings( model_name_or_path=embedding_model_name, model_kwargs=embedding_model_kwargs, @@ -879,7 +879,7 @@ of LangChain. if USING_NPU: embedding.ov_model.reshape(1, 512) embedding.ov_model.compile() - + text = "This is a test document." embedding_result = embedding.embed_query(text) embedding_result[:3] @@ -917,11 +917,11 @@ class of LangChain. .. code:: ipython3 from langchain_community.document_compressors.openvino_rerank import OpenVINOReranker - + rerank_model_name = rerank_model_id.value rerank_model_kwargs = {"device": rerank_device.value} rerank_top_n = 2 - + reranker = OpenVINOReranker( model_name_or_path=rerank_model_name, model_kwargs=rerank_model_kwargs, @@ -953,14 +953,14 @@ inference framework. available_models.append("INT8") if fp16_model_dir.exists(): available_models.append("FP16") - + model_to_run = widgets.Dropdown( options=available_models, value=available_models[0], description="Model to run:", disabled=False, ) - + model_to_run @@ -982,12 +982,12 @@ inference framework. .. code:: ipython3 from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline - + import openvino.properties as props import openvino.properties.hint as hints import openvino.properties.streams as streams - - + + if model_to_run.value == "INT4": model_dir = int4_model_dir elif model_to_run.value == "INT8": @@ -995,17 +995,17 @@ inference framework. else: model_dir = fp16_model_dir print(f"Loading model from {model_dir}") - + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} - + if "GPU" in llm_device.value and "qwen2-7b-instruct" in llm_model_id.value: ov_config["GPU_ENABLE_SDPA_OPTIMIZATION"] = "NO" - + # On a GPU device a model is executed in FP16 precision. For red-pajama-3b-chat model there known accuracy # issues caused by this, which we avoid by setting precision hint to "f32". if llm_model_id.value == "red-pajama-3b-chat" and "GPU" in core.available_devices and llm_device.value in ["GPU", "AUTO"]: ov_config["INFERENCE_PRECISION_HINT"] = "f32" - + llm = HuggingFacePipeline.from_model_id( model_id=str(model_dir), task="text-generation", @@ -1017,10 +1017,10 @@ inference framework. 
}, pipeline_kwargs={"max_new_tokens": 2}, ) - + if llm.pipeline.tokenizer.eos_token_id: llm.pipeline.tokenizer.pad_token_id = llm.pipeline.tokenizer.eos_token_id - + llm.invoke("2 + 2 =") @@ -1111,13 +1111,13 @@ The most common full sequence from raw data to answer looks like: UnstructuredPowerPointLoader, UnstructuredWordDocumentLoader, ) - - + + class ChineseTextSplitter(CharacterTextSplitter): def __init__(self, pdf: bool = False, **kwargs): super().__init__(**kwargs) self.pdf = pdf - + def split_text(self, text: str) -> List[str]: if self.pdf: text = re.sub(r"\n{3,}", "\n", text) @@ -1130,16 +1130,16 @@ The most common full sequence from raw data to answer looks like: elif ele: sent_list.append(ele) return sent_list - - + + TEXT_SPLITERS = { "Character": CharacterTextSplitter, "RecursiveCharacter": RecursiveCharacterTextSplitter, "Markdown": MarkdownTextSplitter, "Chinese": ChineseTextSplitter, } - - + + LOADERS = { ".csv": (CSVLoader, {}), ".doc": (UnstructuredWordDocumentLoader, {}), @@ -1154,7 +1154,7 @@ The most common full sequence from raw data to answer looks like: ".pptx": (UnstructuredPowerPointLoader, {}), ".txt": (TextLoader, {"encoding": "utf8"}), } - + if model_language.value == "English": text_example_path = "text_example_en.pdf" else: @@ -1179,72 +1179,72 @@ which will help to create a chain to connect RAG components including: from langchain.retrievers import ContextualCompressionRetriever from threading import Thread import gradio as gr - + stop_tokens = llm_model_configuration.get("stop_tokens") rag_prompt_template = llm_model_configuration["rag_prompt_template"] - - + + class StopOnTokens(StoppingCriteria): def __init__(self, token_ids): self.token_ids = token_ids - + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: for stop_id in self.token_ids: if input_ids[0][-1] == stop_id: return True return False - - + + if stop_tokens is not None: if isinstance(stop_tokens[0], str): stop_tokens = llm.pipeline.tokenizer.convert_tokens_to_ids(stop_tokens) - + stop_tokens = [StopOnTokens(stop_tokens)] - - + + def load_single_document(file_path: str) -> List[Document]: """ helper for loading a single document - + Params: file_path: document path Returns: documents loaded - + """ ext = "." 
+ file_path.rsplit(".", 1)[-1] if ext in LOADERS: loader_class, loader_args = LOADERS[ext] loader = loader_class(file_path, **loader_args) return loader.load() - + raise ValueError(f"File does not exist '{ext}'") - - + + def default_partial_text_processor(partial_text: str, new_text: str): """ helper for updating partially generated answer, used by default - + Params: partial_text: text buffer for storing previosly generated text new_text: text update for the current step Returns: updated text string - + """ partial_text += new_text return partial_text - - + + text_processor = llm_model_configuration.get("partial_text_processor", default_partial_text_processor) - - + + def create_vectordb( docs, spliter_name, chunk_size, chunk_overlap, vector_search_top_k, vector_rerank_top_n, run_rerank, search_method, score_threshold, progress=gr.Progress() ): """ Initialize a vector database - + Params: doc: orignal documents provided by user spliter_name: spliter method @@ -1255,24 +1255,24 @@ which will help to create a chain to connect RAG components including: run_rerank: whether run reranker search_method: top k search method score_threshold: score threshold when selecting 'similarity_score_threshold' method - + """ global db global retriever global combine_docs_chain global rag_chain - + if vector_rerank_top_n > vector_search_top_k: gr.Warning("Search top k must >= Rerank top n") - + documents = [] for doc in docs: if type(doc) is not str: doc = doc.name documents.extend(load_single_document(doc)) - + text_splitter = TEXT_SPLITERS[spliter_name](chunk_size=chunk_size, chunk_overlap=chunk_overlap) - + texts = text_splitter.split_documents(documents) db = FAISS.from_documents(texts, embedding) if search_method == "similarity_score_threshold": @@ -1285,32 +1285,32 @@ which will help to create a chain to connect RAG components including: retriever = ContextualCompressionRetriever(base_compressor=reranker, base_retriever=retriever) prompt = PromptTemplate.from_template(rag_prompt_template) combine_docs_chain = create_stuff_documents_chain(llm, prompt) - + rag_chain = create_retrieval_chain(retriever, combine_docs_chain) - + return "Vector database is Ready" - - + + def update_retriever(vector_search_top_k, vector_rerank_top_n, run_rerank, search_method, score_threshold): """ Update retriever - + Params: vector_search_top_k: Vector search top k vector_rerank_top_n: Search rerank top n run_rerank: whether run reranker search_method: top k search method score_threshold: score threshold when selecting 'similarity_score_threshold' method - + """ global db global retriever global combine_docs_chain global rag_chain - + if vector_rerank_top_n > vector_search_top_k: gr.Warning("Search top k must >= Rerank top n") - + if search_method == "similarity_score_threshold": search_kwargs = {"k": vector_search_top_k, "score_threshold": score_threshold} else: @@ -1320,14 +1320,14 @@ which will help to create a chain to connect RAG components including: retriever = ContextualCompressionRetriever(base_compressor=reranker, base_retriever=retriever) reranker.top_n = vector_rerank_top_n rag_chain = create_retrieval_chain(retriever, combine_docs_chain) - + return "Vector database is Ready" - - + + def bot(history, temperature, top_p, top_k, repetition_penalty, hide_full_prompt, do_rag): """ callback function for running chatbot on submit button click - + Params: history: conversation history temperature: parameter for control the level of creativity in AI-generated text. 
@@ -1337,7 +1337,7 @@ which will help to create a chain to connect RAG components including: repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text. hide_full_prompt: whether to show searching results in promopt. do_rag: whether do RAG when generating texts. - + """ streamer = TextIteratorStreamer( llm.pipeline.tokenizer, @@ -1356,7 +1356,7 @@ which will help to create a chain to connect RAG components including: ) if stop_tokens is not None: pipeline_kwargs["stopping_criteria"] = StoppingCriteriaList(stop_tokens) - + llm.pipeline_kwargs = pipeline_kwargs if do_rag: t1 = Thread(target=rag_chain.invoke, args=({"input": history[-1][0]},)) @@ -1364,19 +1364,19 @@ which will help to create a chain to connect RAG components including: input_text = rag_prompt_template.format(input=history[-1][0], context="") t1 = Thread(target=llm.invoke, args=(input_text,)) t1.start() - + # Initialize an empty string to store the generated text partial_text = "" for new_text in streamer: partial_text = text_processor(partial_text, new_text) history[-1][1] = partial_text yield history - - + + def request_cancel(): llm.pipeline.model.request.cancel() - - + + # initialize the vector store with example document create_vectordb( [text_example_path], @@ -1406,9 +1406,9 @@ Next we can create a Gradio UI and run demo. if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llm-rag-langchain/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo( load_doc_fn=create_vectordb, run_fn=bot, @@ -1417,7 +1417,7 @@ Next we can create a Gradio UI and run demo. model_name=llm_model_id.value, language=model_language.value, ) - + try: demo.queue().launch() except Exception: diff --git a/docs/notebooks/llm-rag-llamaindex-with-output.rst b/docs/notebooks/llm-rag-llamaindex-with-output.rst index b3c7f4e004c7af..cd39804c651665 100644 --- a/docs/notebooks/llm-rag-llamaindex-with-output.rst +++ b/docs/notebooks/llm-rag-llamaindex-with-output.rst @@ -98,22 +98,22 @@ Install required dependencies import os import requests - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) with open("notebook_utils.py", "w") as f: f.write(r.text) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", ) open("pip_helper.py", "w").write(r.text) - + from pip_helper import pip_install - + pip_install( "-q", "--extra-index-url", @@ -149,16 +149,16 @@ Install required dependencies import requests import shutil import io - + # fetch model configuration - + config_shared_path = Path("../../utils/llm_config.py") config_dst_path = Path("llm_config.py") text_example_en_path = Path("text_example_en.pdf") text_example_cn_path = Path("text_example_cn.pdf") text_example_en = "https://github.com/openvinotoolkit/openvino_notebooks/files/15039728/Platform.Brief_Intel.vPro.with.Intel.Core.Ultra_Final.pdf" text_example_cn = "https://github.com/openvinotoolkit/openvino_notebooks/files/15039713/Platform.Brief_Intel.vPro.with.Intel.Core.Ultra_Final_CH.pdf" - + if not config_dst_path.exists(): if config_shared_path.exists(): try: @@ -177,14 +177,14 @@ Install required dependencies r = 
requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/llm_config.py") with open("llm_config.py", "w", encoding="utf-8") as f: f.write(r.text) - - + + if not text_example_en_path.exists(): r = requests.get(url=text_example_en) content = io.BytesIO(r.content) with open("text_example_en.pdf", "wb") as f: f.write(content.read()) - + if not text_example_cn_path.exists(): r = requests.get(url=text_example_cn) content = io.BytesIO(r.content) @@ -260,16 +260,16 @@ quality. SUPPORTED_RERANK_MODELS, SUPPORTED_LLM_MODELS, ) - + model_languages = list(SUPPORTED_LLM_MODELS) - + model_language = widgets.Dropdown( options=model_languages, value=model_languages[0], description="Model Language:", disabled=False, ) - + model_language @@ -284,14 +284,14 @@ quality. .. code:: ipython3 llm_model_ids = [model_id for model_id, model_config in SUPPORTED_LLM_MODELS[model_language.value].items() if model_config.get("rag_prompt_template")] - + llm_model_id = widgets.Dropdown( options=llm_model_ids, value=llm_model_ids[-1], description="Model:", disabled=False, ) - + llm_model_id @@ -315,13 +315,13 @@ quality. `Optimum Intel `__ is -the interface between the +the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. It provides ease-to-use cli interface for exporting models to `OpenVINO Intermediate Representation -(IR) `__ +(IR) `__ format. The command bellow demonstrates basic command for model export with @@ -372,7 +372,7 @@ sacrifice of the model size and inference latency. .. code:: ipython3 from IPython.display import Markdown, display - + prepare_int4_model = widgets.Checkbox( value=True, description="Prepare INT4 model", @@ -388,7 +388,7 @@ sacrifice of the model size and inference latency. description="Prepare FP16 model", disabled=False, ) - + display(prepare_int4_model) display(prepare_int8_model) display(prepare_fp16_model) @@ -459,8 +459,8 @@ with INT4 precision. fp16_model_dir = Path(llm_model_id.value) / "FP16" int8_model_dir = Path(llm_model_id.value) / "INT8_compressed_weights" int4_model_dir = Path(llm_model_id.value) / "INT4_compressed_weights" - - + + def convert_to_fp16(): if (fp16_model_dir / "openvino_model.xml").exists(): return @@ -472,8 +472,8 @@ with INT4 precision. display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + def convert_to_int8(): if (int8_model_dir / "openvino_model.xml").exists(): return @@ -486,8 +486,8 @@ with INT4 precision. display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! $export_command - - + + def convert_to_int4(): compression_configs = { "zephyr-7b-beta": { @@ -557,7 +557,7 @@ with INT4 precision. "ratio": 0.8, }, } - + model_compression_params = compression_configs.get(llm_model_id.value, compression_configs["default"]) if (int4_model_dir / "openvino_model.xml").exists(): return @@ -575,8 +575,8 @@ with INT4 precision. display(Markdown("**Export command:**")) display(Markdown(f"`{export_command}`")) ! 
$export_command - - + + if prepare_fp16_model.value: convert_to_fp16() if prepare_int8_model.value: @@ -591,7 +591,7 @@ Let’s compare model size for different compression types fp16_weights = fp16_model_dir / "openvino_model.bin" int8_weights = int8_model_dir / "openvino_model.bin" int4_weights = int4_model_dir / "openvino_model.bin" - + if fp16_weights.exists(): print(f"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB") for precision, compressed_weights in zip([8, 4], [int8_weights, int4_weights]): @@ -617,14 +617,14 @@ filter them out according the LLM you selected. .. code:: ipython3 embedding_model_id = list(SUPPORTED_EMBEDDING_MODELS[model_language.value]) - + embedding_model_id = widgets.Dropdown( options=embedding_model_id, value=embedding_model_id[0], description="Embedding Model:", disabled=False, ) - + embedding_model_id @@ -654,7 +654,7 @@ OpenVINO embedding model and tokenizer can be exported by export_command_base = "optimum-cli export openvino --model {} --task feature-extraction".format(embedding_model_configuration["model_id"]) export_command = export_command_base + " " + str(embedding_model_id.value) - + if not Path(embedding_model_id.value).exists(): ! $export_command @@ -666,14 +666,14 @@ Convert rerank model using Optimum-CLI .. code:: ipython3 rerank_model_id = list(SUPPORTED_RERANK_MODELS) - + rerank_model_id = widgets.Dropdown( options=rerank_model_id, value=rerank_model_id[0], description="Rerank Model:", disabled=False, ) - + rerank_model_id @@ -704,7 +704,7 @@ task with ``optimum-cli``. export_command_base = "optimum-cli export openvino --model {} --task text-classification".format(rerank_model_configuration["model_id"]) export_command = export_command_base + " " + str(rerank_model_id.value) - + if not Path(rerank_model_id.value).exists(): ! $export_command @@ -724,7 +724,7 @@ Select device for embedding model inference .. code:: ipython3 embedding_device = device_widget() - + embedding_device @@ -752,10 +752,10 @@ model to NPU device. .. code:: ipython3 USING_NPU = embedding_device.value == "NPU" - + npu_embedding_dir = embedding_model_id.value + "-npu" npu_embedding_path = Path(npu_embedding_dir) / "openvino_model.xml" - + if USING_NPU and not Path(npu_embedding_dir).exists(): shutil.copytree(embedding_model_id.value, npu_embedding_dir) optimize_bge_embedding(Path(embedding_model_id.value) / "openvino_model.xml", npu_embedding_path) @@ -768,7 +768,7 @@ Select device for rerank model inference .. code:: ipython3 rerank_device = device_widget() - + rerank_device @@ -836,17 +836,17 @@ class of LlamaIndex. .. code:: ipython3 from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding - + embedding_model_name = npu_embedding_dir if USING_NPU else embedding_model_id.value batch_size = 1 if USING_NPU else 4 - + embedding = OpenVINOEmbedding( model_id_or_path=embedding_model_name, embed_batch_size=batch_size, device=embedding_device.value, model_kwargs={"compile": False} ) if USING_NPU: embedding._model.reshape(1, 512) embedding._model.compile() - + embeddings = embedding.get_text_embedding("Hello World!") print(len(embeddings)) print(embeddings[:5]) @@ -877,7 +877,7 @@ class of LlamaIndex. .. code:: ipython3 from llama_index.postprocessor.openvino_rerank import OpenVINORerank - + reranker = OpenVINORerank(model_id_or_path=rerank_model_id.value, device=rerank_device.value, top_n=2) @@ -905,14 +905,14 @@ inference framework. 
available_models.append("INT8") if fp16_model_dir.exists(): available_models.append("FP16") - + model_to_run = widgets.Dropdown( options=available_models, value=available_models[0], description="Model to run:", disabled=False, ) - + model_to_run @@ -932,12 +932,12 @@ inference on it. .. code:: ipython3 from llama_index.llms.openvino import OpenVINOLLM - + import openvino.properties as props import openvino.properties.hint as hints import openvino.properties.streams as streams - - + + if model_to_run.value == "INT4": model_dir = int4_model_dir elif model_to_run.value == "INT8": @@ -945,20 +945,20 @@ inference on it. else: model_dir = fp16_model_dir print(f"Loading model from {model_dir}") - + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} - + stop_tokens = llm_model_configuration.get("stop_tokens") completion_to_prompt = llm_model_configuration.get("completion_to_prompt") - + if "GPU" in llm_device.value and "qwen2-7b-instruct" in llm_model_id.value: ov_config["GPU_ENABLE_SDPA_OPTIMIZATION"] = "NO" - + # On a GPU device a model is executed in FP16 precision. For red-pajama-3b-chat model there known accuracy # issues caused by this, which we avoid by setting precision hint to "f32". if llm_model_id.value == "red-pajama-3b-chat" and "GPU" in core.available_devices and llm_device.value in ["GPU", "AUTO"]: ov_config["INFERENCE_PRECISION_HINT"] = "f32" - + llm = OpenVINOLLM( model_id_or_path=str(model_dir), context_window=3900, @@ -968,7 +968,7 @@ inference on it. completion_to_prompt=completion_to_prompt, device_map=llm_device.value, ) - + response = llm.complete("2 + 2 =") print(str(response)) @@ -976,7 +976,7 @@ inference on it. .. parsed-literal:: /home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.11/site-packages/pydantic/_internal/_fields.py:161: UserWarning: Field "model_id" has conflict with protected namespace "model_". - + You may be able to resolve this warning by setting `model_config['protected_namespaces'] = ()`. warnings.warn( @@ -1053,42 +1053,42 @@ The most common full sequence from raw data to answer looks like: from transformers import StoppingCriteria, StoppingCriteriaList import faiss import torch - + if model_language.value == "English": text_example_path = "text_example_en.pdf" else: text_example_path = "text_example_cn.pdf" - - + + class StopOnTokens(StoppingCriteria): def __init__(self, token_ids): self.token_ids = token_ids - + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: for stop_id in self.token_ids: if input_ids[0][-1] == stop_id: return True return False - - + + if stop_tokens is not None: if isinstance(stop_tokens[0], str): stop_tokens = llm._tokenizer.convert_tokens_to_ids(stop_tokens) stop_tokens = [StopOnTokens(stop_tokens)] - + loader = PyMuPDFReader() documents = loader.load(file_path=text_example_path) - + # dimensions of embedding model d = embedding._model.request.outputs[0].get_partial_shape()[2].get_length() faiss_index = faiss.IndexFlatL2(d) Settings.embed_model = embedding - + llm.max_new_tokens = 2048 if stop_tokens is not None: llm._stopping_criteria = StoppingCriteriaList(stop_tokens) Settings.llm = llm - + vector_store = FaissVectorStore(faiss_index=faiss_index) storage_context = StorageContext.from_defaults(vector_store=vector_store) index = VectorStoreIndex.from_documents( @@ -1116,7 +1116,7 @@ The most common full sequence from raw data to answer looks like: query = "What can Intel vPro® Enterprise systems offer?" 
else: query = "英特尔博锐® Enterprise系统提供哪些功能?" - + streaming_response = query_engine.query(query) streaming_response.print_response_stream() @@ -1131,19 +1131,19 @@ The most common full sequence from raw data to answer looks like: .. parsed-literal:: - - + + Intel vPro® Enterprise systems can offer a range of advanced security features to protect network infrastructure. These include network security appliances, secure access service edge (SASE), next-generation firewall (NGFW), real-time deep packet inspection, antivirus, intrusion prevention and detection, and SSL/TLS inspection. These systems support more devices, users, and key capabilities such as real-time threat detection while processing higher network throughput. They also drive advanced security features for growing network infrastructure with enhanced power efficiency and density. - + Intel QuickAssist Technology (Intel QAT) accelerates and offloads key encryption/compression workloads from the CPU to free up CPU cycles. Trusted execution environments (TEEs) with Intel Software Guard Extensions (Intel SGX) and Intel Trust Domain Extensions (Intel TDX) help protect network workloads and encryption keys across edge-to-cloud infrastructure. - + In industrial and energy sectors, Intel vPro® Enterprise systems improve manageability and help reduce the operational costs of automation and control systems. Hardened platforms ensure system reliability in extreme conditions, and high core density provides more dedicated resources to VMs. - + Intel vPro® Enterprise systems also offer higher performance per watt, one-core density, and faster DDR5 memory bandwidth to enhance throughput and efficiency for edge security workloads. Intel QuickAssist Technology (Intel QAT) accelerates and offloads key encryption/compression workloads from the CPU to free up CPU cycles. Trusted execution environments (TEEs) with Intel Software Guard Extensions (Intel SGX) and Intel Trust Domain Extensions (Intel TDX) harden platforms from unauthorized access. - + Cache Allocation Technology (CAT) within the Intel® Resource Director Technology (Intel® RDT) framework enables performance prioritization for key applications to help meet real-time deterministic requirements. - - + + Gradio Demo @@ -1159,16 +1159,16 @@ First we can check the default prompt template in LlamaIndex pipeline. .. code:: ipython3 prompts_dict = query_engine.get_prompts() - - + + def display_prompt_dict(prompts_dict): for k, p in prompts_dict.items(): text_md = f"**Prompt Key**: {k}
" f"**Text:**
" display(Markdown(text_md)) print(p.get_template()) display(Markdown("

")) - - + + display_prompt_dict(prompts_dict) @@ -1184,7 +1184,7 @@ First we can check the default prompt template in LlamaIndex pipeline. --------------------- Given the context information and not prior knowledge, answer the query. Query: {query_str} - Answer: + Answer: @@ -1204,7 +1204,7 @@ First we can check the default prompt template in LlamaIndex pipeline. {context_msg} ------------ Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer. - Refined Answer: + Refined Answer: @@ -1216,35 +1216,35 @@ First we can check the default prompt template in LlamaIndex pipeline. from langchain.text_splitter import RecursiveCharacterTextSplitter from llama_index.core.node_parser import LangchainNodeParser import gradio as gr - + TEXT_SPLITERS = { "SentenceSplitter": SentenceSplitter, "RecursiveCharacter": RecursiveCharacterTextSplitter, } - - + + def default_partial_text_processor(partial_text: str, new_text: str): """ helper for updating partially generated answer, used by default - + Params: partial_text: text buffer for storing previosly generated text new_text: text update for the current step Returns: updated text string - + """ partial_text += new_text return partial_text - - + + text_processor = llm_model_configuration.get("partial_text_processor", default_partial_text_processor) - - + + def create_vectordb(doc, spliter_name, chunk_size, chunk_overlap, vector_search_top_k, vector_rerank_top_n, run_rerank): """ Initialize a vector database - + Params: doc: orignal documents provided by user chunk_size: size of a single sentence chunk @@ -1252,14 +1252,14 @@ First we can check the default prompt template in LlamaIndex pipeline. vector_search_top_k: Vector search top k vector_rerank_top_n: Rerrank top n run_rerank: whether to run reranker - + """ global query_engine global index - + if vector_rerank_top_n > vector_search_top_k: gr.Warning("Search top k must >= Rerank top n") - + loader = PyMuPDFReader() documents = loader.load(file_path=doc.name) spliter = TEXT_SPLITERS[spliter_name](chunk_size=chunk_size, chunk_overlap=chunk_overlap) @@ -1268,7 +1268,7 @@ First we can check the default prompt template in LlamaIndex pipeline. faiss_index = faiss.IndexFlatL2(d) vector_store = FaissVectorStore(faiss_index=faiss_index) storage_context = StorageContext.from_defaults(vector_store=vector_store) - + index = VectorStoreIndex.from_documents( documents, storage_context=storage_context, @@ -1279,37 +1279,37 @@ First we can check the default prompt template in LlamaIndex pipeline. 
query_engine = index.as_query_engine(streaming=True, similarity_top_k=vector_search_top_k, node_postprocessors=[reranker]) else: query_engine = index.as_query_engine(streaming=True, similarity_top_k=vector_search_top_k) - + return "Vector database is Ready" - - + + def update_retriever(vector_search_top_k, vector_rerank_top_n, run_rerank): """ Update retriever - + Params: vector_search_top_k: size of searching results vector_rerank_top_n: size of rerank results run_rerank: whether run rerank step - + """ global query_engine global index - + if vector_rerank_top_n > vector_search_top_k: gr.Warning("Search top k must >= Rerank top n") - + if run_rerank: reranker.top_n = vector_rerank_top_n query_engine = index.as_query_engine(streaming=True, similarity_top_k=vector_search_top_k, node_postprocessors=[reranker]) else: query_engine = index.as_query_engine(streaming=True, similarity_top_k=vector_search_top_k) - - + + def bot(history, temperature, top_p, top_k, repetition_penalty, do_rag): """ callback function for running chatbot on submit button click - + Params: history: conversation history temperature: parameter for control the level of creativity in AI-generated text. @@ -1318,7 +1318,7 @@ First we can check the default prompt template in LlamaIndex pipeline. top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability. repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text. do_rag: whether do RAG when generating texts. - + """ llm.generate_kwargs = dict( temperature=temperature, @@ -1327,7 +1327,7 @@ First we can check the default prompt template in LlamaIndex pipeline. top_k=top_k, repetition_penalty=repetition_penalty, ) - + partial_text = "" if do_rag: streaming_response = query_engine.query(history[-1][0]) @@ -1341,8 +1341,8 @@ First we can check the default prompt template in LlamaIndex pipeline. partial_text = text_processor(partial_text, new_text.delta) history[-1][1] = partial_text yield history - - + + def request_cancel(): llm._model.request.cancel() @@ -1351,9 +1351,9 @@ First we can check the default prompt template in LlamaIndex pipeline. if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llm-rag-llamaindex/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo( load_doc_fn=create_vectordb, run_fn=bot, @@ -1362,7 +1362,7 @@ First we can check the default prompt template in LlamaIndex pipeline. model_name=llm_model_id.value, language=model_language.value, ) - + try: demo.queue().launch() except Exception: diff --git a/docs/notebooks/mllama-3.2-with-output.rst b/docs/notebooks/mllama-3.2-with-output.rst index 14a7a819a8b5ac..1e1417b476feb9 100644 --- a/docs/notebooks/mllama-3.2-with-output.rst +++ b/docs/notebooks/mllama-3.2-with-output.rst @@ -514,7 +514,7 @@ blog `__ + API `__ provides functionality for low-level GPU memory management, we can use this feature for sharing cross-attention keys and values between Image Encoder and Language Model. diff --git a/docs/notebooks/model-server-with-output.rst b/docs/notebooks/model-server-with-output.rst index d5a9347a46e807..34b4570439a6a7 100644 --- a/docs/notebooks/model-server-with-output.rst +++ b/docs/notebooks/model-server-with-output.rst @@ -104,10 +104,10 @@ image and a message. .. 
parsed-literal:: - + Hello from Docker! This message shows that your installation appears to be working correctly. - + To generate this message, Docker took the following steps: 1. The Docker client contacted the Docker daemon. 2. The Docker daemon pulled the "hello-world" image from the Docker Hub. @@ -116,16 +116,16 @@ image and a message. executable that produces the output you are currently reading. 4. The Docker daemon streamed that output to the Docker client, which sent it to your terminal. - + To try something more ambitious, you can run an Ubuntu container with: $ docker run -it ubuntu bash - + Share images, automate workflows, and more with a free Docker ID: https://hub.docker.com/ - + For more examples and ideas, visit: https://docs.docker.com/get-started/ - + Step 2: Preparing a Model Repository @@ -186,21 +186,21 @@ following rules: .. code:: ipython3 import os - + # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - + open("notebook_utils.py", "w").write(r.text) from notebook_utils import download_file - + dedicated_dir = "models" model_name = "detection" model_version = "1" - + MODEL_DIR = f"{dedicated_dir}/{model_name}/{model_version}" XML_PATH = "horizontal-text-detection-0001.xml" BIN_PATH = "horizontal-text-detection-0001.bin" @@ -211,7 +211,7 @@ following rules: model_bin_url = ( "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/horizontal-text-detection-0001.bin" ) - + download_file(model_xml_url, XML_PATH, MODEL_DIR) download_file(model_bin_url, BIN_PATH, MODEL_DIR) @@ -246,14 +246,14 @@ Searching for an available serving port in local. .. code:: ipython3 import socket - + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.bind(("localhost", 0)) sock.listen(1) port = sock.getsockname()[1] sock.close() print(f"Port {port} is available") - + os.environ["port"] = str(port) @@ -286,7 +286,7 @@ Check whether the OVMS container is running normally: The required Model Server parameters are listed below. For additional configuration options, see the `Model Server Parameters -section `__. +section `__. .. raw:: html @@ -754,7 +754,7 @@ Request Model Status .. code:: ipython3 address = "localhost:" + str(port) - + # Bind the grpc address to the client object client = make_grpc_client(address) model_status = client.get_model_status(model_name=model_name) @@ -794,16 +794,16 @@ Load input image "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg", directory="data", ) - + # Text detection models expect an image in BGR format. image = cv2.imread(str(image_filename)) fp_image = image.astype("float32") - + # Resize the image to meet network expected input sizes. input_shape = model_metadata["inputs"]["image"]["shape"] height, width = input_shape[2], input_shape[3] resized_image = cv2.resize(fp_image, (height, width)) - + # Reshape to the network input shape. input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0) plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) @@ -835,10 +835,10 @@ Request Prediction on a Numpy Array .. code:: ipython3 inputs = {"image": input_image} - + # Run inference on model server and receive the result data boxes = client.predict(inputs=inputs, model_name=model_name)["boxes"] - + # Remove zero only boxes. 
boxes = boxes[~np.all(boxes == 0, axis=1)] print(boxes) @@ -866,17 +866,17 @@ Visualization def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True): # Define colors for boxes and descriptions. colors = {"red": (255, 0, 0), "green": (0, 255, 0)} - + # Fetch the image shapes to calculate a ratio. (real_y, real_x), (resized_y, resized_x) = ( bgr_image.shape[:2], resized_image.shape[:2], ) ratio_x, ratio_y = real_x / resized_x, real_y / resized_y - + # Convert the base image from BGR to RGB format. rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB) - + # Iterate through non-zero boxes. for box in boxes: # Pick a confidence factor from the last place in an array. @@ -888,10 +888,10 @@ Visualization (x_min, y_min, x_max, y_max) = [ (int(max(corner_position * ratio_y, 10)) if idx % 2 else int(corner_position * ratio_x)) for idx, corner_position in enumerate(box[:-1]) ] - + # Draw a box based on the position, parameters in rectangle function are: image, start_point, end_point, color, thickness. rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), colors["green"], 3) - + # Add text to the image based on position and confidence. # Parameters in text function are: image, text, bottom-left_corner_textfield, font, font_scale, color, thickness, line_type. if conf_labels: @@ -905,7 +905,7 @@ Visualization 1, cv2.LINE_AA, ) - + return rgb_image .. code:: ipython3 @@ -946,6 +946,6 @@ References 1. `OpenVINO™ Model Server - documentation `__ + documentation `__ 2. `OpenVINO™ Model Server GitHub repository `__ diff --git a/docs/notebooks/multilora-image-generation-with-output.rst b/docs/notebooks/multilora-image-generation-with-output.rst index e2da1edafdd8f6..cba6380442b7ca 100644 --- a/docs/notebooks/multilora-image-generation-with-output.rst +++ b/docs/notebooks/multilora-image-generation-with-output.rst @@ -116,7 +116,7 @@ and OpenVINO to accelerate end-to-end pipelines on Intel architectures. It provides ease-to-use `interface `__ for exporting models to `OpenVINO Intermediate Representation -(IR) `__ +(IR) `__ format. Applying LoRA to Original Diffusers pipeline before conversion diff --git a/docs/notebooks/music-generation-with-output.rst b/docs/notebooks/music-generation-with-output.rst index 463223b51bdf6a..afe971c8b7002a 100644 --- a/docs/notebooks/music-generation-with-output.rst +++ b/docs/notebooks/music-generation-with-output.rst @@ -542,7 +542,7 @@ Embedding the converted models into the original pipeline OpenVINO™ Runtime Python API is used to compile the model in OpenVINO IR format. The -`Core `__ +`Core `__ class provides access to the OpenVINO Runtime API. The ``core`` object, which is an instance of the ``Core`` class represents the API and it is used to compile the model. diff --git a/docs/notebooks/optimize-preprocessing-with-output.rst b/docs/notebooks/optimize-preprocessing-with-output.rst index 81e94ff3595923..a22d06d19ddee8 100644 --- a/docs/notebooks/optimize-preprocessing-with-output.rst +++ b/docs/notebooks/optimize-preprocessing-with-output.rst @@ -260,7 +260,7 @@ Graph modifications of a model shall be performed after the model is read from a drive and before it is loaded on the actual device. Pre-processing support following operations (please, see more details -`here `__) +`here `__) - Mean/Scale Normalization - Converting Precision @@ -295,7 +295,7 @@ Create ``PrePostProcessor`` Object The -`PrePostProcessor() `__ +`PrePostProcessor() `__ class enables specifying the preprocessing and postprocessing steps for a model. 
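As an illustration of the pattern this section describes, a minimal sketch of creating a ``PrePostProcessor`` and overriding the input tensor description might look as follows; the model path and the concrete preprocessing steps are placeholders rather than values from the original notebook.

.. code:: ipython3

    import openvino as ov
    from openvino.preprocess import PrePostProcessor

    core = ov.Core()
    model = core.read_model("model.xml")  # placeholder path

    ppp = PrePostProcessor(model)
    # describe the data the application will actually provide ...
    ppp.input().tensor().set_element_type(ov.Type.u8).set_layout(ov.Layout("NHWC"))
    # ... and the layout the model expects, so the required conversions are inserted
    ppp.input().model().set_layout(ov.Layout("NCHW"))
    # explicit preprocessing steps applied on the device side
    ppp.input().preprocess().convert_element_type(ov.Type.f32).scale(255.0)
    model = ppp.build()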
@@ -320,7 +320,7 @@ about user’s input tensor will be initialized to same data (type/shape/etc) as model’s input parameter. User application can override particular parameters according to application’s data. Refer to the following -`page `__ +`page `__ for more information about parameters for overriding. Below is all the specified input information: diff --git a/src/common/util/include/openvino/util/common_util.hpp b/src/common/util/include/openvino/util/common_util.hpp index a11adf29cd14f1..15ec5d8f27d588 100644 --- a/src/common/util/include/openvino/util/common_util.hpp +++ b/src/common/util/include/openvino/util/common_util.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -173,5 +174,11 @@ inline void erase_if(Container& data, const PredicateT& predicate) { std::string filter_lines_by_prefix(const std::string& str, const std::string& prefix); +template +constexpr std::array, std::common_type_t, T>, sizeof...(Args)> make_array( + Args&&... args) { + return {std::forward(args)...}; +} + } // namespace util } // namespace ov diff --git a/src/core/include/openvino/core/type/element_type.hpp b/src/core/include/openvino/core/type/element_type.hpp index 960b318b81262c..b454d886107e7c 100644 --- a/src/core/include/openvino/core/type/element_type.hpp +++ b/src/core/include/openvino/core/type/element_type.hpp @@ -70,11 +70,11 @@ enum class Type_t { /// \ingroup ov_element_cpp_api class OPENVINO_API Type { public: - Type() = default; - Type(const Type&) = default; + constexpr Type() = default; + constexpr Type(const Type&) = default; constexpr Type(const Type_t t) : m_type{t} {} explicit Type(const std::string& type); - Type& operator=(const Type&) = default; + constexpr Type& operator=(const Type&) = default; std::string c_type_string() const; size_t size() const; size_t hash() const; @@ -95,6 +95,8 @@ class OPENVINO_API Type { // The name of this type, the enum name of this type std::string get_type_name() const; friend OPENVINO_API std::ostream& operator<<(std::ostream&, const Type&); + + OPENVINO_DEPRECATED("This function is deprecated and will be removed in 2026.0.") static std::vector get_known_types(); /// \brief Checks whether this element type is merge-compatible with `t`. 
@@ -137,129 +139,131 @@ using TypeVector = std::vector; /// \brief undefined element type /// \ingroup ov_element_cpp_api -constexpr Type undefined(Type_t::undefined); +inline constexpr Type undefined(Type_t::undefined); /// \brief dynamic element type /// \ingroup ov_element_cpp_api -constexpr Type dynamic(Type_t::dynamic); +inline constexpr Type dynamic(Type_t::dynamic); /// \brief boolean element type /// \ingroup ov_element_cpp_api -constexpr Type boolean(Type_t::boolean); +inline constexpr Type boolean(Type_t::boolean); /// \brief bf16 element type /// \ingroup ov_element_cpp_api -constexpr Type bf16(Type_t::bf16); +inline constexpr Type bf16(Type_t::bf16); /// \brief f16 element type /// \ingroup ov_element_cpp_api -constexpr Type f16(Type_t::f16); +inline constexpr Type f16(Type_t::f16); /// \brief f32 element type /// \ingroup ov_element_cpp_api -constexpr Type f32(Type_t::f32); +inline constexpr Type f32(Type_t::f32); /// \brief f64 element type /// \ingroup ov_element_cpp_api -constexpr Type f64(Type_t::f64); +inline constexpr Type f64(Type_t::f64); /// \brief i4 element type /// \ingroup ov_element_cpp_api -constexpr Type i4(Type_t::i4); +inline constexpr Type i4(Type_t::i4); /// \brief i8 element type /// \ingroup ov_element_cpp_api -constexpr Type i8(Type_t::i8); +inline constexpr Type i8(Type_t::i8); /// \brief i16 element type /// \ingroup ov_element_cpp_api -constexpr Type i16(Type_t::i16); +inline constexpr Type i16(Type_t::i16); /// \brief i32 element type /// \ingroup ov_element_cpp_api -constexpr Type i32(Type_t::i32); +inline constexpr Type i32(Type_t::i32); /// \brief i64 element type /// \ingroup ov_element_cpp_api -constexpr Type i64(Type_t::i64); +inline constexpr Type i64(Type_t::i64); /// \brief binary element type /// \ingroup ov_element_cpp_api -constexpr Type u1(Type_t::u1); +inline constexpr Type u1(Type_t::u1); /// \brief u2 element type /// \ingroup ov_element_cpp_api -constexpr Type u2(Type_t::u2); +inline constexpr Type u2(Type_t::u2); /// \brief u3 element type /// \ingroup ov_element_cpp_api -constexpr Type u3(Type_t::u3); +inline constexpr Type u3(Type_t::u3); /// \brief u4 element type /// \ingroup ov_element_cpp_api -constexpr Type u4(Type_t::u4); +inline constexpr Type u4(Type_t::u4); /// \brief u6 element type /// \ingroup ov_element_cpp_api -constexpr Type u6(Type_t::u6); +inline constexpr Type u6(Type_t::u6); /// \brief u8 element type /// \ingroup ov_element_cpp_api -constexpr Type u8(Type_t::u8); +inline constexpr Type u8(Type_t::u8); /// \brief u16 element type /// \ingroup ov_element_cpp_api -constexpr Type u16(Type_t::u16); +inline constexpr Type u16(Type_t::u16); /// \brief u32 element type /// \ingroup ov_element_cpp_api -constexpr Type u32(Type_t::u32); +inline constexpr Type u32(Type_t::u32); /// \brief u64 element type /// \ingroup ov_element_cpp_api -constexpr Type u64(Type_t::u64); +inline constexpr Type u64(Type_t::u64); /// \brief nf4 element type /// \ingroup ov_element_cpp_api -constexpr Type nf4(Type_t::nf4); +inline constexpr Type nf4(Type_t::nf4); /// \brief f8e4m3 element type /// \ingroup ov_element_cpp_api -constexpr Type f8e4m3(Type_t::f8e4m3); +inline constexpr Type f8e4m3(Type_t::f8e4m3); /// \brief f8e4m3 element type /// \ingroup ov_element_cpp_api -constexpr Type f8e5m2(Type_t::f8e5m2); +inline constexpr Type f8e5m2(Type_t::f8e5m2); /// \brief string element type /// \ingroup ov_element_cpp_api -constexpr Type string(Type_t::string); +inline constexpr Type string(Type_t::string); /// \brief f4e2m1 element type /// 
\ingroup ov_element_cpp_api -constexpr Type f4e2m1(Type_t::f4e2m1); +inline constexpr Type f4e2m1(Type_t::f4e2m1); /// \brief f8e8m0 element type /// \ingroup ov_element_cpp_api -constexpr Type f8e8m0(Type_t::f8e8m0); +inline constexpr Type f8e8m0(Type_t::f8e8m0); -template -Type from() { - OPENVINO_THROW("Unknown type"); +template +constexpr Type from() { + if constexpr (std::is_same_v || std::is_same_v) { + return boolean; + } else if constexpr (std::is_same_v) { + return f16; + } else if constexpr (std::is_same_v) { + return f32; + } else if constexpr (std::is_same_v) { + return f64; + } else if constexpr (std::is_same_v) { + return i8; + } else if constexpr (std::is_same_v) { + return i16; + } else if constexpr (std::is_same_v) { + return i32; + } else if constexpr (std::is_same_v) { + return i64; + } else if constexpr (std::is_same_v) { + return u8; + } else if constexpr (std::is_same_v) { + return u16; + } else if constexpr (std::is_same_v) { + return u32; + } else if constexpr (std::is_same_v) { + return u64; + } else if constexpr (std::is_same_v) { + return bf16; + } else if constexpr (std::is_same_v) { + return f8e4m3; + } else if constexpr (std::is_same_v) { + return f8e5m2; + } else if constexpr (std::is_same_v) { + return string; + } else if constexpr (std::is_same_v) { + return f4e2m1; + } else if constexpr (std::is_same_v) { + return f8e8m0; + } else { + OPENVINO_THROW("Unknown type"); + } } -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); -template <> -OPENVINO_API Type from(); +OPENVINO_DEPRECATED( + "This function is deprecated and will be removed in 2026.0. 
Use ov::fundamental_type_for instead") OPENVINO_API Type fundamental_type_for(const Type& type); OPENVINO_API @@ -281,12 +285,12 @@ template <> class OPENVINO_API AttributeAdapter : public ValueAccessor { public: OPENVINO_RTTI("AttributeAdapter"); - AttributeAdapter(ov::element::Type& value) : m_ref(value) {} + constexpr AttributeAdapter(ov::element::Type& value) : m_ref(value) {} const std::string& get() override; void set(const std::string& value) override; - operator ov::element::Type&() { + constexpr operator ov::element::Type&() { return m_ref; } diff --git a/src/core/src/type/element_type.cpp b/src/core/src/type/element_type.cpp index 3fdda4d7f55cf8..bd61d1c985ea97 100644 --- a/src/core/src/type/element_type.cpp +++ b/src/core/src/type/element_type.cpp @@ -7,11 +7,21 @@ #include #include #include +#include #include #include "openvino/core/type/element_type_traits.hpp" +#include "openvino/util/common_util.hpp" +namespace ov::element { namespace { +constexpr size_t idx(Type_t e) noexcept { + return static_cast>(e); +} + +// Update it when new type is added +constexpr size_t enum_types_size = idx(f8e8m0) + 1; + struct TypeInfo { size_t m_bitwidth; bool m_is_real; @@ -19,252 +29,179 @@ struct TypeInfo { bool m_is_quantized; const char* m_cname; const char* m_type_name; -}; - -struct ElementTypes { - struct TypeHash { - size_t operator()(ov::element::Type_t t) const { - return static_cast(t); + const char* const* aliases; + size_t alias_count; + + bool has_name(const std::string& type) const { + if (type == m_type_name) { + return true; + } else { + const auto last = aliases + alias_count; + return std::find(aliases, last, type) != last; } - }; - - using ElementsMap = std::unordered_map; -}; + } -inline TypeInfo get_type_info(ov::element::Type_t type) { - switch (type) { - case ov::element::Type_t::undefined: - return {std::numeric_limits::max(), false, false, false, "undefined", "undefined"}; - case ov::element::Type_t::dynamic: - return {0, false, false, false, "dynamic", "dynamic"}; - case ov::element::Type_t::boolean: - return {8, false, true, false, "char", "boolean"}; - case ov::element::Type_t::bf16: - return {16, true, true, false, "bfloat16", "bf16"}; - case ov::element::Type_t::f16: - return {16, true, true, false, "float16", "f16"}; - case ov::element::Type_t::f32: - return {32, true, true, false, "float", "f32"}; - case ov::element::Type_t::f64: - return {64, true, true, false, "double", "f64"}; - case ov::element::Type_t::i4: - return {4, false, true, true, "int4_t", "i4"}; - case ov::element::Type_t::i8: - return {8, false, true, true, "int8_t", "i8"}; - case ov::element::Type_t::i16: - return {16, false, true, false, "int16_t", "i16"}; - case ov::element::Type_t::i32: - return {32, false, true, true, "int32_t", "i32"}; - case ov::element::Type_t::i64: - return {64, false, true, false, "int64_t", "i64"}; - case ov::element::Type_t::u1: - return {1, false, false, false, "uint1_t", "u1"}; - case ov::element::Type_t::u2: - return {2, false, false, false, "uint2_t", "u2"}; - case ov::element::Type_t::u3: - return {3, false, false, false, "uint3_t", "u3"}; - case ov::element::Type_t::u4: - return {4, false, false, false, "uint4_t", "u4"}; - case ov::element::Type_t::u6: - return {6, false, false, false, "uint6_t", "u6"}; - case ov::element::Type_t::u8: - return {8, false, false, true, "uint8_t", "u8"}; - case ov::element::Type_t::u16: - return {16, false, false, false, "uint16_t", "u16"}; - case ov::element::Type_t::u32: - return {32, false, false, false, "uint32_t", "u32"}; - 
case ov::element::Type_t::u64: - return {64, false, false, false, "uint64_t", "u64"}; - case ov::element::Type_t::nf4: - return {4, false, false, true, "nfloat4", "nf4"}; - case ov::element::Type_t::f8e4m3: - return {8, true, true, true, "f8e4m3", "f8e4m3"}; - case ov::element::Type_t::f8e5m2: - return {8, true, true, true, "f8e5m2", "f8e5m2"}; - case ov::element::Type_t::string: - return {8 * sizeof(std::string), false, false, false, "string", "string"}; - case ov::element::Type_t::f4e2m1: - return {4, true, true, true, "f4e2m1", "f4e2m1"}; - case ov::element::Type_t::f8e8m0: - return {8, true, true, true, "f8e8m0", "f8e8m0"}; - default: - OPENVINO_THROW("ov::element::Type_t not supported: ", type); + constexpr bool is_valid() const { + return m_cname != nullptr && m_type_name != nullptr; } }; +; + +constexpr TypeInfo type_info(size_t bitwidth, + bool is_real, + bool is_signed, + bool is_quantized, + const char* cname, + const char* type_name) { + return {bitwidth, is_real, is_signed, is_quantized, cname, type_name, nullptr, 0}; +} + +template +constexpr TypeInfo type_info(size_t bitwidth, + bool is_real, + bool is_signed, + bool is_quantized, + const char* cname, + const char* type_name, + const Array& aliases) { + return {bitwidth, is_real, is_signed, is_quantized, cname, type_name, aliases.data(), aliases.size()}; +} + +constexpr auto undefined_aliases = util::make_array("UNSPECIFIED"); +constexpr auto boolean_aliases = util::make_array("BOOL"); +constexpr auto bf16_aliases = util::make_array("BF16"); +constexpr auto f16_aliases = util::make_array("FP16"); +constexpr auto f32_aliases = util::make_array("FP32"); +constexpr auto f64_aliases = util::make_array("FP64"); +constexpr auto i4_aliases = util::make_array("I4"); +constexpr auto i8_aliases = util::make_array("I8"); +constexpr auto i16_aliases = util::make_array("I16"); +constexpr auto i32_aliases = util::make_array("I32"); +constexpr auto i64_aliases = util::make_array("I64"); +constexpr auto u1_aliases = util::make_array("U1", "bin", "BIN"); +constexpr auto u2_aliases = util::make_array("U2"); +constexpr auto u3_aliases = util::make_array("U3"); +constexpr auto u4_aliases = util::make_array("U4"); +constexpr auto u6_aliases = util::make_array("U6"); +constexpr auto u8_aliases = util::make_array("U8"); +constexpr auto u16_aliases = util::make_array("U16"); +constexpr auto u32_aliases = util::make_array("U32"); +constexpr auto u64_aliases = util::make_array("U64"); +constexpr auto nf4_aliases = util::make_array("NF4"); +constexpr auto f8e4m3_aliases = util::make_array("F8E4M3"); +constexpr auto f8e5m2_aliases = util::make_array("F8E5M2"); +constexpr auto string_aliases = util::make_array("STRING"); +constexpr auto f4e2m1_aliases = util::make_array("F4E2M1"); +constexpr auto f8e8m0_aliases = util::make_array("F8E8M0"); + +static constexpr std::array types_info = { + type_info(std::numeric_limits::max(), + false, + false, + false, + "undefined", + "undefined", + undefined_aliases), // undefined + type_info(0, false, false, false, "dynamic", "dynamic"), // dynamic + type_info(8, false, true, false, "char", "boolean", boolean_aliases), // boolean + type_info(16, true, true, false, "bfloat16", "bf16", bf16_aliases), // bf16 + type_info(16, true, true, false, "float16", "f16", f16_aliases), // f16 + type_info(32, true, true, false, "float", "f32", f32_aliases), // f32 + type_info(64, true, true, false, "double", "f64", f64_aliases), // f64 + type_info(4, false, true, true, "int4_t", "i4", i4_aliases), // i4 + type_info(8, false, true, 
true, "int8_t", "i8", i8_aliases), // i8 + type_info(16, false, true, false, "int16_t", "i16", i16_aliases), // i16 + type_info(32, false, true, true, "int32_t", "i32", i32_aliases), // i32 + type_info(64, false, true, false, "int64_t", "i64", i64_aliases), // i64 + type_info(1, false, false, false, "uint1_t", "u1", u1_aliases), // u1 + type_info(2, false, false, false, "uint2_t", "u2", u2_aliases), // u2 + type_info(3, false, false, false, "uint3_t", "u3", u3_aliases), // u3 + type_info(4, false, false, false, "uint4_t", "u4", u4_aliases), // u4 + type_info(6, false, false, false, "uint6_t", "u6", u6_aliases), // u6 + type_info(8, false, false, true, "uint8_t", "u8", u8_aliases), // u8 + type_info(16, false, false, false, "uint16_t", "u16", u16_aliases), // u16 + type_info(32, false, false, false, "uint32_t", "u32", u32_aliases), // u32 + type_info(64, false, false, false, "uint64_t", "u64", u64_aliases), // u64 + type_info(4, false, false, true, "nfloat4", "nf4", nf4_aliases), // nf4 + type_info(8, true, true, true, "f8e4m3", "f8e4m3", f8e4m3_aliases), // f8e4m3 + type_info(8, true, true, true, "f8e5m2", "f8e5m2", f8e5m2_aliases), // f8e5m2 + type_info(8 * sizeof(std::string), false, false, false, "string", "string", string_aliases), // string + type_info(4, true, true, true, "f4e2m1", "f4e2m1", f4e2m1_aliases), // f4e2m1 + type_info(8, true, true, true, "f8e8m0", "f8e8m0", f8e8m0_aliases) // f8e8m0 +}; -ov::element::Type type_from_string(const std::string& type) { - if (type == "f16" || type == "FP16") { - return ::ov::element::Type(::ov::element::Type_t::f16); - } else if (type == "f32" || type == "FP32") { - return ::ov::element::Type(::ov::element::Type_t::f32); - } else if (type == "bf16" || type == "BF16") { - return ::ov::element::Type(::ov::element::Type_t::bf16); - } else if (type == "f64" || type == "FP64") { - return ::ov::element::Type(::ov::element::Type_t::f64); - } else if (type == "i4" || type == "I4") { - return ::ov::element::Type(::ov::element::Type_t::i4); - } else if (type == "i8" || type == "I8") { - return ::ov::element::Type(::ov::element::Type_t::i8); - } else if (type == "i16" || type == "I16") { - return ::ov::element::Type(::ov::element::Type_t::i16); - } else if (type == "i32" || type == "I32") { - return ::ov::element::Type(::ov::element::Type_t::i32); - } else if (type == "i64" || type == "I64") { - return ::ov::element::Type(::ov::element::Type_t::i64); - } else if (type == "u1" || type == "U1" || type == "BIN" || type == "bin") { - return ::ov::element::Type(::ov::element::Type_t::u1); - } else if (type == "u2" || type == "U2") { - return ::ov::element::Type(::ov::element::Type_t::u2); - } else if (type == "u3" || type == "U3") { - return ::ov::element::Type(::ov::element::Type_t::u3); - } else if (type == "u4" || type == "U4") { - return ::ov::element::Type(::ov::element::Type_t::u4); - } else if (type == "u6" || type == "U6") { - return ::ov::element::Type(::ov::element::Type_t::u6); - } else if (type == "u8" || type == "U8") { - return ::ov::element::Type(::ov::element::Type_t::u8); - } else if (type == "u16" || type == "U16") { - return ::ov::element::Type(::ov::element::Type_t::u16); - } else if (type == "u32" || type == "U32") { - return ::ov::element::Type(::ov::element::Type_t::u32); - } else if (type == "u64" || type == "U64") { - return ::ov::element::Type(::ov::element::Type_t::u64); - } else if (type == "boolean" || type == "BOOL") { - return ::ov::element::Type(::ov::element::Type_t::boolean); - } else if (type == "string" || type == 
"STRING") { - return ::ov::element::Type(::ov::element::Type_t::string); - } else if (type == "undefined" || type == "UNSPECIFIED") { - return ::ov::element::Type(::ov::element::Type_t::undefined); - } else if (type == "dynamic") { - return ::ov::element::Type(::ov::element::Type_t::dynamic); - } else if (type == "nf4" || type == "NF4") { - return ::ov::element::Type(::ov::element::Type_t::nf4); - } else if (type == "f8e4m3" || type == "F8E4M3") { - return ::ov::element::Type(::ov::element::Type_t::f8e4m3); - } else if (type == "f8e5m2" || type == "F8E5M2") { - return ::ov::element::Type(::ov::element::Type_t::f8e5m2); - } else if (type == "f4e2m1" || type == "F4E2M1") { - return ::ov::element::Type(::ov::element::Type_t::f4e2m1); - } else if (type == "f8e8m0" || type == "F8E8M0") { - return ::ov::element::Type(::ov::element::Type_t::f8e8m0); - } else { - OPENVINO_THROW("Incorrect type: ", type); +constexpr bool validate_types_info(decltype(types_info)& info, size_t i = 0) { + return i >= info.size() ? true : info[i].is_valid() ? validate_types_info(info, i + 1) : false; +} + +static_assert(validate_types_info(types_info), "Some entries of type_info are invalid."); + +constexpr bool is_valid_type_idx(size_t idx) { + return idx < types_info.size(); +} + +size_t type_idx_for(const std::string& type_name) { + size_t type_idx = 0; + for (; is_valid_type_idx(type_idx); ++type_idx) { + if (types_info[type_idx].has_name(type_name)) { + break; + } } + return type_idx; } + +const TypeInfo& get_type_info(Type_t type) { + const auto type_idx = idx(type); + OPENVINO_ASSERT(is_valid_type_idx(type_idx), "Type_t not supported: ", type_idx); + return types_info[type_idx]; +} + +Type type_from_string(const std::string& type) { + const auto type_idx = type_idx_for(type); + OPENVINO_ASSERT(is_valid_type_idx(type_idx), "Unsupported element type: ", type); + return {static_cast(type_idx)}; +} + +// generate known types automatically +static constexpr auto known_types = [] { + std::array types; + for (size_t idx = 1, i = 0; i < types.size(); ++idx, ++i) { + types[i] = Type{static_cast(idx)}; + } + return types; +}(); } // namespace -std::vector ov::element::Type::get_known_types() { - std::vector rc = { - &ov::element::dynamic, &ov::element::boolean, &ov::element::bf16, &ov::element::f16, &ov::element::f32, - &ov::element::f64, &ov::element::i4, &ov::element::i8, &ov::element::i16, &ov::element::i32, - &ov::element::i64, &ov::element::u1, &ov::element::u2, &ov::element::u3, &ov::element::u4, - &ov::element::u6, &ov::element::u8, &ov::element::u16, &ov::element::u32, &ov::element::u64, - &ov::element::nf4, &ov::element::f8e4m3, &ov::element::f8e5m2, &ov::element::string, &ov::element::f4e2m1, - &ov::element::f8e8m0}; - return rc; +std::vector Type::get_known_types() { + std::vector result(known_types.size()); + for (size_t i = 0; i < known_types.size(); ++i) { + result[i] = &known_types[i]; + } + return result; } -ov::element::Type::Type(const std::string& type) : Type(type_from_string(type)) {} +Type::Type(const std::string& type) : Type(type_from_string(type)) {} -std::string ov::element::Type::c_type_string() const { +std::string Type::c_type_string() const { return get_type_info(m_type).m_cname; } -size_t ov::element::Type::size() const { +size_t Type::size() const { return (bitwidth() + 7) >> 3; } -size_t ov::element::Type::hash() const { +size_t Type::hash() const { return static_cast(m_type); } -std::string ov::element::Type::get_type_name() const { +std::string Type::get_type_name() const { return 
to_string(); } -std::string ov::element::Type::to_string() const { +std::string Type::to_string() const { return get_type_info(m_type).m_type_name; } -namespace ov { -namespace element { -template <> -Type from() { - return Type_t::boolean; -} -template <> -Type from() { - return Type_t::boolean; -} -template <> -Type from() { - return Type_t::f16; -} -template <> -Type from() { - return Type_t::f32; -} -template <> -Type from() { - return Type_t::f64; -} -template <> -Type from() { - return Type_t::i8; -} -template <> -Type from() { - return Type_t::i16; -} -template <> -Type from() { - return Type_t::i32; -} -template <> -Type from() { - return Type_t::i64; -} -template <> -Type from() { - return Type_t::u8; -} -template <> -Type from() { - return Type_t::u16; -} -template <> -Type from() { - return Type_t::u32; -} -template <> -Type from() { - return Type_t::u64; -} -template <> -Type from() { - return Type_t::bf16; -} -template <> -Type from() { - return Type_t::f8e4m3; -} -template <> -Type from() { - return Type_t::f8e5m2; -} -template <> -Type from() { - return Type_t::string; -} -template <> -Type from() { - return Type_t::f4e2m1; -} -template <> -Type from() { - return Type_t::f8e8m0; -} - Type fundamental_type_for(const Type& type) { switch (type) { case Type_t::boolean: @@ -322,44 +259,24 @@ Type fundamental_type_for(const Type& type) { } } -} // namespace element -} // namespace ov - -std::ostream& ov::element::operator<<(std::ostream& out, const ov::element::Type& obj) { +std::ostream& operator<<(std::ostream& out, const Type& obj) { return out << obj.to_string(); } -std::istream& ov::element::operator>>(std::istream& in, ov::element::Type& obj) { - const std::unordered_map legacy = { - {"BOOL", ov::element::boolean}, {"BF16", ov::element::bf16}, {"I4", ov::element::i4}, - {"I8", ov::element::i8}, {"I16", ov::element::i16}, {"I32", ov::element::i32}, - {"I64", ov::element::i64}, {"U4", ov::element::u4}, {"U8", ov::element::u8}, - {"U16", ov::element::u16}, {"U32", ov::element::u32}, {"U64", ov::element::u64}, - {"FP32", ov::element::f32}, {"FP64", ov::element::f64}, {"FP16", ov::element::f16}, - {"BIN", ov::element::u1}, {"NF4", ov::element::nf4}, {"F8E4M3", ov::element::f8e4m3}, - {"F8E5M2", ov::element::f8e5m2}, {"STRING", ov::element::string}, {"F4E2M1", ov::element::f4e2m1}, - {"F8E8M0", ov::element::f8e8m0}}; +std::istream& operator>>(std::istream& in, Type& obj) { std::string str; in >> str; - auto it_legacy = legacy.find(str); - if (it_legacy != legacy.end()) { - obj = it_legacy->second; - return in; - } - for (auto&& type : Type::get_known_types()) { - if (type->to_string() == str) { - obj = *type; - break; - } + if (const auto type_idx = type_idx_for(str); is_valid_type_idx(type_idx)) { + obj = {static_cast(type_idx)}; } return in; } -bool ov::element::Type::compatible(const ov::element::Type& t) const { +bool Type::compatible(const Type& t) const { return (is_dynamic() || t.is_dynamic() || *this == t); } -bool ov::element::Type::merge(ov::element::Type& dst, const ov::element::Type& t1, const ov::element::Type& t2) { +bool Type::merge(Type& dst, const Type& t1, const Type& t2) { if (t1.is_dynamic()) { dst = t2; return true; @@ -374,69 +291,30 @@ bool ov::element::Type::merge(ov::element::Type& dst, const ov::element::Type& t } } -bool ov::element::Type::is_static() const { +bool Type::is_static() const { return get_type_info(m_type).m_bitwidth != 0; } -bool ov::element::Type::is_real() const { +bool Type::is_real() const { return get_type_info(m_type).m_is_real; } 
-bool ov::element::Type::is_integral_number() const { - return is_integral() && (m_type != ov::element::boolean); +bool Type::is_integral_number() const { + return is_integral() && (m_type != boolean); } -bool ov::element::Type::is_signed() const { +bool Type::is_signed() const { return get_type_info(m_type).m_is_signed; } -bool ov::element::Type::is_quantized() const { +bool Type::is_quantized() const { return get_type_info(m_type).m_is_quantized; } -size_t ov::element::Type::bitwidth() const { +size_t Type::bitwidth() const { return get_type_info(m_type).m_bitwidth; } - -inline size_t compiler_byte_size(ov::element::Type_t et) { - switch (et) { -#define ET_CASE(et) \ - case ov::element::Type_t::et: \ - return sizeof(ov::element_type_traits::value_type); - ET_CASE(boolean); - ET_CASE(bf16); - ET_CASE(f16); - ET_CASE(f32); - ET_CASE(f64); - ET_CASE(i4); - ET_CASE(i8); - ET_CASE(i16); - ET_CASE(i32); - ET_CASE(i64); - ET_CASE(u1); - ET_CASE(u2); - ET_CASE(u3); - ET_CASE(u4); - ET_CASE(u6); - ET_CASE(u8); - ET_CASE(u16); - ET_CASE(u32); - ET_CASE(u64); - ET_CASE(nf4); - ET_CASE(f8e4m3); - ET_CASE(f8e5m2); - ET_CASE(string); - ET_CASE(f4e2m1); - ET_CASE(f8e8m0); -#undef ET_CASE - case ov::element::Type_t::undefined: - return 0; - case ov::element::Type_t::dynamic: - return 0; - } - - OPENVINO_THROW("compiler_byte_size: Unsupported value of ov::element::Type_t: ", static_cast(et)); -} +} // namespace ov::element namespace ov { template <> diff --git a/src/frontends/common/include/openvino/frontend/extension/conversion.hpp b/src/frontends/common/include/openvino/frontend/extension/conversion.hpp index ee861b46033988..46f8ff1e793072 100644 --- a/src/frontends/common/include/openvino/frontend/extension/conversion.hpp +++ b/src/frontends/common/include/openvino/frontend/extension/conversion.hpp @@ -20,7 +20,7 @@ class FRONTEND_API ConversionExtensionBase : public ov::Extension { return m_op_type; } - ~ConversionExtensionBase() override = default; + ~ConversionExtensionBase() override; private: std::string m_op_type; diff --git a/src/frontends/common/src/extension/conversion.cpp b/src/frontends/common/src/extension/conversion.cpp index ff90c163604f8c..febd3868c2798b 100644 --- a/src/frontends/common/src/extension/conversion.cpp +++ b/src/frontends/common/src/extension/conversion.cpp @@ -6,4 +6,6 @@ using namespace ov::frontend; +ConversionExtensionBase::~ConversionExtensionBase() = default; + ConversionExtension::~ConversionExtension() = default; diff --git a/src/frontends/tensorflow_common/src/op/select.cpp b/src/frontends/tensorflow_common/src/op/select.cpp index f19e01f5a021e6..35c7e893e542e1 100644 --- a/src/frontends/tensorflow_common/src/op/select.cpp +++ b/src/frontends/tensorflow_common/src/op/select.cpp @@ -13,6 +13,7 @@ #include "openvino/op/shape_of.hpp" #include "openvino/op/squeeze.hpp" #include "openvino/op/subtract.hpp" +#include "openvino/op/unsqueeze.hpp" using namespace std; using namespace ov; @@ -31,7 +32,19 @@ OutputVector translate_select_base_op(const NodeContext& node, set_node_name(node.get_name(), select); return {select}; } - +bool has_complex_inputs(Output& x, Output& y, element::Type& complex_part_type) { + auto complex_type_mark_x = as_type_ptr(x.get_node_shared_ptr()); + auto complex_type_mark_y = as_type_ptr(y.get_node_shared_ptr()); + if (complex_type_mark_x) { + x = complex_type_mark_x->input_value(0); + complex_part_type = complex_type_mark_x->get_complex_part_type(); + } + if (complex_type_mark_y) { + y = complex_type_mark_y->input_value(0); + complex_part_type = 
complex_type_mark_y->get_complex_part_type(); + } + return (complex_type_mark_x || complex_type_mark_y); +} OutputVector translate_select_v2_op(const NodeContext& node) { // according to the TensorFlow documentation. See in the code: // https://github.com/tensorflow/tensorflow/blob/v2.4.1/tensorflow/lite/kernels/select.cc#L188-L211 @@ -40,10 +53,23 @@ OutputVector translate_select_v2_op(const NodeContext& node) { // is true or the value of 'y' if false. There are valid condition input sizes: // 1. Either the same shape (in which case the select is elementwise), or // 2. Broadcastable shapes between 'condition', 'x' and 'y'. - default_op_checks(node, 3, {"SelectV2", "SELECT_V2"}); - // no preparation for inputs are needed - // inputs are already NumPy broadcastable - return translate_select_base_op(node, node.get_input(0), node.get_input(1), node.get_input(2)); + default_op_checks(node, 3, {"SelectV2", "SELECT_V2"}, true); + auto condition = node.get_input(0); + auto x = node.get_input(1); + auto y = node.get_input(2); + + element::Type complex_part_type; + auto is_complex = has_complex_inputs(x, y, complex_part_type); + + if (is_complex) { + auto const_negative_one = make_shared<v0::Constant>(element::i32, Shape{1}, -1); + auto new_condition = make_shared<v0::Unsqueeze>(condition, const_negative_one); + auto result = translate_select_base_op(node, new_condition, x, y); + auto complex_result = make_shared<ComplexTypeMark>(result[0].get_node_shared_ptr(), complex_part_type); + return {complex_result->output(0)}; + } else { + return translate_select_base_op(node, condition, x, y); + } } OutputVector translate_select_op(const NodeContext& node) { @@ -59,21 +85,9 @@ OutputVector translate_select_op(const NodeContext& node) { auto condition = node.get_input(0); auto x = node.get_input(1); auto y = node.get_input(2); - auto complex_type_mark_x = as_type_ptr<ComplexTypeMark>(x.get_node_shared_ptr()); - auto complex_type_mark_y = as_type_ptr<ComplexTypeMark>(y.get_node_shared_ptr()); - auto is_complex = (complex_type_mark_x || complex_type_mark_y); element::Type complex_part_type; - - if (complex_type_mark_x) { - x = complex_type_mark_x->input_value(0); - complex_part_type = complex_type_mark_x->get_complex_part_type(); - } - - if (complex_type_mark_y) { - y = complex_type_mark_y->input_value(0); - complex_part_type = complex_type_mark_y->get_complex_part_type(); - } + auto is_complex = has_complex_inputs(x, y, complex_part_type); // compute number of dimensions to unsqueeze the condition auto cond_rank = compute_subgraph_scalar_rank(condition, element::i32); @@ -85,14 +99,13 @@ OutputVector translate_select_op(const NodeContext& node) { auto new_subshape = make_shared<v3::Broadcast>(const_one, num_new_axes); auto cond_shape = make_shared<v3::ShapeOf>(condition, element::i32); // use extra dimensions in the begin to avoid concatenation of empty tensors that is not supported by Concat - auto const_1 = make_shared<v0::Constant>(element::i32, Shape{1}, 1); - auto new_cond_shape = make_shared<v0::Concat>(OutputVector{const_1, cond_shape, new_subshape}, 0); + auto new_cond_shape = make_shared<v0::Concat>(OutputVector{const_one, cond_shape, new_subshape}, 0); // prepare the condition to have the same rank as operands `x` and `y` auto prep_cond = make_shared<v1::Reshape>(condition, new_cond_shape, false)->output(0); // squeeze prep_cond by one extra dimension specially added - auto const_0 = make_shared<v0::Constant>(element::i32, Shape{1}, 0); - prep_cond = make_shared<v0::Squeeze>(prep_cond, const_0); + auto const_zero = make_shared<v0::Constant>(element::i32, Shape{1}, 0); + prep_cond = make_shared<v0::Squeeze>(prep_cond, const_zero); auto result = translate_select_base_op(node, prep_cond, x, y); if
(is_complex) { diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 0cad1840e5d1a8..7e2a0a8b4be441 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -38,18 +38,6 @@ ov::ICore::~ICore() = default; -namespace ov { -namespace util { -template -constexpr std::array< - typename std::conditional::value, typename std::common_type::type, T>::type, - sizeof...(Args)> -make_array(Args&&... args) { - return {std::forward(args)...}; -} -} // namespace util -} // namespace ov - namespace { #ifdef PROXY_PLUGIN_ENABLED diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp index 54d4ffaa433944..0cb074da6d426f 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp @@ -253,7 +253,7 @@ void BrgemmCopyBKernel::emit_brgemm_copy_b_kernel_call(size_t N, size_t offset_out, size_t offset_comp) { EmitABIRegSpills spill(this); - spill.preamble(); + spill.preamble(get_live_regs()); const auto add_offset = [&](Xbyak::Reg64 reg, size_t bytes_offset) { if (bytes_offset) { @@ -298,6 +298,16 @@ void BrgemmCopyBKernel::emit_brgemm_copy_b_kernel_call(size_t N, spill.postamble(); } +std::set<snippets::Reg> BrgemmCopyBKernel::get_live_regs() const { + // Only the registers `src_reg`, `tr_src_reg` and `comp_reg` should be + // saved on each `jit_brgemm_matmul_copy_b_t` binary call. + // They're ABI parameter registers (caller-saved), so we have to manually + // spill only them on each `jit_brgemm_matmul_copy_b_t` binary call. + return {{snippets::RegType::gpr, static_cast<size_t>(src_reg.getIdx())}, + {snippets::RegType::gpr, static_cast<size_t>(tr_src_reg.getIdx())}, + {snippets::RegType::gpr, static_cast<size_t>(comp_reg.getIdx())}}; +} + void BrgemmCopyBKernel::execute(matmul::jit_brgemm_matmul_copy_b_t* kernel, const void* src, const void* dst, diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.hpp index 5ef740067f2035..594702fed4ae3c 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.hpp @@ -170,6 +170,8 @@ struct BrgemmCopyBKernel : public RepackedInputKernel, public dnnl::impl::cpu::x void init_brgemm_copy_b_kernel(std::unique_ptr& kernel, const BrgemmCopyBKernelConfig& conf) const; + std::set<snippets::Reg> get_live_regs() const; + static constexpr auto abi_param_regs = dnnl::impl::cpu::x64::abi_param_regs; const Xbyak::Reg64 src_reg = abi_param2; const Xbyak::Reg64 tr_src_reg = abi_param3; diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index e50f8f413915a5..54e4e2e6fd5a8b 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -150,9 +150,7 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene data_type::f32); } - if (mayiuse(avx512_core)) { - uni_vcvtneps2bf16 = std::make_shared<jit_uni_vcvtneps2bf16>(this, isa); - } + uni_vcvtneps2bf16 = std::make_shared<jit_uni_vcvtneps2bf16>(this, isa); this->preamble(); @@ -188,9 +186,7 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene this->postamble(); - if (mayiuse(avx512_core)) { - uni_vcvtneps2bf16->emit_data(); - } + 
uni_vcvtneps2bf16->emit_data(); if (jcp_.reduce_mode == Algorithm::ReduceAnd || jcp_.reduce_mode == Algorithm::ReduceL1 || jcp_.reduce_mode == Algorithm::ReduceMax || jcp_.reduce_mode == Algorithm::ReduceMin || @@ -1017,9 +1013,15 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene uni_vmovups(op, vmm_dst); break; case memory::data_type::bf16: - uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, - {static_cast<size_t>(ymm_dst.getIdx())}); - vmovdqu16(op, ymm_dst); + if (isa == cpu::x64::avx512_core) { + uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, + {static_cast<size_t>(ymm_dst.getIdx())}); + vmovdqu16(op, ymm_dst); + } else { + uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, + {static_cast<size_t>(xmm_dst.getIdx())}); + uni_vmovdqu(op, xmm_dst); + } break; case memory::data_type::f16: vcvtps2ph(op, vmm_dst, 0x4); @@ -1253,9 +1255,7 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi data_type::f32); } - if (mayiuse(avx512_core)) { - uni_vcvtneps2bf16 = std::make_shared<jit_uni_vcvtneps2bf16>(this, isa); - } + uni_vcvtneps2bf16 = std::make_shared<jit_uni_vcvtneps2bf16>(this, isa); this->preamble(); @@ -1312,9 +1312,7 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi this->postamble(); - if (mayiuse(avx512_core)) { - uni_vcvtneps2bf16->emit_data(); - } + uni_vcvtneps2bf16->emit_data(); if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) { log_injector->prepare_table(); @@ -1770,9 +1768,15 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi uni_vmovups(op, vmm_dst); break; case memory::data_type::bf16: - uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, - {static_cast<size_t>(ymm_dst.getIdx())}); - vmovdqu16(op, ymm_dst); + if (isa == cpu::x64::avx512_core) { + uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, + {static_cast<size_t>(ymm_dst.getIdx())}); + vmovdqu16(op, ymm_dst); + } else { + uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())}, + {static_cast<size_t>(xmm_dst.getIdx())}); + uni_vmovdqu(op, xmm_dst); + } break; case memory::data_type::f16: vcvtps2ph(op, vmm_dst, 0x4); diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp index 3805fde5ce9bfb..a383fc2b7df220 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/utils/ranges.hpp @@ -34,16 +34,16 @@ struct Range { max_known_port = std::max(static_cast(max_known_port), 1); for (size_t port = 0; port < max_known_port; port++) { std::map<ov::element::Type, ov::test::utils::InputGenerateData> type_map; - for (auto& type : ov::element::Type::get_known_types()) { - ov::test::utils::InputGenerateData new_range = rangeByType.get_range(*type); - if (type->is_real() && port < real_port_ranges.size()) { + for (const auto& type : get_known_types()) { + ov::test::utils::InputGenerateData new_range = rangeByType.get_range(type); + if (type.is_real() && port < real_port_ranges.size()) { new_range.correct_range(real_port_ranges.at(port)); new_range.input_attribute = real_port_ranges.at(port).input_attribute; - } else if (type->is_integral() && port < int_port_ranges.size()) { + } else if (type.is_integral() && port < int_port_ranges.size()) { new_range.correct_range(int_port_ranges.at(port)); new_range.input_attribute = int_port_ranges.at(port).input_attribute; } - 
type_map[*type] = new_range; + type_map[type] = new_range; } data.push_back(type_map); } diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/type_ranges.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/type_ranges.hpp index c84b58066387f3..7dc5841869a493 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/type_ranges.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/type_ranges.hpp @@ -15,6 +15,21 @@ namespace ov { namespace test { namespace utils { +static const std::vector<ov::element::Type>& get_known_types() { + static const auto known_types = [] { + using namespace ov::element; + constexpr size_t enum_count = static_cast<std::underlying_type_t<Type_t>>(Type_t::f8e8m0) - 1; + + std::vector<Type> types(enum_count); + for (size_t idx = 1, i = 0; i < types.size(); ++idx, ++i) { + types[i] = Type{static_cast<Type_t>(idx)}; + } + return types; + }(); + + return known_types; +} + static ov::test::utils::InputGenerateData get_range_by_type( ov::element::Type elemType, uint32_t max_range_limit = testing::internal::Random::kMaxRange) { @@ -110,8 +125,8 @@ struct RangeByType { std::map<ov::element::Type, ov::test::utils::InputGenerateData> data; RangeByType() { - for (auto& type : ov::element::Type::get_known_types()) { - data[*type] = get_range_by_type(*type); + for (const auto& type : get_known_types()) { + data[type] = get_range_by_type(type); } } diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SelectV2.py b/tests/layer_tests/tensorflow_tests/test_tf_SelectV2.py index 058f2e21a4a60b..d199275bf34345 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_SelectV2.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_SelectV2.py @@ -51,3 +51,52 @@ def test_select_v2_basic(self, params, ie_device, precision, ir_version, temp_di self._test(*self.create_select_v2_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_legacy_frontend=use_legacy_frontend) + + +class TestComplexSelectV2(CommonTFLayerTest): + def _prepare_input(self, inputs_info): + rng = np.random.default_rng() + assert 'cond:0' in inputs_info, "Test error: inputs_info must contain `cond`" + assert 'x_real:0' in inputs_info, "Test error: inputs_info must contain `x_real`" + assert 'x_imag:0' in inputs_info, "Test error: inputs_info must contain `x_imag`" + assert 'y_real:0' in inputs_info, "Test error: inputs_info must contain `y_real`" + assert 'y_imag:0' in inputs_info, "Test error: inputs_info must contain `y_imag`" + cond_shape = inputs_info['cond:0'] + inputs_data = {} + inputs_data['cond:0'] = np.random.randint(0, 2, cond_shape).astype(bool) + for part in ['x_real:0', 'x_imag:0', 'y_real:0', 'y_imag:0']: + inputs_data[part] = 4 * rng.random(inputs_info[part]).astype(np.float32) - 2 + return inputs_data + + def create_complex_select_v2_net(self, cond_shape, x_shape, y_shape): + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + cond = tf.compat.v1.placeholder(tf.bool, cond_shape, 'cond') + x_real = tf.compat.v1.placeholder(tf.float32, x_shape, 'x_real') + x_imag = tf.compat.v1.placeholder(tf.float32, x_shape, 'x_imag') + y_real = tf.compat.v1.placeholder(tf.float32, y_shape, 'y_real') + y_imag = tf.compat.v1.placeholder(tf.float32, y_shape, 'y_imag') + complex_x = tf.raw_ops.Complex(real=x_real, imag=x_imag) + complex_y = tf.raw_ops.Complex(real=y_real, imag=y_imag) + complex_select = tf.raw_ops.SelectV2(condition=cond, t=complex_x, e=complex_y) + tf.raw_ops.Real(input=complex_select) + tf.raw_ops.Imag(input=complex_select) + tf.compat.v1.global_variables_initializer() + 
tf_net = sess.graph_def + return tf_net, None + + test_data_basic = [ + dict(cond_shape=[3, 1], x_shape=[3, 1], y_shape=[3, 1]), + dict(cond_shape=[], x_shape=[2], y_shape=[3, 2]), + dict(cond_shape=[4], x_shape=[3, 2, 1], y_shape=[2, 4]), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit + @pytest.mark.nightly + def test_complex_select_v2(self, params, ie_device, precision, ir_version, temp_dir, + use_legacy_frontend): + self._test(*self.create_complex_select_v2_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_legacy_frontend=use_legacy_frontend) \ No newline at end of file
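
Note: the snippet below is a minimal NumPy sketch, not part of the patch, illustrating why the complex branch of translate_select_v2_op unsqueezes the condition along the last axis. The TF frontend's ComplexTypeMark lowers a complex tensor to a floating-point tensor with a trailing axis of size 2 holding the real and imaginary parts, so the boolean condition needs one extra trailing axis before it can broadcast against the lowered operands. All shapes are illustrative.

import numpy as np

# Complex operands lowered the way ComplexTypeMark represents them:
# a real tensor with a trailing axis of size 2 holding [real, imag].
x = np.stack([np.ones((3, 2)), np.zeros((3, 2))], axis=-1)     # shape (3, 2, 2)
y = np.stack([-np.ones((3, 2)), np.zeros((3, 2))], axis=-1)    # shape (3, 2, 2)
cond = np.array([[True, False], [False, True], [True, True]])  # shape (3, 2)

# Without the extra axis, (3, 2) does not broadcast against (3, 2, 2);
# cond[..., None] plays the role of the Unsqueeze(condition, -1) in the diff.
selected = np.where(cond[..., None], x, y)                     # shape (3, 2, 2)

real, imag = selected[..., 0], selected[..., 1]
print(real.shape, imag.shape)  # (3, 2) (3, 2)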