From 6581feeeb075a3f417bb927384a0b57f31db2021 Mon Sep 17 00:00:00 2001
From: tadamczx <156996781+tadamczx@users.noreply.github.com>
Date: Mon, 14 Oct 2024 13:39:27 +0200
Subject: [PATCH 001/112] [DOCS] OV GenAI Python API for master (#27028)

### Details:
 - *item1*
 - *...*

### Tickets:
 - *ticket-id*

---
 docs/CMakeLists.txt | 6 +-
 .../install_appropriate_openvino_version.py | 58 +++++++++----------
 docs/scripts/tests/suppress_warnings.txt | 1 +
 docs/sphinx_setup/api/api_reference.rst | 1 +
 docs/sphinx_setup/api/genai_api/api.rst | 12 ++++
 docs/sphinx_setup/conf.py | 12 +++-
 6 files changed, 58 insertions(+), 32 deletions(-)
 create mode 100644 docs/sphinx_setup/api/genai_api/api.rst

diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index 797c95ef7d91c5..dfb98af33c805c 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -6,6 +6,7 @@ add_subdirectory(snippets)
 
 set(ENABLE_CPP_API OFF CACHE BOOL "Build with C/C++ API.")
 set(ENABLE_PYTHON_API OFF CACHE BOOL "Build with Python API.")
+set(ENABLE_GENAI_API OFF CACHE BOOL "Build with GenAI API.")
 set(ENABLE_NOTEBOOKS OFF CACHE BOOL "Build with openvino notebooks.")
 set(ENABLE_OMZ OFF CACHE BOOL "Build with open_model_zoo.")
 set(ENABLE_OVMS OFF CACHE BOOL "Build with ovms.")
@@ -61,11 +62,12 @@ function(build_docs)
         list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "FINISHED preprocessing OpenVINO C/C++ API reference")
     endif()
 
-    if(${ENABLE_PYTHON_API})
+    if(${ENABLE_PYTHON_API} OR ${ENABLE_GENAI_API})
         list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "STARTED preprocessing OpenVINO Python API")
         list(APPEND commands COMMAND ${Python3_EXECUTABLE} ${OV_INSTALLATION_SCRIPT}
             --ov_dir=${SPHINX_SETUP_DIR}
-            --python=${Python3_EXECUTABLE})
+            --python=${Python3_EXECUTABLE}
+            --enable_genai=${ENABLE_GENAI_API})
         list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "FINISHED preprocessing OpenVINO Python API")
     endif()
 
diff --git a/docs/scripts/install_appropriate_openvino_version.py b/docs/scripts/install_appropriate_openvino_version.py
index d9cbb6a9ddb1f9..262fd13d771ca2 100644
--- a/docs/scripts/install_appropriate_openvino_version.py
+++ b/docs/scripts/install_appropriate_openvino_version.py
@@ -2,64 +2,64 @@
 import argparse
 import subprocess
 import requests
-import pkg_resources
 from packaging import version
 from pathlib import Path
 
 
 def determine_openvino_version(file_path):
     pattern = r"version_name\s*=\s*['\"]([^'\"]+)['\"]"
-
     with open(file_path, 'r') as file:
         content = file.read()
-
     match = re.search(pattern, content)
-
-    if match:
-        return match.group(1)
-    else:
-        return None
+    return match.group(1) if match else None
 
 
-def get_latest_version(major_version):
-    url = f"https://pypi.org/pypi/openvino/json"
+def get_latest_version(package, major_version):
+    url = f"https://pypi.org/pypi/{package}/json"
     response = requests.get(url)
-
     if response.status_code == 200:
         data = response.json()
         versions = data['releases'].keys()
-
-        # Filter versions by the major version prefix
         matching_versions = [v for v in versions if v.startswith(major_version)]
-
-        # Sort the matching versions and return the latest one
         if matching_versions:
             matching_versions.sort(key=version.parse)
             return matching_versions[-1]
-
     return None
 
 
+def install_package(python_executable, package):
+    subprocess.check_call([f'{python_executable}', '-m', 'pip', 'install', '-U', package, '--no-cache-dir'])
+
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--ov_dir', type=Path, help='OpenVINO docs 
directory')
     parser.add_argument('--python', type=Path, help='Python executable')
+    parser.add_argument('--enable_genai', type=str, choices=['ON', 'OFF'], default='OFF', help='Enable GenAI API installation')
     args = parser.parse_args()
-    ov_dir = args.ov_dir
-    python_executable = args.python
-    version_name = determine_openvino_version(ov_dir.joinpath("conf.py"))
-
-    if version_name is None:
-        ov_version = "openvino"
-    elif version_name == "nightly":
-        ov_version = "openvino-nightly"
+
+    version_name = determine_openvino_version(args.ov_dir.joinpath("conf.py"))
+
+    if version_name == "nightly":
+        install_package(args.python, "openvino-nightly")
+        print("OpenVINO nightly version installed. OpenVINO GenAI nightly version is not available.")
+    elif version_name is None or version_name == "latest":
+        install_package(args.python, "openvino")
+        if args.enable_genai == 'ON':
+            install_package(args.python, "openvino-genai")
     else:
-        latest_version = get_latest_version(version_name)
-        if latest_version:
-            ov_version = f"openvino=={latest_version}"
+        ov_version = get_latest_version("openvino", version_name)
+        if ov_version:
+            install_package(args.python, f"openvino=={ov_version}")
         else:
-            ov_version = f"openvino=={version_name}"
-    subprocess.check_call([f'{python_executable}', '-m', 'pip', 'install', '-U', ov_version, '--no-cache-dir'])
+            print(f"No matching OpenVINO version found for {version_name}")
+
+        if args.enable_genai == 'ON':
+            ov_genai_version = get_latest_version("openvino-genai", version_name)
+            if ov_genai_version:
+                install_package(args.python, f"openvino-genai=={ov_genai_version}")
+            else:
+                print(f"No matching OpenVINO GenAI version found for {version_name}")
 
 
 if __name__ == "__main__":
diff --git a/docs/scripts/tests/suppress_warnings.txt b/docs/scripts/tests/suppress_warnings.txt
index b9942f28dc02ae..993a290c6d7ea3 100644
--- a/docs/scripts/tests/suppress_warnings.txt
+++ b/docs/scripts/tests/suppress_warnings.txt
@@ -61,3 +61,4 @@ toctree contains reference to nonexisting document
 pygments lexer name
 non-consecutive header level increase
 document headings start at
+inline strong start-string without end-string
diff --git a/docs/sphinx_setup/api/api_reference.rst b/docs/sphinx_setup/api/api_reference.rst
index acf816364ca0fc..9f60573707fbde 100644
--- a/docs/sphinx_setup/api/api_reference.rst
+++ b/docs/sphinx_setup/api/api_reference.rst
@@ -15,6 +15,7 @@ API Reference
    OpenVINO Runtime C++ API
    OpenVINO Runtime C API
    OpenVINO Node.js API
+   GenAI Python API
 
diff --git a/docs/sphinx_setup/api/genai_api/api.rst b/docs/sphinx_setup/api/genai_api/api.rst
new file mode 100644
index 00000000000000..fc5151d7781ae7
--- /dev/null
+++ b/docs/sphinx_setup/api/genai_api/api.rst
@@ -0,0 +1,12 @@
+OpenVINO GenAI API
+===================
+
+.. meta::
+   :description: Explore OpenVINO GenAI Python API and implementation of its features in Intel® Distribution of OpenVINO™ GenAI.
+
+
+.. 
autosummary::
+   :toctree: _autosummary
+   :template: custom-module-template.rst
+
+   openvino_genai
\ No newline at end of file
diff --git a/docs/sphinx_setup/conf.py b/docs/sphinx_setup/conf.py
index 351a6d6c5ea8b9..148309ccbafe96 100644
--- a/docs/sphinx_setup/conf.py
+++ b/docs/sphinx_setup/conf.py
@@ -34,11 +34,21 @@
     'breathe'
 ]
 
+autodoc_mock_imports = []
+
 try:
     import openvino
 except ImportError:
-    autodoc_mock_imports = ["openvino"]
+    autodoc_mock_imports.append("openvino")
+    autodoc_mock_imports.append("openvino_genai")  # Mock openvino_genai too, as it depends on openvino
+
+if "openvino" not in autodoc_mock_imports:
+    try:
+        import openvino_genai
+    except ImportError:
+        autodoc_mock_imports.append("openvino_genai")
+
 
 breathe_projects = {
     "openvino": "../xml/"
 }

From f61db78a2e5089b506489b88e48fa4f82e0829b2 Mon Sep 17 00:00:00 2001
From: Andrzej Kopytko
Date: Mon, 14 Oct 2024 14:57:46 +0200
Subject: [PATCH 002/112] [DOCS] Add OVMS files to folder (#27029)

### Details:
 - *item1*
 - *...*

### Tickets:
 - *ticket-id*

---
 docs/CMakeLists.txt | 2 +-
 docs/articles_en/openvino-workflow.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index dfb98af33c805c..eedfe078cbd552 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -85,7 +85,7 @@ function(build_docs)
         list(APPEND commands COMMAND ${Python3_EXECUTABLE} ${FILE_HELPER_SCRIPT}
             --filetype=md
             --input_dir=${OVMS_DOCS_DIR}
-            --output_dir=${SPHINX_SOURCE_DIR}
+            --output_dir=${SPHINX_SOURCE_DIR}/openvino-workflow/model-server
             --exclude_dir=${SPHINX_SOURCE_DIR})
         list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "FINISHED preprocessing OVMS")
     endif()
diff --git a/docs/articles_en/openvino-workflow.rst b/docs/articles_en/openvino-workflow.rst
index 90101fd1fb35e8..0dda91f91fb552 100644
--- a/docs/articles_en/openvino-workflow.rst
+++ b/docs/articles_en/openvino-workflow.rst
@@ -14,7 +14,7 @@ OpenVINO Workflow
    openvino-workflow/model-optimization
    Running Inference
    Deployment on a Local System
-   Deployment on a Model Server
+   Deployment on a Model Server
    openvino-workflow/torch-compile
 
From 87e463e2b8da37e700ae60b55c3191bc30331ecd Mon Sep 17 00:00:00 2001
From: Dmitry Matveev
Date: Mon, 14 Oct 2024 16:23:22 +0100
Subject: [PATCH 003/112] NPUW: Fix - don't register an extra parameter to a group (#27032)

### Details:
 - There's a case where there's a Param->Convert path that stays in model HEAD, and then it acts as an input to all other partitions. In this case, this convert is mistakenly seen as an "extra" input which needs to be registered.

### Tickets:
 - E-138529

---
 .../plugin/npuw/partitioning/partitioning.cpp | 18 ++++++++++++++----
 src/plugins/intel_npu/src/plugin/npuw/util.hpp | 8 +++++++-
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
index 58a8219f497fed..5e3f12fedf68a6 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp
@@ -345,6 +345,10 @@ void Partitioner::identifySubgraphs() {
     }
     LOG_INFO("Caching done: " << node_id_cache.size() << " layers.");
 
+    // Accumulate knowledge about known OV layers when walking
+    // over a topologically-sorted list.
+    std::unordered_set nodes_known_now;
+
     // FIXME: Need to do some sanity checks here. 
What if partitioning // has been generated for another variation of this model? // What if that was a completely different model? @@ -458,16 +462,19 @@ void Partitioner::identifySubgraphs() { continue; } else if ((ov::is_type(input_node) || ov::is_type(input_node)) && + !nodes_known_now.count(input_node) && ov::op::util::is_parameter(input_node->input(0).get_source_output().get_node_shared_ptr())) { // So the situation is: - // - a group has an input layer + // - a group has an input layer // - which reads from a Slice or Convert // - which reads from a Parameter + // - not a part of any prior group // This happens when an offline plan is used with a kvcache // model extended with slices to maintain zero-copy (LLM case) auto extra_param = input_node->input(0).get_source_output().get_node_shared_ptr(); input_mapping[input_node] = extra_param; extra_params.insert(extra_param); + LOG_DEBUG("Registered extra param " << extra_param); } else { // Ok, this input is connected to some other node's output // Replace this connection with a link to a newly created Parameter @@ -671,7 +678,8 @@ void Partitioner::identifySubgraphs() { } } this_group_idx++; // FIXME: indexed() is better! - } // for (partitions) + nodes_known_now.insert(group_nodes.begin(), group_nodes.end()); + } // for (partitions) // Return what we've got here std::vector& result = P.subgraphs; @@ -1387,14 +1395,16 @@ void Partitioner::matchParameters(const std::string& func_name) { this_model_nodes.insert(node_ptr.get()); } for (auto&& node : call->get_ordered_ops()) { + using ov::npuw::util::at::_; + if (ov::op::util::is_parameter(node)) { PKey pkey; for (auto&& iport : node->output(0).get_target_inputs()) { if (this_model_nodes.count(iport.get_node()) > 0) { LOG_DEBUG("Register link " << iport.get_node()->get_friendly_name() << " : " << iport.get_index()); - pkey.insert( - PReader{layer_to_prototype.at(iport.get_node()->get_friendly_name()), iport.get_index()}); + pkey.insert(PReader{_(layer_to_prototype).at(iport.get_node()->get_friendly_name()), + iport.get_index()}); } } LOG_DEBUG("Find orig parameter for " << node); diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.hpp b/src/plugins/intel_npu/src/plugin/npuw/util.hpp index 1704314aee75ea..02d2c8c097811e 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.hpp @@ -65,8 +65,9 @@ ov::Tensor permute(const ov::Tensor& t, const std::vector& axes); ov::Tensor concat(const std::vector& tt, std::size_t axis); namespace at { -template +template struct Impl { + using M = typename std::decay::type; using V = typename M::mapped_type; M* m = nullptr; @@ -96,6 +97,11 @@ Impl _(M* pM) { return Impl(pM); } +template +Impl _(M&& m) { + return Impl(&m); +} + template Impl _(std::shared_ptr pM) { return Impl(pM.get()); From 7f85f15aa4a25d157c4879507cb8cc3a6e7306a4 Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Mon, 14 Oct 2024 18:31:07 +0200 Subject: [PATCH 004/112] Place wheels to a separate folder (#26952) ### Details: - Changed cpack dir name where the wheels should be installed ### Tickets: - *153216* --- .github/actions/openvino_provider/action.yml | 2 +- .github/workflows/job_build_linux.yml | 70 +++++++++++++------ .github/workflows/job_build_windows.yml | 25 +++++-- .github/workflows/job_jax_models_tests.yml | 37 ++++------ .github/workflows/job_onnx_models_tests.yml | 35 +++++----- .github/workflows/job_python_unit_tests.yml | 36 +++++----- .github/workflows/job_pytorch_layer_tests.yml | 41 +++++------ 
.../workflows/job_pytorch_models_tests.yml | 35 ++++------ .github/workflows/job_samples_tests.yml | 28 ++++---- .../workflows/job_tensorflow_layer_tests.yml | 51 ++++++-------- .../workflows/job_tensorflow_models_tests.yml | 39 +++++------ .github/workflows/job_tokenizers.yml | 41 +++++------ .github/workflows/windows_vs2019_release.yml | 58 +++++++-------- .../developer_package/packaging/archive.cmake | 8 +-- .../packaging/common-libraries.cmake | 7 +- .../packaging/debian/debian.cmake | 5 +- cmake/developer_package/packaging/npm.cmake | 3 - cmake/developer_package/packaging/nsis.cmake | 5 +- .../packaging/packaging.cmake | 2 - .../developer_package/packaging/rpm/rpm.cmake | 5 +- tools/openvino_dev/CMakeLists.txt | 23 ------ 21 files changed, 252 insertions(+), 304 deletions(-) diff --git a/.github/actions/openvino_provider/action.yml b/.github/actions/openvino_provider/action.yml index 131abb59b5e252..31c49afd15bd94 100644 --- a/.github/actions/openvino_provider/action.yml +++ b/.github/actions/openvino_provider/action.yml @@ -137,7 +137,7 @@ runs: artifacts_path=${{ steps.openvino_commit_download.outputs.artifacts_path }} cd $artifacts_path version=$(yq eval '.components.dldt.custom_params.wheel_product_version' manifest.yml) - wheel_path=${{ inputs.install_dir && '$artifacts_path/tools' || './tools' }} + wheel_path=${{ inputs.install_dir && '$artifacts_path/wheels' || './wheels' }} default_find_links_cmd="--find-links=$wheel_path" find_links_cmd=$([[ -n "$PIP_FIND_LINKS" ]] && echo "" || echo "$default_find_links_cmd") echo "ov_version=$version" >> $GITHUB_OUTPUT diff --git a/.github/workflows/job_build_linux.yml b/.github/workflows/job_build_linux.yml index 0cb7cfb93e16f0..d253f779152e6d 100644 --- a/.github/workflows/job_build_linux.yml +++ b/.github/workflows/job_build_linux.yml @@ -82,6 +82,7 @@ jobs: INSTALL_DIR: /__w/openvino/openvino/openvino_install INSTALL_DIR_JS: /__w/openvino/openvino/openvino_install/js INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install + INSTALL_WHEELS_DIR: /__w/openvino/openvino/install/wheels DEVELOPER_PACKAGE_DIR: /__w/openvino/openvino/developer_package_install BUILD_DIR: /__w/openvino/openvino/openvino_build SCCACHE_AZURE_KEY_PREFIX: ${{ inputs.os }}_${{ inputs.arch }}_Release @@ -174,25 +175,24 @@ jobs: - name: Cmake install - OpenVINO run: | - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_TEST_DIR} -DCOMPONENT=tests -P ${BUILD_DIR}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${DEVELOPER_PACKAGE_DIR} -DCOMPONENT=developer_package -P ${BUILD_DIR}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -DCOMPONENT=python_wheels -P ${BUILD_DIR}/cmake_install.cmake - - - name: Pack Artifacts - run: | - pushd ${INSTALL_DIR} - tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz * - popd - - pushd ${DEVELOPER_PACKAGE_DIR} - tar -I pigz -cvf ${BUILD_DIR}/openvino_developer_package.tar.gz * - popd - - pushd ${INSTALL_TEST_DIR} - tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz * - popd - + cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${INSTALL_DIR} + cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${INSTALL_WHEELS_DIR} --component python_wheels + cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${INSTALL_TEST_DIR} --component tests + cmake --install . 
--config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${DEVELOPER_PACKAGE_DIR} --component developer_package + working-directory: ${{ env.BUILD_DIR }} + + - name: Pack openvino_package + run: tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz * + working-directory: ${{ env.INSTALL_DIR }} + + - name: Pack openvino_developer_package + run: tar -I pigz -cvf ${BUILD_DIR}/openvino_developer_package.tar.gz * + working-directory: ${{ env.DEVELOPER_PACKAGE_DIR }} + + - name: Pack openvino_tests + run: tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz * + working-directory: ${{ env.INSTALL_TEST_DIR }} + - name: Build Debian packages if: ${{ inputs.build-debian-packages }} run: | @@ -205,6 +205,7 @@ jobs: -UTBB* \ -DENABLE_SYSTEM_TBB=ON \ -DENABLE_PYTHON_PACKAGING=ON \ + -DENABLE_WHEEL=OFF \ -DENABLE_TESTS=OFF \ -DPython3_EXECUTABLE=$python_exec \ -DCPACK_GENERATOR=DEB \ @@ -217,6 +218,7 @@ jobs: cmake \ -DCUSTOM_OPERATIONS="calculate_grid;complex_mul;fft;grid_sample;sparse_conv;sparse_conv_transpose" \ -DOPENVINO_EXTRA_MODULES="${OPENVINO_CONTRIB_REPO}/modules/java_api;${OPENVINO_CONTRIB_REPO}/modules/custom_operations" \ + -DENABLE_WHEEL=OFF \ -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} @@ -224,9 +226,12 @@ jobs: - name: CMake configure, build and install - OpenVINO JS API if: ${{ fromJSON(inputs.affected-components).JS_API && inputs.build-js }} run: | - cmake -UTBB* -DCPACK_GENERATOR=NPM -DENABLE_SYSTEM_TBB=OFF -S ${OPENVINO_REPO} -B ${BUILD_DIR} + cmake -UTBB* -S ${OPENVINO_REPO} -B ${BUILD_DIR} \ + -DCPACK_GENERATOR=NPM \ + -DENABLE_SYSTEM_TBB=OFF \ + -DENABLE_WHEEL=OFF cmake --build ${BUILD_DIR} --parallel - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR_JS} -P ${BUILD_DIR}/cmake_install.cmake + cmake --install ${BUILD_DIR} --prefix ${INSTALL_DIR_JS} - name: Build RPM packages if: ${{ inputs.build-rpm-packages }} @@ -235,6 +240,7 @@ jobs: -DCPACK_GENERATOR=RPM \ -DENABLE_SYSTEM_TBB=ON \ -DENABLE_PYTHON_PACKAGING=ON \ + -DENABLE_WHEEL=OFF \ -DENABLE_TESTS=OFF \ ${BUILD_DIR} cmake --build ${BUILD_DIR} --parallel --target package --verbose @@ -257,7 +263,15 @@ jobs: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' - + + - name: Upload openvino wheels + if: ${{ inputs.os != 'debian_10' && inputs.arch != 'arm' }} + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + with: + name: openvino_wheels + path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl + if-no-files-found: 'error' + - name: Upload openvino js package if: ${{ fromJSON(inputs.affected-components).JS_API && inputs.build-js }} uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 @@ -307,7 +321,7 @@ jobs: popd - name: Store artifacts to a shared drive - id: store_artifacts + id: store_artifacts_common if: ${{ always() }} uses: ./openvino/.github/actions/store_artifacts with: @@ -319,3 +333,13 @@ jobs: ${{ env.MANIFEST_PATH }} storage_dir: ${{ env.PRODUCT_TYPE }} storage_root: ${{ env.ARTIFACTS_SHARE }} + + - name: Store artifacts to a shared drive (wheels) + id: store_artifacts_wheels + if: ${{ inputs.os != 'debian_10' && inputs.arch != 'arm' }} + uses: ./openvino/.github/actions/store_artifacts + with: + artifacts: | + ${{ env.BUILD_DIR }}/wheels + storage_dir: ${{ env.PRODUCT_TYPE }} + storage_root: ${{ env.ARTIFACTS_SHARE }} diff --git a/.github/workflows/job_build_windows.yml b/.github/workflows/job_build_windows.yml index 66301cee1f1046..c8e249513a08f0 100644 --- 
a/.github/workflows/job_build_windows.yml +++ b/.github/workflows/job_build_windows.yml @@ -44,6 +44,7 @@ jobs: INSTALL_DIR: "${{ github.workspace }}\\openvino_install" INSTALL_DIR_JS: "${{ github.workspace }}\\openvino_install\\js" INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install" + INSTALL_WHEELS_DIR: "${{ github.workspace }}\\install\\wheels" BUILD_DIR: "${{ github.workspace }}\\openvino_build" ARTIFACTS_SHARE: "C:\\mount\\build-artifacts" MANIFEST_PATH: "${{ github.workspace }}\\manifest.yml" @@ -179,13 +180,14 @@ jobs: - name: Cmake install - OpenVINO run: | - cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_TEST_DIR }} -DCOMPONENT=tests -P ${{ env.BUILD_DIR }}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake + cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_DIR }} + cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_WHEELS_DIR }} --component python_wheels + cmake --install . --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_TEST_DIR }} --component tests + working-directory: ${{ env.BUILD_DIR }} - name: Pack Artifacts run: | - $file=Get-ChildItem -Path "${{ env.INSTALL_DIR }}" + $file = Get-ChildItem -Path "${{ env.INSTALL_DIR }}" $compress = @{ Path = $file CompressionLevel = "Optimal" @@ -204,9 +206,12 @@ jobs: - name: CMake configure, build and install - OpenVINO JS API if: ${{ fromJSON(inputs.affected-components).JS_API }} run: | - cmake -DCPACK_GENERATOR=NPM -DENABLE_SYSTEM_TBB=OFF -UTBB* -S ${{ env.OPENVINO_REPO }} -B ${{ env.BUILD_DIR }} + cmake -UTBB* -S ${{ env.OPENVINO_REPO }} -B ${{ env.BUILD_DIR }} ` + -DCPACK_GENERATOR=NPM ` + -DENABLE_SYSTEM_TBB=OFF ` + -DENABLE_WHEEL=OFF cmake --build ${{ env.BUILD_DIR }} --parallel - cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR_JS }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake + cmake --install ${{ env.BUILD_DIR }} --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_DIR_JS }} # # Upload build artifacts and logs @@ -218,6 +223,13 @@ jobs: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.zip if-no-files-found: 'error' + + - name: Upload openvino wheels + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + with: + name: openvino_wheels + path: ${{ env.BUILD_DIR }}/wheels/*.whl + if-no-files-found: 'error' - name: Upload openvino tests package uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 @@ -242,6 +254,7 @@ jobs: artifacts: | ${{ env.BUILD_DIR }}/openvino_package.zip ${{ env.BUILD_DIR }}/openvino_tests.zip + ${{ env.INSTALL_WHEELS_DIR }}/wheels ${{ env.MANIFEST_PATH }} storage_dir: ${{ env.PRODUCT_TYPE }} storage_root: ${{ env.ARTIFACTS_SHARE }} diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index 8f9292d35fb803..2fed97a78e9c07 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -33,25 +33,22 @@ jobs: OPENVINO_REPO: ${{ github.workspace }}/openvino INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests steps: - - name: Download OpenVINO package + - name: Download OpenVINO artifacts 
(tarballs) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package + pattern: openvino_[tests]* path: ${{ env.INSTALL_DIR }} - - - name: Download OpenVINO tokenizers extension + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_tokenizers_wheel - path: ${{ env.INSTALL_DIR }} - - - name: Download OpenVINO tests package - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - name: openvino_tests - path: ${{ env.INSTALL_TEST_DIR }} + pattern: openvino_[tokenizers_wheel|wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true # Needed as ${{ github.workspace }} is not working correctly when using Docker - name: Setup Variables @@ -60,16 +57,11 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "MODEL_HUB_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/model_hub_tests" >> "$GITHUB_ENV" - - - name: Extract OpenVINO packages + + - name: Extract OpenVINO packages and tests run: | - pushd ${INSTALL_DIR} - tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} - popd - - pushd ${INSTALL_TEST_DIR} tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} - popd + working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 @@ -90,8 +82,9 @@ jobs: run: | # To enable pytest parallel features python3 -m pip install pytest-xdist[psutil] - python3 -m pip install ${INSTALL_DIR}/tools/openvino-* - python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-* + python3 -m pip install ./openvino-* + python3 -m pip install ./openvino_tokenizers-* + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install JAX tests requirements for precommit run: | diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml index ffc4da8ef87b54..0eda00f7afb937 100644 --- a/.github/workflows/job_onnx_models_tests.yml +++ b/.github/workflows/job_onnx_models_tests.yml @@ -28,6 +28,7 @@ jobs: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input OPENVINO_REPO: ${{ github.workspace }}/openvino INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests ONNX_MODELS_PATH: ${{ github.workspace }}/onnx_test_models # instead of using static MODELS_SHARE_PATH @@ -37,18 +38,20 @@ jobs: ONNX_MODEL_ZOO_SHA: "5faef4c33eba0395177850e1e31c4a6a9e634c82" if: ${{ github.event_name != 'merge_group' }} steps: - - name: Download OpenVINO package + - name: Download OpenVINO artifacts (tests) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package + pattern: openvino_[tests]* path: ${{ env.INSTALL_DIR }} - - - name: Download OpenVINO tests package + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_tests - path: ${{ env.INSTALL_TEST_DIR }} - + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true + # Needed as ${{ github.workspace }} is not working correctly when using Docker - name: Setup Variables run: | @@ -59,15 +62,10 @@ jobs: echo $MODELS_SHARE_PATH 
echo "LOGS_FOLDER=$GITHUB_WORKSPACE/onnx_models_tests_logs" >> "$GITHUB_ENV" - - name: Extract OpenVINO packages + - name: Extract OpenVINO packages and tests run: | - pushd ${INSTALL_DIR} - tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} - popd - - pushd ${INSTALL_TEST_DIR} tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} - popd + working-directory: ${{ env.INSTALL_DIR }} # Issue 148922 # Can be a possible root cause for the bug @@ -87,15 +85,14 @@ jobs: - name: Install OpenVINO Python wheels run: | # Install the core OV wheel - python3 -m pip install ${INSTALL_DIR}/tools/openvino-*.whl + python3 -m pip install ./openvino-*.whl extras_to_install="onnx" # Find and install OV dev wheel - pushd ${INSTALL_DIR}/tools - ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl') - python3 -m pip install $ov_dev_wheel_name[$extras_to_install] - popd + ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl') + python3 -m pip install $ov_dev_wheel_name[$extras_to_install] + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install Python tests dependencies run: | diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index e6ba39fdb3bfe3..8db2ebf86dca91 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -38,22 +38,25 @@ jobs: env: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input OPENVINO_REPO: ${{ github.workspace }}/openvino - INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests steps: - - name: Download OpenVINO package + - name: Download OpenVINO artifacts (tarballs) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package + pattern: openvino_[tests]* path: ${{ env.INSTALL_DIR }} - - - name: Download OpenVINO tests package + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_tests - path: ${{ env.INSTALL_TEST_DIR }} + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true # Needed as ${{ github.workspace }} is not working correctly when using Docker - name: Setup Variables @@ -63,14 +66,10 @@ jobs: echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - - name: Extract OpenVINO packages + - name: Extract OpenVINO artifacts run: | - pushd $INSTALL_DIR - tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR - popd - pushd $INSTALL_TEST_DIR - tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR - popd + tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 @@ -95,15 +94,14 @@ jobs: - name: Install OpenVINO Python wheels run: | # Install the core OV wheel - python3 -m pip install ${INSTALL_DIR}/tools/openvino-*.whl + python3 -m pip install ./openvino-*.whl extras_to_install="caffe,kaldi,onnx,tensorflow2,pytorch" # Find and install OV dev wheel - pushd ${INSTALL_DIR}/tools - ov_dev_wheel_name=$(find . 
-name 'openvino_dev*.whl') - python3 -m pip install $ov_dev_wheel_name[$extras_to_install] - popd + ov_dev_wheel_name=$(find . -name 'openvino_dev*.whl') + python3 -m pip install $ov_dev_wheel_name[$extras_to_install] + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install Python API tests dependencies run: | diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index cf8514a7cd6707..c4f0d1efb37c75 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -40,19 +40,22 @@ jobs: OPENVINO_REPO: ${{ github.workspace }}/openvino INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests steps: - - name: Download OpenVINO package + - name: Download OpenVINO artifacts (tarballs) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package + pattern: openvino_[tests]* path: ${{ env.INSTALL_DIR }} - - - name: Download OpenVINO tests package + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_tests - path: ${{ env.INSTALL_TEST_DIR }} + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true # Needed as ${{ github.workspace }} is not working correctly when using Docker - name: Setup Variables @@ -62,26 +65,18 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - - - name: Extract OpenVINO packages (Linux, macOS) + + - name: Extract OpenVINO artifacts (Linux, macOS) if: runner.os != 'Windows' run: | - pushd $INSTALL_DIR - tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR - popd - pushd $INSTALL_TEST_DIR - tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR - popd + tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + working-directory: ${{ env.INSTALL_DIR }} - - name: Extract OpenVINO packages (Windows) + - name: Extract OpenVINO artifacts (Windows) if: runner.os == 'Windows' run: | - pushd ${{ env.INSTALL_DIR }} - Expand-Archive openvino_package.zip -DestinationPath ${{ env.INSTALL_DIR }} - popd - pushd ${{ env.INSTALL_TEST_DIR }} Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} - popd + working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 @@ -103,14 +98,16 @@ jobs: if: runner.os != 'Windows' run: | # Install the core OV wheel - python3 -m pip install ${INSTALL_DIR}/tools/openvino-*.whl + python3 -m pip install ./openvino-*.whl + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install OpenVINO Python wheels (Windows) if: runner.os == 'Windows' run: | # Find and install the core OV wheel - $ovCoreWheelPath=Get-ChildItem -Path ${{ env.INSTALL_DIR }}\tools -Filter openvino-*.whl | % { $_.FullName } + $ovCoreWheelPath=Get-ChildItem -Path . 
-Filter openvino-*.whl | % { $_.FullName } python3 -m pip install "$ovCoreWheelPath" + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install Pytorch Layer tests dependencies run: | diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index bcbcad872b42e1..ce40dd7f0618ce 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -33,6 +33,7 @@ jobs: OPENVINO_REPO: ${{ github.workspace }}/openvino INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests steps: - name: Check sudo @@ -48,23 +49,19 @@ jobs: sudo sh -c "echo 'Acquire::Retries \"10\";' >> /etc/apt/apt.conf.d/80-retries" fi - - name: Download OpenVINO package + - name: Download OpenVINO artifacts (tarballs) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package + pattern: openvino_[tests]* path: ${{ env.INSTALL_DIR }} - - - name: Download OpenVINO tokenizers extension - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - name: openvino_tokenizers_wheel - path: ${{ env.INSTALL_DIR }} - - - name: Download OpenVINO tests package + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_tests - path: ${{ env.INSTALL_TEST_DIR }} + pattern: openvino_[wheels|tokenizers_wheel]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true # Needed as ${{ github.workspace }} is not working correctly when using Docker - name: Setup Variables @@ -74,15 +71,10 @@ jobs: echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "MODEL_HUB_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/model_hub_tests" >> "$GITHUB_ENV" - - name: Extract OpenVINO packages + - name: Extract OpenVINO artifacts run: | - pushd ${INSTALL_DIR} - tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} - popd - - pushd ${INSTALL_TEST_DIR} tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} - popd + working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 @@ -109,8 +101,9 @@ jobs: - name: Install OpenVINO Python wheels run: | # To enable pytest parallel features - python3 -m pip install ${INSTALL_DIR}/tools/openvino-* - python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-* + python3 -m pip install ./openvino-* + python3 -m pip install ./openvino_tokenizers-* + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install PyTorch tests requirements for precommit if: ${{ inputs.model_scope == 'precommit' }} diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml index cc314ee93ee876..12c63644d7b586 100644 --- a/.github/workflows/job_samples_tests.yml +++ b/.github/workflows/job_samples_tests.yml @@ -31,20 +31,23 @@ jobs: DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels BUILD_DIR: ${{ github.workspace }}/build steps: - - name: Download OpenVINO package + - name: Download OpenVINO artifacts (tarballs) 
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package + pattern: openvino_[package|tests]* path: ${{ env.INSTALL_DIR }} + merge-multiple: true - - name: Download OpenVINO tests package + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_tests - path: ${{ env.INSTALL_TEST_DIR }} - + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true + # Needed as ${{ github.workspace }} is not working correctly when using Docker - name: Setup Variables run: | @@ -52,14 +55,11 @@ jobs: echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "BUILD_DIR=$GITHUB_WORKSPACE/build" >> "$GITHUB_ENV" - - name: Extract OpenVINO packages + - name: Extract OpenVINO packages, wheels and tests run: | - pushd $INSTALL_DIR - tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR - popd - pushd $INSTALL_TEST_DIR - tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR - popd + tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} + tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + working-directory: ${{ env.INSTALL_DIR }} - name: Install OpenVINO dependencies (mac) if: runner.os == 'macOS' @@ -122,7 +122,7 @@ jobs: export SHARE=$INSTALL_TEST_DIR/smoke_tests/samples_smoke_tests_data # Install Python benchmark_app by installing openvino-*.whl - python3 -m pip install --ignore-installed PyYAML -r $INSTALL_TEST_DIR/smoke_tests/requirements.txt $INSTALL_DIR/tools/openvino-*.whl + python3 -m pip install --ignore-installed PyYAML -r $INSTALL_TEST_DIR/smoke_tests/requirements.txt $INSTALL_WHEELS_DIR/openvino-*.whl export LD_LIBRARY_PATH=${IE_APP_PATH}:$LD_LIBRARY_PATH source ${INSTALL_DIR}/setupvars.sh diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index cc9e2781923c33..3ad19d3301945f 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -40,25 +40,22 @@ jobs: OPENVINO_REPO: ${{ github.workspace }}/openvino INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests steps: - - name: Download OpenVINO package + - name: Download OpenVINO artifacts (tarballs) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package + pattern: openvino_[tests]* path: ${{ env.INSTALL_DIR }} - - - name: Download OpenVINO tests package - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - name: openvino_tests - path: ${{ env.INSTALL_TEST_DIR }} - - - name: Download OpenVINO tokenizers extension + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_tokenizers_wheel - path: ${{ env.INSTALL_DIR }} + pattern: openvino_[wheels|openvino_tokenizers]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true # Needed as ${{ github.workspace }} is not working correctly when using Docker - name: Setup Variables @@ -69,25 +66,17 @@ jobs: echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - - name: Extract 
OpenVINO packages (Linux, macOS) + - name: Extract OpenVINO artifacts (Linux and macOS) if: runner.os != 'Windows' run: | - pushd $INSTALL_DIR - tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR - popd - pushd $INSTALL_TEST_DIR - tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR - popd - - - name: Extract OpenVINO packages (Windows) + tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Extract OpenVINO artifacts (Windows) if: runner.os == 'Windows' run: | - pushd ${{ env.INSTALL_DIR }} - Expand-Archive openvino_package.zip -DestinationPath ${{ env.INSTALL_DIR }} - popd - pushd ${{ env.INSTALL_TEST_DIR }} Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} - popd + working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 @@ -109,21 +98,23 @@ jobs: if: runner.os != 'Windows' run: | # Install the core OV wheel - python3 -m pip install ${INSTALL_DIR}/tools/openvino-*.whl + python3 -m pip install ./openvino-*.whl # Install the core OV Tokenizers wheel - python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-*.whl + python3 -m pip install ./openvino_tokenizers-*.whl + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install OpenVINO Python wheels (Windows) if: runner.os == 'Windows' run: | # Find and install the core OV wheel - $ovCoreWheelPath=Get-ChildItem -Path ${{ env.INSTALL_DIR }}\tools -Filter openvino-*.whl | % { $_.FullName } + $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName } python3 -m pip install "$ovCoreWheelPath" # Find and install the core OV Tokenizers wheel - $ovCoreWheelPath=Get-ChildItem -Path ${{ env.INSTALL_DIR }} -Filter openvino_tokenizers-*.whl | % { $_.FullName } + $ovCoreWheelPath=Get-ChildItem -Path . 
-Filter openvino_tokenizers-*.whl | % { $_.FullName } python3 -m pip install "$ovCoreWheelPath" + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install Python Layer tests dependencies run: | diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index b2cdf5a6336db0..76ee01cc76c3ef 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -33,27 +33,24 @@ jobs: OPENVINO_REPO: ${{ github.workspace }}/openvino INSTALL_DIR: ${{ github.workspace }}/install INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels MODEL_HUB_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/model_hub_tests NUMBER_OF_REPLICAS: 2 steps: - - name: Download OpenVINO package + - name: Download OpenVINO artifacts (tarballs) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package + pattern: openvino_[tests]* path: ${{ env.INSTALL_DIR }} - - - name: Download OpenVINO tokenizers extension + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_tokenizers_wheel - path: ${{ env.INSTALL_DIR }} - - - name: Download OpenVINO tests package - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - name: openvino_tests - path: ${{ env.INSTALL_TEST_DIR }} - + pattern: openvino_[wheels|tokenizers_wheel]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true + # Needed as ${{ github.workspace }} is not working correctly when using Docker - name: Setup Variables run: | @@ -66,15 +63,10 @@ jobs: echo "HF_HUB_CACHE=/mount/testdata$((GITHUB_RUN_NUMBER % NUMBER_OF_REPLICAS))/hugging_face" >> "$GITHUB_ENV" echo $HF_HUB_CACHE - - name: Extract OpenVINO packages + - name: Extract OpenVINO artifacts (Linux and macOS) run: | - pushd ${INSTALL_DIR} - tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} - popd - - pushd ${INSTALL_TEST_DIR} - tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} - popd + tar -I pigz -xf openvino_tests.tar.gz -C . 
+ working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 @@ -93,8 +85,9 @@ jobs: - name: Install OpenVINO Python wheels run: | - python3 -m pip install ${INSTALL_DIR}/tools/openvino-* - python3 -m pip install ${INSTALL_DIR}/openvino_tokenizers-* + python3 -m pip install ./openvino-* + python3 -m pip install ./openvino_tokenizers-* + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install TF Models tests requirements run: python3 -m pip install -r ${INSTALL_TEST_DIR}/requirements_tensorflow diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index c01c2740201384..089b104d7af1d1 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -39,6 +39,7 @@ jobs: container: ${{ fromJSON(inputs.container) }} env: INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels OPENVINO_TOKENIZERS_REPO: ${{ github.workspace }}/openvino_tokenizers EXTENSION_BUILD_DIR: ${{ github.workspace }}/build @@ -73,26 +74,13 @@ jobs: path: ${{ env.OPENVINO_TOKENIZERS_REPO }} ref: ${{ env.TARGET_BRANCH }} - - name: Download OpenVINO package + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package - path: ${{ env.INSTALL_DIR }} - - - name: Extract OpenVINO packages (Linux and macOS) - if: runner.os != 'Windows' - run: | - pushd ${INSTALL_DIR} - tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} - popd - - - name: Extract OpenVINO packages (Windows) - if: runner.os == 'Windows' - run: | - pushd ${{ env.INSTALL_DIR }} - Expand-Archive openvino_package.zip -DestinationPath "${{ env.INSTALL_DIR }}" - popd - + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true + # # Dependencies # @@ -101,17 +89,18 @@ jobs: if: runner.os != 'Windows' run: | # Find and install wheel - pushd ${INSTALL_DIR}/tools - wheel_name=$(find . -name 'openvino-*.whl') - python3 -m pip install $wheel_name - popd + wheel_name=$(find . -name 'openvino-*.whl') + python3 -m pip install $wheel_name + working-directory: ${{ env.INSTALL_WHEELS_DIR }} + - name: Install OpenVINO Python wheel (Windows) if: runner.os == 'Windows' run: | # Find and install wheel - $ovCoreWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\\tools" -Filter openvino-*.whl | % { $_.FullName } + $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName } python3 -m pip install "$ovCoreWheelPath" + working-directory: ${{ env.INSTALL_WHEELS_DIR }} # # Build @@ -119,16 +108,18 @@ jobs: - name: Build tokenizers wheel (Linux and macOS) if: runner.os != 'Windows' + working-directory: ${{ env.OPENVINO_TOKENIZERS_REPO }} run: | # use OpenVINO wheel package only to build the extension - python -m pip wheel -v --no-deps --wheel-dir ${EXTENSION_BUILD_DIR} --find-links ${INSTALL_DIR}/tools ${OPENVINO_TOKENIZERS_REPO} + python -m pip wheel -v --no-deps --wheel-dir ${EXTENSION_BUILD_DIR} --find-links ${INSTALL_WHEELS_DIR} . 
env: CMAKE_BUILD_PARALLEL_LEVEL: '4' - name: Build tokenizers wheel (Windows) if: runner.os == 'Windows' + working-directory: ${{ env.OPENVINO_TOKENIZERS_REPO }} run: | - python3 -m pip wheel -v --no-deps --wheel-dir ${env:EXTENSION_BUILD_DIR} --find-links ${env:INSTALL_DIR}/tools ${env:OPENVINO_TOKENIZERS_REPO} + python3 -m pip wheel -v --no-deps --wheel-dir ${env:EXTENSION_BUILD_DIR} --find-links ${env:INSTALL_WHEELS_DIR} . env: CMAKE_BUILD_PARALLEL_LEVEL: '4' diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index 32bdf5148ec540..a0871712285bf6 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -72,30 +72,30 @@ jobs: OPENVINO_REPO: "${{ github.workspace }}\\openvino" INSTALL_DIR: "${{ github.workspace }}\\install" INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests" + INSTALL_WHEELS_DIR: "${{ github.workspace }}\\install\\wheels" SAMPLES_INSTALL_DIR: "${{ github.workspace }}\\install\\samples" BUILD_DIR: "${{ github.workspace }}\\build" steps: - - name: Download OpenVINO package + - name: Download OpenVINO artifacts (tarballs) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package + pattern: openvino_[package|tests]* path: ${{ env.INSTALL_DIR }} + merge-multiple: true - - name: Download OpenVINO tests package + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_tests - path: ${{ env.INSTALL_TEST_DIR }} + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true - name: Extract OpenVINO packages run: | - pushd ${{ env.INSTALL_DIR }} - Expand-Archive openvino_package.zip -DestinationPath "${{ env.INSTALL_DIR }}" - popd - pushd ${{ env.INSTALL_TEST_DIR }} - Expand-Archive openvino_tests.zip -DestinationPath "${{ env.INSTALL_DIR }}" - popd + Expand-Archive openvino_package.zip -DestinationPath . + Expand-Archive openvino_tests.zip -DestinationPath . + working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 @@ -127,16 +127,17 @@ jobs: - name: Samples tests run: | # Install Python benchmark_app by installing openvino-*.whl - $ovCoreWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\tools" -Filter openvino-*.whl | % { $_.FullName } - python3 -m pip install --ignore-installed PyYAML -r ${{ env.INSTALL_TEST_DIR }}/smoke_tests/requirements.txt "$ovCoreWheelPath" - . "${{ env.INSTALL_DIR }}/setupvars.ps1" + $ovCoreWheelPath=Get-ChildItem -Path ./wheels -Filter openvino-*.whl | % { $_.FullName } + python3 -m pip install --ignore-installed PyYAML -r ./tests/smoke_tests/requirements.txt "$ovCoreWheelPath" + . 
"./setupvars.ps1" $Env:PYTHONCOERCECLOCALE="warn" - python3 -bb -W error -X dev -X warn_default_encoding -m pytest ${{ env.INSTALL_TEST_DIR }}/smoke_tests --numprocesses auto + python3 -bb -W error -X dev -X warn_default_encoding -m pytest ./tests/smoke_tests --numprocesses auto env: IE_APP_PATH: ${{ env.INSTALL_DIR }}/samples_bin IE_APP_PYTHON_PATH: ${{ env.INSTALL_DIR }}/samples/python SHARE: ${{ env.INSTALL_TEST_DIR }}/smoke_tests/samples_smoke_tests_data WORKSPACE: ${{ env.INSTALL_DIR }} + working-directory: ${{ env.INSTALL_DIR }} # Test .bat scripts for samples building - name: Build cpp samples (bat) @@ -237,30 +238,29 @@ jobs: OPENVINO_REPO: "${{ github.workspace }}\\openvino" INSTALL_DIR: "${{ github.workspace }}\\install" INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests" + INSTALL_WHEELS_DIR: "${{ github.workspace }}\\install\\wheels" LAYER_TESTS_INSTALL_DIR: "${{ github.workspace }}\\install\\tests\\layer_tests" PYTHON_STATIC_ARGS: -m "not dynamic_library and not template_plugin" steps: - - name: Download OpenVINO package + - name: Download OpenVINO artifacts (tarballs) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_package + pattern: openvino_[tests]* path: ${{ env.INSTALL_DIR }} + merge-multiple: true - - name: Download OpenVINO tests package + - name: Download OpenVINO artifacts (wheels) uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: openvino_tests - path: ${{ env.INSTALL_TEST_DIR }} + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true - name: Extract OpenVINO packages run: | - pushd ${{ env.INSTALL_DIR }} - Expand-Archive openvino_package.zip -DestinationPath "${{ env.INSTALL_DIR }}" - popd - pushd ${{ env.INSTALL_TEST_DIR }} - Expand-Archive openvino_tests.zip -DestinationPath "${{ env.INSTALL_DIR }}" - popd + Expand-Archive openvino_tests.zip -DestinationPath . + working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 @@ -281,12 +281,13 @@ jobs: - name: Install OpenVINO Python wheels run: | # Find and install the core OV wheel - $ovCoreWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\tools" -Filter openvino-*.whl | % { $_.FullName } + $ovCoreWheelPath=Get-ChildItem -Path . -Filter openvino-*.whl | % { $_.FullName } python3 -m pip install "$ovCoreWheelPath" # Find and install the dev OV wheel - $ovDevWheelPath=Get-ChildItem -Path "${{ env.INSTALL_DIR }}\tools" -Filter openvino_dev*.whl | % { $_.FullName } + $ovDevWheelPath=Get-ChildItem -Path . -Filter openvino_dev*.whl | % { $_.FullName } python3 -m pip install "$ovDevWheelPath[caffe,kaldi,onnx,tensorflow2,pytorch]" + working-directory: ${{ env.INSTALL_WHEELS_DIR }} - name: Install Python API tests dependencies run: | @@ -382,7 +383,6 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test || fromJSON(needs.smart_ci.outputs.affected_components).PDPD_FE.test run: | - . 
"${{ env.INSTALL_DIR }}/setupvars.ps1" python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/py_frontend_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_py_fontend.xml - name: OVC unit tests diff --git a/cmake/developer_package/packaging/archive.cmake b/cmake/developer_package/packaging/archive.cmake index 6df5145ae2e488..5978ec73052970 100644 --- a/cmake/developer_package/packaging/archive.cmake +++ b/cmake/developer_package/packaging/archive.cmake @@ -30,8 +30,7 @@ macro(ov_archive_cpack_set_dirs) set(OV_CPACK_DOCDIR docs) set(OV_CPACK_LICENSESDIR licenses) set(OV_CPACK_SAMPLESDIR samples) - set(OV_CPACK_WHEELSDIR tools) - set(OV_CPACK_TOOLSDIR tools) + set(OV_CPACK_WHEELSDIR wheels) set(OV_CPACK_DEVREQDIR tools) set(OV_CPACK_PYTHONDIR python) @@ -87,12 +86,11 @@ macro(ov_define_component_include_rules) unset(OV_CPACK_COMP_BENCHMARK_APP_EXCLUDE_ALL) unset(OV_CPACK_COMP_OVC_EXCLUDE_ALL) set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL EXCLUDE_FROM_ALL) - unset(OV_CPACK_COMP_PYTHON_WHEELS_EXCLUDE_ALL) + # we don't need wheels in the distribution packages + set(OV_CPACK_COMP_PYTHON_WHEELS_EXCLUDE_ALL EXCLUDE_FROM_ALL) unset(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL) # nodejs set(OV_CPACK_COMP_NPM_EXCLUDE_ALL EXCLUDE_FROM_ALL) - # tools - set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # scripts unset(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL) unset(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL) diff --git a/cmake/developer_package/packaging/common-libraries.cmake b/cmake/developer_package/packaging/common-libraries.cmake index 581c9144c1b907..0ec054da853e2c 100644 --- a/cmake/developer_package/packaging/common-libraries.cmake +++ b/cmake/developer_package/packaging/common-libraries.cmake @@ -11,7 +11,6 @@ include(GNUInstallDirs) # macro(ov_common_libraries_cpack_set_dirs) # override default locations for common libraries - set(OV_CPACK_TOOLSDIR ${CMAKE_INSTALL_BINDIR}) # only C++ tools are here set(OV_CPACK_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}) set(OV_CPACK_LIBRARYDIR ${CMAKE_INSTALL_LIBDIR}) if(WIN32) @@ -42,7 +41,7 @@ macro(ov_common_libraries_cpack_set_dirs) unset(OV_CPACK_SHAREDIR) # skipped during common libraries packaging - set(OV_CPACK_WHEELSDIR "tools") + set(OV_CPACK_WHEELSDIR "wheels") endmacro() ov_common_libraries_cpack_set_dirs() @@ -98,14 +97,12 @@ macro(ov_define_component_include_rules) # we don't pack artifacts of setup.py install, because it's called explicitly in conda / brew # or not used at all like in cases with conan / vcpkg set(OV_CPACK_COMP_PYTHON_OPENVINO_PACKAGE_EXCLUDE_ALL ${OV_CPACK_COMP_PYTHON_OPENVINO_EXCLUDE_ALL}) - # we don't need wheels in package, it's used installed only in open source distribution + # we don't need wheels in the distribution packages set(OV_CPACK_COMP_PYTHON_WHEELS_EXCLUDE_ALL EXCLUDE_FROM_ALL) # we don't need requirements.txt in package, because dependencies are installed by packages managers like conda set(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # nodejs set(OV_CPACK_COMP_NPM_EXCLUDE_ALL EXCLUDE_FROM_ALL) - # tools - set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # scripts set(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL EXCLUDE_FROM_ALL) diff --git a/cmake/developer_package/packaging/debian/debian.cmake b/cmake/developer_package/packaging/debian/debian.cmake index f133428d66ec74..2b95fcfde5c145 100644 --- a/cmake/developer_package/packaging/debian/debian.cmake +++ 
b/cmake/developer_package/packaging/debian/debian.cmake @@ -11,7 +11,6 @@ include(GNUInstallDirs) # macro(ov_debian_cpack_set_dirs) # override default locations for Debian - set(OV_CPACK_TOOLSDIR ${CMAKE_INSTALL_BINDIR}) # only C++ tools are here set(OV_CPACK_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}) set(OV_CPACK_RUNTIMEDIR ${CMAKE_INSTALL_LIBDIR}) if(CMAKE_CROSSCOMPILING) @@ -42,7 +41,7 @@ macro(ov_debian_cpack_set_dirs) unset(OV_CPACK_SHAREDIR) # skipped during debian packaging - set(OV_CPACK_WHEELSDIR "tools") + set(OV_CPACK_WHEELSDIR "wheels") endmacro() ov_debian_cpack_set_dirs() @@ -111,8 +110,6 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # nodejs set(OV_CPACK_COMP_NPM_EXCLUDE_ALL EXCLUDE_FROM_ALL) - # tools - set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # scripts set(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL EXCLUDE_FROM_ALL) diff --git a/cmake/developer_package/packaging/npm.cmake b/cmake/developer_package/packaging/npm.cmake index 2a2509cdcae65a..a1ad45bce356ee 100644 --- a/cmake/developer_package/packaging/npm.cmake +++ b/cmake/developer_package/packaging/npm.cmake @@ -19,7 +19,6 @@ macro(ov_npm_cpack_set_dirs) set(OV_CPACK_LICENSESDIR licenses) set(OV_CPACK_SAMPLESDIR .) set(OV_CPACK_WHEELSDIR .) - set(OV_CPACK_TOOLSDIR .) set(OV_CPACK_DEVREQDIR .) set(OV_CPACK_PYTHONDIR .) @@ -78,8 +77,6 @@ macro(ov_define_component_include_rules) set(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # nodejs unset(OV_CPACK_COMP_NPM_EXCLUDE_ALL) - # tools - set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL) # scripts set(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL EXCLUDE_FROM_ALL) set(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL EXCLUDE_FROM_ALL) diff --git a/cmake/developer_package/packaging/nsis.cmake b/cmake/developer_package/packaging/nsis.cmake index d7d8ed152c49d9..9b2242fe1f5302 100644 --- a/cmake/developer_package/packaging/nsis.cmake +++ b/cmake/developer_package/packaging/nsis.cmake @@ -58,8 +58,7 @@ macro(ov_archive_cpack_set_dirs) set(OV_CPACK_DOCDIR docs) set(OV_CPACK_LICENSESDIR licenses) set(OV_CPACK_SAMPLESDIR samples) - set(OV_CPACK_WHEELSDIR tools) - set(OV_CPACK_TOOLSDIR tools) + set(OV_CPACK_WHEELSDIR wheels) set(OV_CPACK_DEVREQDIR tools) set(OV_CPACK_PYTHONDIR python) @@ -133,8 +132,6 @@ macro(ov_define_component_include_rules) unset(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL) # nodejs set(OV_CPACK_COMP_NPM_EXCLUDE_ALL EXCLUDE_FROM_ALL) - # tools - unset(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL) # scripts unset(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL) unset(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL) diff --git a/cmake/developer_package/packaging/packaging.cmake b/cmake/developer_package/packaging/packaging.cmake index d62a8ca7fe2084..478fcdf0bd2c86 100644 --- a/cmake/developer_package/packaging/packaging.cmake +++ b/cmake/developer_package/packaging/packaging.cmake @@ -173,8 +173,6 @@ macro(ov_define_component_names) set(OV_CPACK_COMP_OPENVINO_REQ_FILES "openvino_req_files") # nodejs set(OV_CPACK_COMP_NPM "ov_node_addon") - # tools - set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES "openvino_dev_req_files") # scripts set(OV_CPACK_COMP_INSTALL_DEPENDENCIES "install_dependencies") set(OV_CPACK_COMP_SETUPVARS "setupvars") diff --git a/cmake/developer_package/packaging/rpm/rpm.cmake b/cmake/developer_package/packaging/rpm/rpm.cmake index b7c482555bd131..45d9b0c0ca2121 100644 --- 
a/cmake/developer_package/packaging/rpm/rpm.cmake
+++ b/cmake/developer_package/packaging/rpm/rpm.cmake
@@ -11,7 +11,6 @@ include(GNUInstallDirs)
 #
 macro(ov_rpm_cpack_set_dirs)
     # override default locations for RPM
-    set(OV_CPACK_TOOLSDIR ${CMAKE_INSTALL_BINDIR}) # only C++ tools are here
     set(OV_CPACK_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR})
     set(OV_CPACK_LIBRARYDIR ${CMAKE_INSTALL_LIBDIR})
     set(OV_CPACK_RUNTIMEDIR ${CMAKE_INSTALL_LIBDIR})
@@ -33,7 +32,7 @@ macro(ov_rpm_cpack_set_dirs)
     unset(OV_CPACK_SHAREDIR)
 
     # skipped during rpm packaging
-    set(OV_CPACK_WHEELSDIR "tools")
+    set(OV_CPACK_WHEELSDIR "wheels")
 endmacro()
 
 ov_rpm_cpack_set_dirs()
@@ -102,8 +101,6 @@ macro(ov_define_component_include_rules)
     set(OV_CPACK_COMP_OPENVINO_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     # nodejs
     set(OV_CPACK_COMP_NPM_EXCLUDE_ALL EXCLUDE_FROM_ALL)
-    # tools
-    set(OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     # scripts
     set(OV_CPACK_COMP_INSTALL_DEPENDENCIES_EXCLUDE_ALL EXCLUDE_FROM_ALL)
     set(OV_CPACK_COMP_SETUPVARS_EXCLUDE_ALL EXCLUDE_FROM_ALL)
diff --git a/tools/openvino_dev/CMakeLists.txt b/tools/openvino_dev/CMakeLists.txt
index d8488e97d1082e..924c83abc9bff8 100644
--- a/tools/openvino_dev/CMakeLists.txt
+++ b/tools/openvino_dev/CMakeLists.txt
@@ -31,29 +31,6 @@ else()
 endif()
 set(WHEEL_BUILD "${OpenVINO_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE)
 
-# outbound requirements.txt files for openvino-dev package
-
-ov_cpack_add_component(${OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES} HIDDEN)
-
-set(REQUIREMENTS_IN "${CMAKE_CURRENT_SOURCE_DIR}/requirements_dev.txt.in")
-set(EXTRAS_LIST _ caffe kaldi onnx pytorch tensorflow tensorflow2)
-
-foreach(EXTRAS IN LISTS EXTRAS_LIST)
-    if(EXTRAS STREQUAL "_")
-        set(REQUIREMENTS_OUT "requirements.txt")
-        set(EXTRAS "")
-    else()
-        set(REQUIREMENTS_OUT "requirements_${EXTRAS}.txt")
-        set(EXTRAS "[${EXTRAS}]")
-    endif()
-    configure_file(${REQUIREMENTS_IN} ${REQUIREMENTS_OUT})
-
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${REQUIREMENTS_OUT}
-            DESTINATION ${OV_CPACK_DEVREQDIR}
-            COMPONENT ${OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES}
-            ${OV_CPACK_COMP_OPENVINO_DEV_REQ_FILES_EXCLUDE_ALL})
-endforeach()
-
 # check __init__.py files alignment
 
 function(ov_check_init_files_alignment init_files)

From e78f5187015653d08a0f17032538993e23485167 Mon Sep 17 00:00:00 2001
From: Alina Kladieva
Date: Mon, 14 Oct 2024 18:51:18 +0200
Subject: [PATCH 005/112] [GHA] Configurable event name for provider (#27035)

To test archives from pre-commits when needed

---
 .github/actions/openvino_provider/action.yml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/actions/openvino_provider/action.yml b/.github/actions/openvino_provider/action.yml
index 31c49afd15bd94..dd1078bb0d4353 100644
--- a/.github/actions/openvino_provider/action.yml
+++ b/.github/actions/openvino_provider/action.yml
@@ -9,6 +9,11 @@ inputs:
     description: 'Branch of OpenVINO to take the revision from if no specific hash
                   was provided. Taken from github context by default'
     required: false
+  event_name:
+    description: 'Event name from which artifacts were generated.
"push" by default; overwrite it with "pull_request" + if revision/branch is from PR' + default: "push" + required: false ov_artifact_name: description: "Name under which to upload provided OpenVINO build artifacts, set automatically by default" required: false @@ -107,7 +112,7 @@ runs: with: platform: ${{ inputs.platform }}_${{ inputs.arch }} storage_root: ${{ inputs.commit_share_path }} - event_name: "commit" + event_name: ${{ inputs.event_name }} trigger_repo_sha: ${{ env.OV_REVISION }} branch_name: ${{ inputs.branch_name }} to_restore: ${{ inputs.commit_packages_to_provide }} From 062762baf0b347ac6262e8ff4f9f23e253feafbf Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Mon, 14 Oct 2024 20:11:48 +0100 Subject: [PATCH 006/112] [NPUW] L0 allocation improvements (#27011) EISW-142611 --- .../src/plugin/npuw/compiled_model.cpp | 42 ++++++++++++++++++- .../src/plugin/npuw/compiled_model.hpp | 4 ++ .../plugin/npuw/just_sync_infer_request.cpp | 40 ++++++++++++++---- .../plugin/npuw/just_sync_infer_request.hpp | 13 ++++++ .../npuw/partitioning/online/compiler.cpp | 4 +- .../src/plugin/npuw/weights_bank.cpp | 17 ++++---- 6 files changed, 102 insertions(+), 18 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index 1c75b1cbdf3211..2fe90eb82c41bb 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -442,13 +442,14 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, } void ov::npuw::CompiledModel::finalize_weights_bank() { + LOG_INFO("Finalizing weights bank..."); // Register lazy tensors for (std::size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) { auto& comp_model_desc = m_compiled_submodels[idx]; // Skip optimized out and non-functions if (!comp_model_desc.compiled_model && !comp_model_desc.replaced_by) { - return; + continue; } const auto real_idx = comp_model_desc.replaced_by.value_or(idx); @@ -489,6 +490,45 @@ void ov::npuw::CompiledModel::finalize_weights_bank() { comp_model_desc.is_remote[tidx] = m_weights_bank->is_remote(lt); } } + + LOG_INFO("Done."); +} + +std::string ov::npuw::CompiledModel::global_mem_device() const { + // Force globally set device if set + const std::string device_alloc = m_cfg.get<::intel_npu::NPUW_WEIGHTS_BANK_ALLOC>(); + if (!device_alloc.empty()) { + return device_alloc; + } + + // Check if there is at least 1 NPU submodel + for (std::size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) { + auto& comp_model_desc = m_compiled_submodels[idx]; + if (!comp_model_desc.compiled_model) { + continue; + } + if (ov::npuw::util::starts_with(*comp_model_desc.device_it, "NPU")) { + return "NPU"; + } + } + + return "CPU"; +} + +std::string ov::npuw::CompiledModel::funcall_mem_device(const std::size_t idx) const { + // FIXME: currently we allocate intermediate tensors for EVERY submodel. + // It's not feasible to allocate them in L0 due to high memory consumption. + // Until we make such memory reusable, hard-coding those tensors to CPU. 
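+    // A minimal illustrative sketch (editor's note, not part of the change; it
+    // reuses names that appear elsewhere in this patch): the deferred L0
+    // allocation path would look roughly like
+    //   auto ctx = get_plugin()->get_core()->get_default_context(*comp_model_desc.device_it)._ptr;
+    //   auto remote = ctx->create_host_tensor(type, shape);   // host-visible L0 memory
+    //   auto tensor = ov::make_tensor(remote);                // wrap as a regular ov::Tensor
+    // which mirrors what allocTensor() in just_sync_infer_request.cpp does below.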
+ return "CPU"; + + // Force globally set device if set + const std::string device_alloc = m_cfg.get<::intel_npu::NPUW_WEIGHTS_BANK_ALLOC>(); + if (!device_alloc.empty()) { + return device_alloc; + } + + auto& comp_model_desc = m_compiled_submodels[idx]; + return *comp_model_desc.device_it; } void ov::npuw::CompiledModel::remove_long_output_names(const std::shared_ptr& model) { diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp index 7f308d46094f35..4152d08275ba6d 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp @@ -74,6 +74,10 @@ class CompiledModel : public ov::ICompiledModel { void finalize_weights_bank(); + std::string global_mem_device() const; + + std::string funcall_mem_device(const std::size_t idx) const; + std::shared_ptr<::intel_npu::OptionsDesc> m_options_desc; ::intel_npu::Config m_cfg; GetPropertiesMap m_prop_to_opt; diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp index 93f9f12fe86048..fbbabf083bccd8 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp @@ -16,7 +16,6 @@ #include "openvino/core/except.hpp" #include "openvino/core/parallel.hpp" #include "openvino/runtime/iasync_infer_request.hpp" -#include "openvino/runtime/make_tensor.hpp" #include "plugin.hpp" #include "util.hpp" #include "weights_bank.hpp" @@ -49,7 +48,7 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrm_compiled_submodels[real_idx]; auto& proto_comp_model = proto_comp_model_desc.compiled_model; @@ -68,13 +67,13 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrparams) { const auto& iport = proto_comp_model_desc.compiled_model->inputs()[p.idx]; m_spatial_io[real_idx].input_tails[p.idx] = - ov::get_tensor_impl(ov::Tensor(iport.get_element_type(), iport.get_shape())); + allocTensor(iport, m_npuw_model->funcall_mem_device(real_idx)); } const auto num_outs = proto_comp_model_desc.compiled_model->outputs().size(); for (std::size_t out_idx = 0u; out_idx < num_outs; out_idx++) { const auto& oport = proto_comp_model_desc.compiled_model->outputs()[out_idx]; m_spatial_io[real_idx].output_tails[out_idx] = - ov::get_tensor_impl(ov::Tensor(oport.get_element_type(), oport.get_shape())); + allocTensor(oport, m_npuw_model->funcall_mem_device(real_idx)); } } } // if(spatial) @@ -88,7 +87,7 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrout_dim] = proto_comp_model_desc.spatial->range; } m_funcall_result[LinkFrom{i, out_idx}] = - ov::get_tensor_impl(ov::Tensor(port.get_element_type(), shape)); + allocTensor(port.get_element_type(), shape, m_npuw_model->funcall_mem_device(real_idx)); } if (real_idx != i) { // If this function call is NOT the function body, do nothing here - the original @@ -153,7 +152,9 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrinputs().size(); i++) { const auto& port = m_npuw_model->inputs()[i]; - m_input_tensors.push_back(ov::get_tensor_impl(ov::Tensor(port.get_element_type(), port.get_shape()))); + ov::SoPtr allocated = allocTensor(port, m_npuw_model->global_mem_device()); + m_input_tensors.push_back(allocated); + m_input_allocated.insert(allocated->data()); m_port_to_tensor[port] = TensorStorage{m_input_tensors.back(), true}; } // for(inputs) @@ -173,7 +174,7 @@ 
ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrsecond // Function calls have their tensors allocated, so just use one - : ov::get_tensor_impl(ov::Tensor(port.get_element_type(), port.get_shape())); + : allocTensor(port, m_npuw_model->global_mem_device()); m_output_tensors.push_back(tensor); m_port_to_tensor[port] = TensorStorage{tensor, true}; @@ -421,7 +422,7 @@ void ov::npuw::JustInferRequest::bind_global_parameters(std::size_t idx) { LOG_BLOCK(); if (!is_spatial_param(sub_in_idx)) { // Input parameter is non-spatial, do normal handling - if (do_copy) { + if (do_copy || m_input_allocated.count(g_tnsr->data()) == 0) { LOG_DEBUG("Will be copied"); copy_list.emplace_back(g_tnsr, s_port); } else { @@ -919,6 +920,29 @@ void ov::npuw::JustInferRequest::unsafe_run_this_prep_next(std::size_t idx, bool } // if (replaced_by) } +ov::SoPtr ov::npuw::JustInferRequest::allocTensor(const ov::element::Type type, + const ov::Shape& shape, + const std::string& device) { + if (device == "CPU" || ov::shape_size(shape) == 0) { + return ov::get_tensor_impl(ov::Tensor(type, shape)); + } + + ov::SoPtr remote_tensor; + ov::Tensor allocated_tensor; + { + std::lock_guard guard(m_alloc_mutex); + m_remote_ctx = m_npuw_model->get_plugin()->get_core()->get_default_context(device)._ptr; + remote_tensor = m_remote_ctx->create_host_tensor(type, shape); + allocated_tensor = ov::make_tensor(remote_tensor); + } + return ov::get_tensor_impl(allocated_tensor); +} + +ov::SoPtr ov::npuw::JustInferRequest::allocTensor(const ov::Output& node, + const std::string& device) { + return allocTensor(node.get_element_type(), node.get_shape(), device); +} + void ov::npuw::JustInferRequest::subscribe_subrequest(std::size_t idx, Completed cb) { get_real_subrequest(idx)->set_callback(std::move(cb)); } diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp index 2544647dd0066c..7335b54c30062e 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp @@ -6,10 +6,15 @@ #include #include +#include #include #include #include "base_sync_infer_request.hpp" +#include "openvino/runtime/iplugin.hpp" +#include "openvino/runtime/iremote_context.hpp" +#include "openvino/runtime/make_tensor.hpp" +#include "openvino/runtime/tensor.hpp" namespace ov { namespace npuw { @@ -59,6 +64,9 @@ class JustInferRequest final : public IBaseInferRequest { void connect_subrequests(); void recreate_subrequests(std::size_t idx); + ov::SoPtr allocTensor(const ov::element::Type type, const ov::Shape& shape, const std::string& device); + ov::SoPtr allocTensor(const ov::Output& node, const std::string& device); + using LinkFrom = std::pair output idx }; std::vector m_subrequests_gio; + + std::mutex m_alloc_mutex; + std::shared_ptr m_remote_ctx = nullptr; + + std::unordered_set m_input_allocated; }; } // namespace npuw diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp index 4b8973b5bb94ae..46b6cb7b12681d 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp @@ -74,8 +74,8 @@ std::vector getAvoids(::intel_npu::Config& cfg) { std::string avoids_opt = cfg.getString<::intel_npu::NPUW_ONLINE_AVOID>(); if (avoids_opt.empty()) { - 
LOG_WARN(::intel_npu::NPUW_ONLINE_AVOID().key()
-                 << " property is not set! NPU device will be prioritized for every subgraph.");
+        LOG_VERB(::intel_npu::NPUW_ONLINE_AVOID().key()
+                 << " property is not set. NPU device will be prioritized for every subgraph.");
         return {};
     }
 
diff --git a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
index d142f72f9b7126..2a79bf33ef9a53 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
@@ -95,13 +95,16 @@ ov::Tensor Bank::unsafe_eval_and_alloc(const LazyTensor& tensor, const std::stri
         return transformed_tensor;
     }
 
-    // FIXME: L0 allocation may crash when run in parallel
-    std::lock_guard guard(m_alloc_mutex);
-
-    m_remote_ctx = m_core->get_default_context(device_for_alloc)._ptr;
-    auto remote_tensor =
-        m_remote_ctx->create_host_tensor(transformed_tensor.get_element_type(), transformed_tensor.get_shape());
-    auto allocated_tensor = ov::make_tensor(remote_tensor);
+    ov::SoPtr remote_tensor;
+    ov::Tensor allocated_tensor;
+    {
+        // FIXME: L0 allocation may crash when run in parallel
+        std::lock_guard guard(m_alloc_mutex);
+        m_remote_ctx = m_core->get_default_context(device_for_alloc)._ptr;
+        remote_tensor =
+            m_remote_ctx->create_host_tensor(transformed_tensor.get_element_type(), transformed_tensor.get_shape());
+        allocated_tensor = ov::make_tensor(remote_tensor);
+    }
     transformed_tensor.copy_to(allocated_tensor);
     m_device_bank[device_for_alloc][tensor] = allocated_tensor;
     return allocated_tensor;

From b2782fae3b507a2398ac7400986f6b3505e6fdee Mon Sep 17 00:00:00 2001
From: Alina Kladieva
Date: Mon, 14 Oct 2024 22:10:00 +0200
Subject: [PATCH 007/112] [GHA] Fix wheels storage handling (#27039)

Calling store_artifacts twice in a row is excessive and causes improper
artifact storage due to directory rotation.

---
 .github/workflows/job_build_linux.yml | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/job_build_linux.yml b/.github/workflows/job_build_linux.yml
index d253f779152e6d..d58e879c736610 100644
--- a/.github/workflows/job_build_linux.yml
+++ b/.github/workflows/job_build_linux.yml
@@ -321,7 +321,7 @@ jobs:
           popd
 
       - name: Store artifacts to a shared drive
-        id: store_artifacts_common
+        id: store_artifacts
         if: ${{ always() }}
        uses: ./openvino/.github/actions/store_artifacts
         with:
@@ -331,15 +331,8 @@ jobs:
             ${{ env.BUILD_DIR }}/openvino_tests.tar.gz
             ${{ env.BUILD_DIR }}/deb
             ${{ env.MANIFEST_PATH }}
+            ${{ env.STORE_WHEELS == 'true' && format('{0}/wheels', env.BUILD_DIR) || '' }}
           storage_dir: ${{ env.PRODUCT_TYPE }}
           storage_root: ${{ env.ARTIFACTS_SHARE }}
-
-      - name: Store artifacts to a shared drive (wheels)
-        id: store_artifacts_wheels
-        if: ${{ inputs.os != 'debian_10' && inputs.arch != 'arm' }}
-        uses: ./openvino/.github/actions/store_artifacts
-        with:
-          artifacts: |
-            ${{ env.BUILD_DIR }}/wheels
-          storage_dir: ${{ env.PRODUCT_TYPE }}
-          storage_root: ${{ env.ARTIFACTS_SHARE }}
+        env:
+          STORE_WHEELS: ${{ inputs.os != 'debian_10' && inputs.arch != 'arm' }}

From c44a32e253cc52067f5f99b9e83a7f3fec102ed9 Mon Sep 17 00:00:00 2001
From: hyunback kim
Date: Tue, 15 Oct 2024 07:50:55 +0900
Subject: [PATCH 008/112] [GPU] Fix sd1.5_controlnet_lora bad image. (#26881)

1. Fixed a bug where a dynamic tensor's value would disappear when dynamic
and static inputs were used together.
2. Fixed wrong oneDNN GEMM post-op dims for the 1x1 spatial case.
3. Fixed a side effect of the can_be_optimized condition in allocate_output:
runtime-skippable must be true for the memory pool to be used.
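For item 1, a minimal sketch of the shape-inference pitfall (editorial
illustration; the variables are hypothetical, only Dimension::merge and the
min/max guard come from the patch below):

    // input0 is dynamic [?, M, K]; input1 carries a static broadcast batch of 1.
    ov::Dimension out_b = ov::Dimension::dynamic();  // inferred from input0
    ov::Dimension in1_b(1, 1);                       // static 1 from input1
    // An unconditional merge collapses out_b to 1, losing the dynamic batch:
    //   ov::Dimension::merge(out_b, out_b, in1_b);  // -> out_b becomes 1
    // The fix skips static dimensions (min == max), preserving dynamism:
    if (in1_b.get_max_length() != in1_b.get_min_length())
        ov::Dimension::merge(out_b, out_b, in1_b);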
### Tickets:
- *149836*

---------

Signed-off-by: hyunback

---
 src/plugins/intel_gpu/src/graph/gemm.cpp      |  3 +-
 .../graph_optimizer/prepare_buffer_fusing.cpp |  3 +
 .../intel_gpu/src/graph/primitive_inst.cpp    |  7 +-
 .../intel_gpu/src/graph/program_node.cpp      | 12 +++
 .../tests/unit/test_cases/gemm_gpu_test.cpp   | 74 +++++++++++++++++++
 5 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/src/plugins/intel_gpu/src/graph/gemm.cpp b/src/plugins/intel_gpu/src/graph/gemm.cpp
index a8b196bd45885f..25007cb93b18d5 100644
--- a/src/plugins/intel_gpu/src/graph/gemm.cpp
+++ b/src/plugins/intel_gpu/src/graph/gemm.cpp
@@ -229,7 +229,8 @@ layout gemm_inst::transform_output_layout(const std::shared_ptr prim
                                           (i == 1) ? transposed_input1_pshape
                                                    : input_layouts[i].get_partial_shape();
         for (size_t j = 0; j != input_pshape.size(); ++j) {
-            ov::Dimension::merge(output_pshape[j], output_pshape[j], input_pshape[j]);
+            if (input_pshape[j].get_max_length() != input_pshape[j].get_min_length())
+                ov::Dimension::merge(output_pshape[j], output_pshape[j], input_pshape[j]);
         }
     }
 
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
index b0c6758af7d909..b7017c414c505f 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
@@ -274,6 +274,9 @@ void concat_in_place_optimization::optimize_cascade(concatenation_node& node, st
     }
     node.set_output_layout(concat_layout);
     node.can_be_optimized(true);
+    if (node.is_dynamic()) {
+        node.set_runtime_skippable(true);
+    }
     GPU_DEBUG_TRACE_DETAIL << "[prepare_buffer_fusing] : " << node.id() << " can be optimized" << std::endl;
 }
 
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index c51b34d81cf153..13634b49fd9d96 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -502,7 +502,7 @@ event::ptr primitive_inst::realloc_if_needed() {
 
     event::ptr ev = nullptr;
     const auto& users = get_user_insts();
-    if (users.size() == 1 && users.front()->get_node().is_type()) {
+    if (users.size() == 1 && users.front()->get_node().is_type() && users.front()->get_node().is_runtime_skippable()) {
         auto concat_inst = users.front();
         if (concat_inst->can_be_optimized()) {
             if (!concat_inst->allocation_done_by_other) {
@@ -656,7 +656,7 @@ event::ptr primitive_inst::realloc_if_needed() {
     }
 
     // Clear out memory if if was previously reused, but now primitive can't be optimized
-    if (_node->is_runtime_skippable() || _node->is_type()) {
+    if (!_node->is_type() && (_node->is_runtime_skippable() || _node->is_type())) {
         if (can_be_optimized()) {
             _max_output_layout_count = _deps[0].first->_max_output_layout_count;
             GPU_DEBUG_PROFILED_STAGE_MEMALLOC_INFO("can_be_optimized");
@@ -1351,7 +1351,8 @@ void primitive_inst::do_runtime_in_place_concat() {
     if (get_users().size() != 1)
         return;
     auto concat_inst = get_user_insts().front();
-    if (!concat_inst->get_node().is_type() || !concat_inst->get_node().can_be_optimized())
+
+    if (!concat_inst->get_node().is_type() || !(concat_inst->get_node().can_be_optimized() && concat_inst->get_node().is_runtime_skippable()))
         return;
 
     if (has_subgraph_dependency(concat_inst->dependencies())) {
diff --git 
a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 21ba4e656fae0d..fc9648b90e444c 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -1548,6 +1548,18 @@ void program_node::create_onednn_primitive_attributes( mem_desc.get_dims(), mem_desc.get_data_type()); } else if (is_type()) { size_t rank = cldnn::format::dimension(in.format); + auto in_pshape = in.get_partial_shape(); + auto out_pshape = get_output_layout().get_partial_shape(); + size_t ones_to_add = std::max(out_pshape.size(), static_cast(rank)) - in_pshape.size(); + if (ones_to_add > 0) { + layout new_layout = in; + ov::PartialShape new_input_pshape; + std::vector dims(in_pshape.begin(), in_pshape.begin() + in_pshape.size()); + new_input_pshape = ov::PartialShape(dims); + new_input_pshape.insert(new_input_pshape.begin(), ones_to_add, 1ul); + new_layout.set_partial_shape(new_input_pshape); + in = new_layout; + } size_t in_batched_size = in.count() / (in.spatial(0) * in.spatial(1)); dnnl::memory::dims dims = onednn::convert_gemm_tensor(in.get_tensor(), rank, in_batched_size == 1); dnnl::memory::data_type dt = onednn::convert_data_type(in.data_type); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index adaa572878bff4..51f66f3abb7bfe 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -1577,6 +1577,76 @@ class gemm_gpu_tests: public ::testing::Test { ASSERT_NEAR(output_ptr[i], ref_out_data[i], abs_error) << "at " << i; } } + + void test_dynamic_static_broadcast_3dim(std::vector BMKN, bool is_caching_test, const double abs_error = 0.0001) { + tests::random_generator rg; + rg.set_seed(GET_SUITE_NAME); + + auto& engine = get_test_engine(); + cldnn::layout input0_layout; + cldnn::layout input1_layout; + + std::vector input0_order = {0, 1, 2}; + std::vector input1_order = {0, 1, 2}; + std::vector output_order = {0, 1, 2}; + + size_t BATCH_SIZE = BMKN[0]; + size_t M_SIZE = BMKN[1]; + size_t K_SIZE = BMKN[2]; + size_t N_SIZE = BMKN[3]; + + ov::Shape input0_shape = { BATCH_SIZE, M_SIZE, K_SIZE }; + ov::Shape input1_shape = { 1, K_SIZE, N_SIZE }; + ov::Shape output_shape = { BATCH_SIZE, M_SIZE, N_SIZE }; + + input0_layout = layout{ov::PartialShape::dynamic(input0_shape.size()), data_types::f16, format::bfyx}; + input1_layout = layout{ov::PartialShape(input1_shape), data_types::f16, format::bfyx}; + + auto input0_mem = engine.allocate_memory(layout{ov::PartialShape(input0_shape), data_types::f16, format::bfyx}); + auto input1_mem = engine.allocate_memory(layout{ov::PartialShape(input1_shape), data_types::f16, format::bfyx}); + + auto input_0_data = rg.generate_random_1d(ov::shape_size(input0_shape), -2, 2); + auto input_1_data = rg.generate_random_1d(ov::shape_size(input1_shape), -2, 2); + + set_values(input0_mem, input_0_data); + set_values(input1_mem, input_1_data); + + topology topology; + topology.add(input_layout("input0", input0_layout), + input_layout("input1", input1_layout), + gemm("gemm", { input_info("input0"), input_info("input1") }, data_types::f16, input0_order, input1_order, output_order) + ); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network::ptr network = get_network(engine, 
topology, config, get_test_stream_ptr(), is_caching_test); + network->set_input_data("input0", input0_mem); + network->set_input_data("input1", input1_mem); + + auto outputs = network->execute(); + + auto output_mem = outputs.at("gemm").get_memory(); + cldnn::mem_lock output_ptr(output_mem, get_test_stream()); + + std::vector ref_out_data; + ref_out_data.resize(ov::shape_size(output_shape)); + + ov::reference::matmul(input_0_data.data(), + input_1_data.data(), + ref_out_data.data(), + input0_shape, + input1_shape, + output_shape, + false, + false); + + ASSERT_EQ(output_ptr.size(), ref_out_data.size()); + + for (uint32_t i = 0; i < ref_out_data.size(); ++i) { + ASSERT_NEAR(output_ptr[i], ref_out_data[i], abs_error) << "at " << i; + } + } }; TEST_F(gemm_gpu_tests, basic_bfyx_t2_inplace_crop_with_pad) { @@ -1710,6 +1780,10 @@ TEST_F(gemm_gpu_tests, transpose_matmul_static_4d_f32_n_tile_32_input1_ylast) { this->test_transpose_matmul_f32(4, false, false, /*BMKN*/{19, 37, 23, 29}, /*input0_order*/{0, 1, 2, 3}, /*input1_order*/{0, 1, 3, 2}); } +TEST_F(gemm_gpu_tests, test_dynamic_static_broadcast_3dim) { + this->test_dynamic_static_broadcast_3dim(/*BMKN*/{2, 16, 2, 2}, false); +} + TEST_F(gemm_gpu_tests, transpose_matmul_in0_indirect) { this->test_transpose_indirect(false, true, false); } From 421eaec1b52d6813035a94a4548ed4c1de0ef374 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Tue, 15 Oct 2024 03:01:02 +0200 Subject: [PATCH 009/112] [GHA] Include files in plain subfolders to restore (#27046) Quick fix for restored wheel files --- .github/actions/restore_artifacts/restore_artifacts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/restore_artifacts/restore_artifacts.py b/.github/actions/restore_artifacts/restore_artifacts.py index d79388a27f5616..007bc56aafe9b2 100644 --- a/.github/actions/restore_artifacts/restore_artifacts.py +++ b/.github/actions/restore_artifacts/restore_artifacts.py @@ -33,10 +33,10 @@ def include_filter(include_list: set | list): """ Returns input for shutil.copytree ignore - to copy only files from include list """ - def _filter(_, files: list): + def _filter(root, files: list): if not include_list: return [] - return [f for f in files if f not in include_list] + return [f for f in files if f not in include_list and Path(root).name not in include_list] return _filter From fe2f67bc7adfd122ece8d9be992657b6cdb0ff89 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 15 Oct 2024 07:12:02 +0400 Subject: [PATCH 010/112] Assert on minimal macOS deployment target (#26136) ### Details: - See https://github.com/openvinotoolkit/openvino.genai/actions/runs/10470176019/job/28994760817 --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c68e3a611b39ba..e9e8d3724d9ac5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,11 @@ if(ENABLE_COVERAGE) include(cmake/coverage.cmake) endif() +if(APPLE AND CMAKE_OSX_DEPLOYMENT_TARGET AND + CMAKE_OSX_DEPLOYMENT_TARGET VERSION_LESS 10.15) + message(FATAL_ERROR "OpenVINO requires MACOSX_DEPLOYMENT_TARGET at least 10.15, specified ${CMAKE_OSX_DEPLOYMENT_TARGET}") +endif() + # resolving dependencies for the project message (STATUS "CMAKE_VERSION ......................... " ${CMAKE_VERSION}) message (STATUS "CMAKE_CROSSCOMPILING .................. 
" ${CMAKE_CROSSCOMPILING}) From 9486b7dac93ba8f96973830424647031f4267097 Mon Sep 17 00:00:00 2001 From: Xiuchuan Zhai Date: Tue, 15 Oct 2024 14:32:53 +0800 Subject: [PATCH 011/112] [CPU] attn supports f16 (#26487) ### Details: - *rebase from https://github.com/openvinotoolkit/openvino/pull/22939* - *enable avx512 fp16 for attention* - *enable amx fp16 for attention* - *update PagedAttentionExtension lightly. can specify the correct type to pa second output precision* ### Tickets: - *128183* --- .../dev_api/openvino/op/paged_attention.hpp | 5 + src/core/src/op/paged_attention.cpp | 18 +- src/plugins/intel_cpu/src/graph.cpp | 4 +- .../nodes/kernels/scaled_attn/attn_quant.cpp | 5 +- .../src/nodes/kernels/scaled_attn/common.hpp | 95 +++++-- .../nodes/kernels/scaled_attn/executor_pa.cpp | 73 +++-- .../scaled_attn/executor_pa_common.cpp | 10 +- .../scaled_attn/executor_pa_common.hpp | 3 +- .../kernels/scaled_attn/mha_single_token.cpp | 250 ++++++++++-------- .../kernels/scaled_attn/softmax_kernel.hpp | 43 ++- .../src/nodes/kernels/x64/brgemm_kernel.cpp | 148 +++++++---- .../src/nodes/kernels/x64/brgemm_kernel.hpp | 3 + .../intel_cpu/src/nodes/paged_attn.cpp | 2 + .../intel_cpu/src/nodes/scaled_attn.cpp | 45 ++-- .../transformation_pipeline.cpp | 13 +- .../transformations/transformation_pipeline.h | 1 + .../subgraph_tests/src/arm/concat_sdp.cpp | 1 + .../subgraph_tests/src/classes/concat_sdp.cpp | 42 ++- .../subgraph_tests/src/classes/concat_sdp.hpp | 5 +- .../src/common/concat_multiple_query_sdp.cpp | 41 ++- .../subgraph_tests/src/common/concat_sdp.cpp | 2 + .../common/concat_transpose_sdp_transpose.cpp | 4 + .../subgraph_tests/src/x64/concat_sdp.cpp | 47 ++++ .../skip_tests_config.cpp | 5 + .../tests/unit/brgemm_executor_test.cpp | 21 +- 25 files changed, 605 insertions(+), 281 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/concat_sdp.cpp diff --git a/src/core/dev_api/openvino/op/paged_attention.hpp b/src/core/dev_api/openvino/op/paged_attention.hpp index e5995e0b8699b0..0c1c396cbefb5b 100644 --- a/src/core/dev_api/openvino/op/paged_attention.hpp +++ b/src/core/dev_api/openvino/op/paged_attention.hpp @@ -17,6 +17,11 @@ class OPENVINO_API PagedAttentionExtension : public ov::op::Op { PagedAttentionExtension(const ov::OutputVector& args); void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + void set_out_type(int index, const ov::element::Type& output_type); + +protected: + std::vector m_output_type = {ov::element::undefined, ov::element::undefined}; }; } // namespace op diff --git a/src/core/src/op/paged_attention.cpp b/src/core/src/op/paged_attention.cpp index e3771bcbf92937..261b0ce1c47605 100644 --- a/src/core/src/op/paged_attention.cpp +++ b/src/core/src/op/paged_attention.cpp @@ -146,13 +146,27 @@ void PagedAttentionExtension::validate_and_infer_types() { get_input_element_type(12), "."); - set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); - set_output_type(1, get_input_element_type(0), {Dimension::dynamic()}); + if (m_output_type[0] == ov::element::undefined) { + set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); + } else { + set_output_type(0, m_output_type[0], get_input_partial_shape(0)); + } + + if (m_output_type[1] == ov::element::undefined) { + set_output_type(1, get_input_element_type(0), {Dimension::dynamic()}); + } else { + set_output_type(1, m_output_type[1], {Dimension::dynamic()}); + } } 
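// Usage sketch (editorial illustration, not part of the patch): with the new
// setter, a plugin-side transformation can pin the precision of the second
// (score) output while the first (data) output follows the runtime precision:
//   if (auto pa = ov::as_type_ptr<ov::op::PagedAttentionExtension>(node)) {
//       pa->set_out_type(0, ov::element::f16);  // attention data output
//       pa->set_out_type(1, ov::element::f32);  // keep scores in f32
//   }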
std::shared_ptr PagedAttentionExtension::clone_with_new_inputs(const ov::OutputVector& new_args) const { return std::make_shared(new_args); } +void PagedAttentionExtension::set_out_type(int index, const ov::element::Type& output_type) { + OPENVINO_ASSERT(index < 2, "Output index should be 0 or 1, but got " + std::to_string(index)); + m_output_type[index] = output_type; +} + } // namespace op } // namespace ov diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index a17b8d28e17f5d..dc0f953efe70ab 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -194,8 +194,8 @@ void Graph::Replicate(const std::shared_ptr &model, const auto port = unusedOutput.get_index(); const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); const NodePtr outNode = std::make_shared(parentNode->outputShapes[port], - parentNode->getOriginalOutputPrecisionAtPort(port), - nodeName, "Result", m_context); + parentNode->getOriginalOutputPrecisionAtPort(port), + nodeName, "Result", m_context); CreateEdge(parentNode, outNode, port, 0); AddNode(outNode); } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp index a9998e88402ca7..d95f973fa9f2f0 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp @@ -180,7 +180,6 @@ static void attn_quant_mt(const ov::intel_cpu::PlainTensor& k_src, // For compatibility, all input_kvs are permuted to BHLS size_t B = k_src.m_dims[0], H = k_src.m_dims[1], L1 = k_src.m_dims[2], S = k_src.m_dims[3]; // Internal LBHS layout has strides[L] > strides[B] - assert(k_src.m_strides[2] > k_src.m_strides[0]); parallel_for3d(L1, B, H, [&](size_t m, size_t b, size_t h) { auto p_k = k_scale_zp.ptr(m, b, h); auto p_v = v_scale_zp.ptr(m, b, h); @@ -238,6 +237,8 @@ void attn_quantkv(const ov::intel_cpu::PlainTensor& k_src, attn_quant_mt(k_src, v_src, k_dst, v_dst, k_scale_zp, v_scale_zp); } else if (k_src.get_precision() == ov::element::bf16 && k_dst.get_precision() == ov::element::u8) { attn_quant_mt(k_src, v_src, k_dst, v_dst, k_scale_zp, v_scale_zp); + } else if (k_src.get_precision() == ov::element::f16 && k_dst.get_precision() == ov::element::u8) { + attn_quant_mt(k_src, v_src, k_dst, v_dst, k_scale_zp, v_scale_zp); } else { OPENVINO_THROW("unsupport src type: ", k_src.get_precision(), ", dst type: ", k_dst.get_precision(), " in attn_quantkv"); } @@ -252,6 +253,8 @@ void paged_attn_quantkv(const ov::intel_cpu::PlainTensor& k_src, paged_attn_quant_mt(k_src, v_src, k_dst, v_dst, slot_mapping); } else if (k_src.get_precision() == ov::element::bf16 && k_dst.get_precision() == ov::element::u8) { paged_attn_quant_mt(k_src, v_src, k_dst, v_dst, slot_mapping); + } else if (k_src.get_precision() == ov::element::f16 && k_dst.get_precision() == ov::element::u8) { + paged_attn_quant_mt(k_src, v_src, k_dst, v_dst, slot_mapping); } else { OPENVINO_THROW("unsupport src type: ", k_src.get_precision(), ", dst type: ", k_dst.get_precision(), " in paged_attn_quantkv"); } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp index 3341f6f6082d99..2956c8a6a6b5b8 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp @@ -37,15 
+37,22 @@ static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16); return _mm512_castsi512_ps(_mm512_slli_epi32(y, 16)); } + // load addr to __m512 reg + inline __m512 mm512_uni_loadu_ps(const float* a) { + return _mm512_loadu_ps(a); + } + inline __m512 mm512_uni_loadu_ps(const ov::bfloat16* a) { auto vec_bf16 = _mm256_loadu_si256(reinterpret_cast(a)); return cvt_bf16_to_fp32(vec_bf16); } - inline __m512 mm512_uni_loadu_ps(const float* a) { - return _mm512_loadu_ps(a); + inline __m512 mm512_uni_loadu_ps(const ov::float16* a) { + auto vec_f16 = _mm256_loadu_si256(reinterpret_cast(a)); + return _mm512_cvtph_ps(vec_f16); } + // load addr to __m512 reg inline __m512 mm512_uni_loadu_tail_ps(const float* a, size_t count) { __mmask16 mask = (1 << count) - 1; return _mm512_maskz_loadu_ps(mask, a); @@ -57,6 +64,13 @@ static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16); return cvt_bf16_to_fp32(bf16_vec); } + inline __m512 mm512_uni_loadu_tail_ps(const ov::float16* a, size_t count) { + auto mask = (1 << count) - 1; + auto f16_vec = _mm256_maskz_loadu_epi16(mask, a); + return _mm512_cvtph_ps(f16_vec); + } + + // store __m512 reg to addr inline void mm512_uni_storeu_ps(float* a, __m512 v) { _mm512_storeu_ps(a, v); } @@ -72,6 +86,13 @@ static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16); x = _mm512_mask_blend_epi32(mask, nan, x); // Check NaN before converting back to bf16 _mm256_storeu_si256(reinterpret_cast<__m256i *>(addr), _mm512_cvtepi32_epi16(x)); } + + inline void mm512_uni_storeu_ps(ov::float16* addr, __m512 v) { + __m256i vec_f16 = _mm512_cvtps_ph(v, 0); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(addr), vec_f16); + } + + // store __m512 reg to addr inline void mm512_uni_mask_storeu_ps(ov::bfloat16 *addr, __mmask16 mask_addr, __m512 xps) { __m512i xpi32 = _mm512_castps_si512(xps); __m512i nan = _mm512_set1_epi32(0xffff); @@ -85,18 +106,29 @@ static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16); _mm512_mask_cvtepi32_storeu_epi16(addr, mask_addr, x); } - inline __m512 mm512_uni_loadu_ps(ov::float16* a) { - auto vec_f16 = _mm256_loadu_si256(reinterpret_cast(a)); - return _mm512_cvtph_ps(vec_f16); + inline void mm512_uni_storeu_tail_ps(float *addr, __m512 v, size_t count) { + __mmask16 mask_addr = (1 << count) - 1; + _mm512_mask_storeu_ps(addr, mask_addr, v); } - inline __m512 mm512_uni_loadu_tail_ps(const ov::float16* a, size_t count) { - auto mask = (1 << count) - 1; - auto f16_vec = _mm256_maskz_loadu_epi16(mask, a); - return _mm512_cvtph_ps(f16_vec); + + inline void mm512_uni_storeu_tail_ps(ov::bfloat16 *addr, __m512 v, size_t count) { + __mmask16 mask_addr = (1 << count) - 1; + __m512i xpi32 = _mm512_castps_si512(v); + __m512i nan = _mm512_set1_epi32(0xffff); + auto mask = _mm512_cmp_ps_mask(v, v, _CMP_ORD_Q); + __m512i ones = _mm512_set1_epi32(0x1); + __m512i vec_bias = _mm512_set1_epi32(0x7fff); + auto x = _mm512_and_si512(_mm512_srli_epi32(xpi32, 16), ones); // LSB = x[16] + x = _mm512_add_epi32(x, vec_bias); // rounding_bias = 0x7fff + LSB + x = _mm512_srli_epi32(_mm512_add_epi32(x, xpi32), 16); // x = (x + rounding_bias) >> 16; + x = _mm512_mask_blend_epi32(mask, nan, x); // Check NaN before converting back to bf16 + _mm512_mask_cvtepi32_storeu_epi16(addr, mask_addr, x); } - inline void mm512_uni_storeu_ps(ov::float16* addr, __m512 v) { + + inline void mm512_uni_storeu_tail_ps(ov::float16 *addr, __m512 v, size_t count) { + __mmask16 mask_addr = (1 << count) - 1; __m256i vec_f16 = 
_mm512_cvtps_ph(v, 0); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(addr), vec_f16); + _mm256_mask_storeu_epi16(reinterpret_cast<__m256i *>(addr), mask_addr, vec_f16); } #endif @@ -115,12 +147,11 @@ static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16); }; return _mm256_loadu_si256(&mask[N7]); } + + // load addr to __m256 reg inline __m256 mm256_uni_loadu_ps(const float* a) { return _mm256_loadu_ps(a); } - inline void mm256_uni_storeu_ps(float* a, __m256 v) { - _mm256_storeu_ps(a, v); - } inline __m256 mm256_uni_loadu_ps(const ov::bfloat16* a) { auto vec_bf16 = _mm_loadu_si128(reinterpret_cast(a)); @@ -128,6 +159,13 @@ static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16); return o; } + inline __m256 mm256_uni_loadu_ps(const ov::float16* a) { + auto vec_f16 = _mm_loadu_si128(reinterpret_cast(a)); + auto o = _mm256_cvtph_ps(vec_f16); + return o; + } + + // load addr tail to __m256 reg inline __m256 mm256_uni_loadu_tail_ps(const float* a, const size_t count) { auto mask = get_mask(count); return _mm256_maskload_ps(a, mask); @@ -140,6 +178,17 @@ static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16); return mm256_uni_loadu_ps(tmp_values); } + inline __m256 mm256_uni_loadu_tail_ps(const ov::float16* a, const size_t count) { + ov::float16 tmp_values[8] = {0}; + std::memcpy(tmp_values, a, count * sizeof(ov::float16)); + return mm256_uni_loadu_ps(tmp_values); + } + + // store __m256 reg to addr + inline void mm256_uni_storeu_ps(float* a, __m256 v) { + _mm256_storeu_ps(a, v); + } + inline void mm256_uni_storeu_ps(ov::bfloat16 *addr, __m256 xps) { __m256i xpi32 = _mm256_castps_si256(xps); __m256i nan = _mm256_set1_epi32(0xffff); @@ -156,21 +205,17 @@ static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16); _mm_storeu_si128(reinterpret_cast<__m128i *>(addr), bf16_o); } - inline __m256 mm256_uni_loadu_ps(ov::float16* a) { - auto vec_f16 = _mm_loadu_si128(reinterpret_cast<__m128i*>(a)); - auto o = _mm256_cvtph_ps(vec_f16); - return o; - } - inline __m256 mm256_uni_loadu_tail_ps(const ov::float16* a, const size_t count) { - ov::float16 tmp_values[8] = {0}; - std::memcpy(tmp_values, a, count * sizeof(ov::float16)); - return mm256_uni_loadu_ps(tmp_values); - } inline void mm256_uni_storeu_ps(ov::float16* a, __m256 v) { __m128i vec_f16 = _mm256_cvtps_ph(v, 0); _mm_storeu_si128(reinterpret_cast<__m128i *>(a), vec_f16); } + // store __m256 to addr + inline void mm256_uni_storeu_tail_ps(float *addr, __m256 v, size_t count) { + const auto mask = get_mask(count); + return _mm256_maskstore_ps(addr, mask, v); + } + inline void hsum(__m256& x) { __m256 y; // x: 0 1 2 3 4 5 6 7 y = _mm256_permute_ps(x, 0x39); // y: 1 2 3 0 5 6 7 4 @@ -292,4 +337,4 @@ static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16); } // namespace XARCH } // namespace Cpu } // namespace Extensions -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp index 1fe7b811b922a8..971aa6bb58c994 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp @@ -14,6 +14,7 @@ #endif #include "openvino/core/type/bfloat16.hpp" +#include "openvino/core/type/float16.hpp" #include "openvino/core/parallel.hpp" #include "executor_pa.hpp" #include "executor_pa_common.hpp" @@ -619,7 +620,8 
@@ void transpose_16NxK(TDST* dst, TSRC* src, TDST* tmp, size_t N, size_t K, size_t } #if defined(HAVE_AVX512F) -static void transpose_16NxK(ov::bfloat16* dst, ov::bfloat16* src, ov::bfloat16* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { +template::value || std::is_same::value), bool>::type> +static void transpose_16NxK(T* dst, T* src, T* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { // will treat as uint32_t transpose auto s = reinterpret_cast(src); auto d = reinterpret_cast(dst); @@ -669,8 +671,8 @@ void dequant(TDST* dst, uint8_t* src, size_t N, size_t K) { } #if defined(HAVE_AVX512F) -// pack bf16/u8 to bf16 -static void pack_32x32_kernel(ov::bfloat16* dst, ov::bfloat16* src, size_t dst_stride, size_t src_stride) { +template::value || std::is_same::value), bool>::type> +static void pack_32x32_kernel(T* dst, T* src, size_t dst_stride, size_t src_stride) { static const uint64_t idx[8] = {0, 4, 1, 5, 2, 6, 3, 7}; auto midx = _mm512_loadu_si512(idx); for (size_t i = 0; i < 16; i++) { @@ -687,7 +689,8 @@ static void pack_32x32_kernel(ov::bfloat16* dst, ov::bfloat16* src, size_t dst_s } } -static void pack_32x16_kernel(ov::bfloat16* dst, ov::bfloat16* src, size_t dst_stride, size_t src_stride) { +template::value || std::is_same::value), bool>::type> +static void pack_32x16_kernel(T* dst, T* src, size_t dst_stride, size_t src_stride) { static const uint64_t idx[8] = {0, 4, 1, 5, 2, 6, 3, 7}; auto midx = _mm512_loadu_si512(idx); for (size_t i = 0; i < 16; i++) { @@ -704,7 +707,8 @@ static void pack_32x16_kernel(ov::bfloat16* dst, ov::bfloat16* src, size_t dst_s } } -static void pack_32Nx16K(ov::bfloat16* dst, ov::bfloat16* src, ov::bfloat16* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { +template::value || std::is_same::value), bool>::type> +static void pack_32Nx16K(T* dst, T* src, T* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { for (size_t n = 0; n < N; n += 32) { size_t k = 0; for (; k + 32 <= K; k += 32) { @@ -718,7 +722,8 @@ static void pack_32Nx16K(ov::bfloat16* dst, ov::bfloat16* src, ov::bfloat16* tmp } } -static void pack_32Nx16K(ov::bfloat16* dst, uint8_t* src, ov::bfloat16* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { +template::value || std::is_same::value), bool>::type> +static void pack_32Nx16K(T* dst, uint8_t* src, T* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { // The layout for per token per head: // |scale(f32)|zeropoint(f32)|quantized feature(u8,idx_1)|quantized feature(u8,idx_2)|...|quantized feature(u8,idx_S)| // The quantized feature will start from 8bytes=sizeof(float)+sizeof(float) @@ -730,7 +735,7 @@ static void pack_32Nx16K(ov::bfloat16* dst, uint8_t* src, ov::bfloat16* tmp, siz s += src_stride + 2 * sizeof(float); t += src_stride; } - pack_32Nx16K(dst, tmp, reinterpret_cast(0), N, K, dst_stride, src_stride); + pack_32Nx16K(dst, tmp, reinterpret_cast(0), N, K, dst_stride, src_stride); } #endif @@ -769,7 +774,7 @@ struct MHAHelper { std::vector> _wv_gemm_acc; // second token std::shared_ptr _gemv; - bool _fastpath_valid = false; + ov::element::Type _fastpath_valid_prec = ov::element::undefined; // second token for bhl loop PlainTensor _weight_bhl; PlainTensor _output_bhl; @@ -851,11 +856,20 @@ struct MHAHelper { _qk_scratch_a.resize({_nthr, _qk_gemm[_block_size - 1]->get_scratch_a_size() / sizeof(DATA_TYPE)}); _wv_scratch_a.resize({_nthr, _wv_gemm[_block_size - 1]->get_scratch_a_size() / sizeof(DATA_TYPE)}); - _fastpath_valid = 
dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::amx_bf16) && - (S % 32 == 0) && (block_size % 16 == 0) && (S <= 32 * 6) && precision_of::value == ov::element::bf16; - // aligned to cache line (64bytes=16*sizeof(float)) to avoid false sharing - if (_fastpath_valid && !_gemv) - _gemv = std::make_shared(static_cast(S), static_cast(block_size)); + if ((S % 32 == 0) && (block_size % 16 == 0) && (S <= 32 * 6)) { + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::amx_bf16) && + precision_of::value == ov::element::bf16 && + precision_of::value == ov::element::bf16) { + _fastpath_valid_prec = ov::element::bf16; + } else if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::amx_fp16) && + precision_of::value == ov::element::f16 && + precision_of::value == ov::element::f16) { + _fastpath_valid_prec = ov::element::f16; + } + } + if (one_of(_fastpath_valid_prec, ov::element::bf16, ov::element::f16) && !_gemv) { + _gemv = std::make_shared(static_cast(S), static_cast(block_size), _fastpath_valid_prec); + } } if (init_alibi_lookup && (!_alibi_lookup || _alibi_lookup.m_dims[0] < kv_len)) { @@ -903,7 +917,7 @@ struct MHAHelper { auto q_start = q_blk * _block_size; auto q_end = std::min(q_start + _block_size, q_len); auto q_cnt = q_end - q_start; - constexpr bool q_is_bf16 = precision_of::value == ov::element::bf16; + constexpr bool q_is_xf16 = one_of(precision_of::value, ov::element::bf16, ov::element::f16); constexpr bool q_cache_is_same = precision_of::value == precision_of::value; auto cur_kv_len_blocks = div_up(cur_kv_len, _block_size); for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { @@ -978,12 +992,12 @@ struct MHAHelper { // reuse float buffer, need to use float to compute offset auto* w_ptr = reinterpret_cast(_weight.ptr(ithr, h, 0, 0)); - float* fp32_out_ptr = q_is_bf16 ? _output.ptr(ithr, 0, h, 0) : output_emb.ptr(q_start, h * _S); + float* fp32_out_ptr = q_is_xf16 ? _output.ptr(ithr, 0, h, 0) : output_emb.ptr(q_start, h * _S); // for each weight block, loop through all value block for (size_t v_blk = 0; v_blk < cur_kv_len_blocks; v_blk++) { DATA_TYPE* v_ptr; - if (q_is_bf16 || !q_cache_is_same) { + if (q_is_xf16 || !q_cache_is_same) { v_ptr = wv_scratch_b.ptr(v_blk, hk); } else { v_ptr = present_value.ptr(block_table[v_blk], hk); @@ -1004,11 +1018,11 @@ struct MHAHelper { _wv_scratch_a ? 
_wv_scratch_a.ptr(ithr, 0) : nullptr); } } - if (q_is_bf16) { + if (q_is_xf16) { attn_memcpy2d_kernel(_output.ptr(ithr, 0, h, 0), output_emb.ptr(q_start, h * _S), ov::element::f32, - ov::element::bf16, + precision_of::value, _output.stride(1), output_emb.stride(0), _S, @@ -1026,13 +1040,13 @@ struct MHAHelper { // output: [nthr, 32, H, S] void exec_kernel_one_bh(const PlainTensor& query, const PlainTensor& present_key, const PlainTensor& present_value, const PlainTensor& output_emb, const int32_t* block_table, size_t ithr, size_t hk, size_t q_len, size_t cur_kv_len, const PlainTensor& alibi_slopes, float* score_output) { - if (_fastpath_valid) { + if (one_of(_fastpath_valid_prec, ov::element::bf16, ov::element::f16)) { _gemv->tile_config(); for (size_t pk = 0, i = 0; pk < cur_kv_len; pk += _block_size, i++) { auto block_number = block_table[i]; for (size_t pq = 0; pq < q_len; pq++) { for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { - (*_gemv)(query.ptr(h, pq), present_key.ptr(block_number, hk), + (*_gemv)(query.ptr(h, pq), present_key.ptr(block_number, hk), _weight.ptr(ithr, h, pq) + pk); } } @@ -1128,11 +1142,11 @@ struct MHAHelper { auto pk = pk_in_blocks * _block_size; if (pk < context_len) { auto block_number = block_indices.ptr()[block_indices_begins.ptr()[b] + pk_in_blocks]; - if (_fastpath_valid) { + if (one_of(_fastpath_valid_prec, ov::element::bf16, ov::element::f16)) { _gemv->tile_config(); for (size_t pq = 0; pq < q_len; pq++) { for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { - (*_gemv)(query.ptr(b, h, pq), present_key.ptr(block_number, hk), + (*_gemv)(query.ptr(b, h, pq), present_key.ptr(block_number, hk), _weight_bhl.ptr(b, h, pq) + pk); } } @@ -1334,7 +1348,7 @@ struct MHA { const PlainTensor& alibi_slopes) { auto Hk = v_cache.m_dims[1]; - constexpr bool q_is_bf16 = precision_of::value == ov::element::bf16; + constexpr bool q_is_xf16 = one_of(precision_of::value, ov::element::bf16, ov::element::f16); constexpr bool q_cache_is_same = precision_of::value == precision_of::value; auto attn_work_count = _workitems.attn_work_size(); auto reorder_work_count = _workitems.reorder_work_size(); @@ -1360,7 +1374,7 @@ struct MHA { _helper._output.template ptr(ithr), _helper._block_size, _helper._S, _helper._block_size, _helper._S); - if (q_is_bf16) { + if (q_is_xf16) { pack_32Nx16K(_helper._wv_scratch_b.template ptr(batch_in_reorder, kv_block, hk), v_ptr, _helper._output.template ptr(ithr), @@ -1604,6 +1618,17 @@ std::shared_ptr make_pa_executor(ov::element::Type data_ } #else OPENVINO_THROW("make_pa_executor: bf16 needs avx512+ hardware."); +#endif + } else if (data_type == ov::element::f16) { +#if defined(HAVE_AVX512F) + if (kvcache_type == ov::element::u8) { + executor = std::make_shared>(); + } else { + OPENVINO_ASSERT(kvcache_type == ov::element::f16, "expect kvcache type f16, current: ", kvcache_type); + executor = std::make_shared>(); + } +#else + OPENVINO_THROW("make_pa_executor: f16 needs avx512+ hardware."); #endif } else if (data_type == ov::element::f32) { if (kvcache_type == ov::element::u8) { diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.cpp index 63a8a0f7d24062..70723a577b0c2b 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.cpp @@ -10,6 +10,7 @@ #include #include 
"openvino/core/type/bfloat16.hpp" +#include "openvino/core/type/float16.hpp" #include "openvino/core/parallel.hpp" #include "executor_pa_common.hpp" #include "utils/plain_tensor.hpp" @@ -57,7 +58,8 @@ void TileConfiger::generate() { ret(); } -JitMatMulVecAMX::JitMatMulVecAMX(int head_size, int block_size) : jit_generator(jit_name()), m_head_size(head_size), m_block_size(block_size) { +JitMatMulVecAMX::JitMatMulVecAMX(int head_size, int block_size, ov::element::Type amx_prec) : + jit_generator(jit_name()), m_head_size(head_size), m_block_size(block_size), m_amx_prec(amx_prec) { create_kernel(); m_tile_cfg.reset(1, 0, @@ -98,7 +100,11 @@ void JitMatMulVecAMX::generate() { tilezero(tmmC); for (int i = 0; i < num_B_tiles; i++) { tileloadd(tmmA, ptr[reg_k_addr + reg_stride_A + i * 64]); - tdpbf16ps(tmmC, tmmA, Xbyak::Tmm(tmmB0.getIdx() + i)); + if (m_amx_prec == ov::element::bf16) { + tdpbf16ps(tmmC, tmmA, Xbyak::Tmm(tmmB0.getIdx() + i)); + } else if (m_amx_prec == ov::element::f16) { + tdpfp16ps(tmmC, tmmA, Xbyak::Tmm(tmmB0.getIdx() + i)); + } } tilestored(ptr[reg_dst_addr + reg_stride_BC + m * sizeof(float)], tmmC); add(reg_k_addr, m_head_size * 2 * 16); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.hpp index 237860ec692e76..bc21457a3285b4 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa_common.hpp @@ -69,9 +69,10 @@ class JitMatMulVecAMX : public dnnl::impl::cpu::x64::jit_generator { DECLARE_CPU_JIT_AUX_FUNCTIONS(JitMatMulVecAMX) int m_head_size; int m_block_size; + ov::element::Type m_amx_prec; TileConfiger m_tile_configer; TileConfig m_tile_cfg; - JitMatMulVecAMX(int head_size, int block_size); + JitMatMulVecAMX(int head_size, int block_size, ov::element::Type amx_prec); void tile_config() { m_tile_configer(&m_tile_cfg); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index 3ce275d47e3d9d..0670c744a6da91 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -841,20 +841,20 @@ static void attn_reduce(ov::float16* dst, ov::float16* temp, size_t M, size_t S, template static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, - const ov::intel_cpu::PlainTensor& present_key, - const ov::intel_cpu::PlainTensor& present_value, - const ov::intel_cpu::PlainTensor& alibi_mask, - const ov::intel_cpu::PlainTensor& attention_mask, - const ov::intel_cpu::PlainTensor& beams, - ov::intel_cpu::PlainTensor& output_emb, - ov::intel_cpu::PlainTensor& buf_attn_w, - ov::intel_cpu::PlainTensor& buf_attn_score, - bool has_out_transpose, - bool auto_causal, - float d_scale, - const ov::intel_cpu::PlainTensor& past_k_scale_zp, - const ov::intel_cpu::PlainTensor& past_v_scale_zp, - ov::intel_cpu::PlainTensor& head_sum) { + const ov::intel_cpu::PlainTensor& present_key, + const ov::intel_cpu::PlainTensor& present_value, + const ov::intel_cpu::PlainTensor& alibi_mask, + const ov::intel_cpu::PlainTensor& attention_mask, + const ov::intel_cpu::PlainTensor& beams, + ov::intel_cpu::PlainTensor& output_emb, + ov::intel_cpu::PlainTensor& buf_attn_w, + ov::intel_cpu::PlainTensor& buf_attn_score, + bool has_out_transpose, + bool auto_causal, + float d_scale, + const 
ov::intel_cpu::PlainTensor& past_k_scale_zp, + const ov::intel_cpu::PlainTensor& past_v_scale_zp, + ov::intel_cpu::PlainTensor& head_sum) { ov::intel_cpu::PlainTensor causal_mask; bool select_nfltmax_at_0 = false; auto B = query.size(0); @@ -976,16 +976,16 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, attn_mask_ptr = reinterpret_cast(&attention_mask.at({b, h, pq, 0}, true)); uint8_t* cmask_ptr = causal_mask ? &causal_mask.at({b, h, pq, 0}, true) : nullptr; attn_softmax_kernel(buf_attn_w.ptr(b, h, pq), - buf_attn_w.ptr(b, h, pq), - d_scale, - alibi_ptr, - attn_mask_ptr, - cmask_ptr, - select_nfltmax_at_0, - ncausal, - cur_kv_len, - attn_mask_prec, - precision); + buf_attn_w.ptr(b, h, pq), + d_scale, + alibi_ptr, + attn_mask_ptr, + cmask_ptr, + select_nfltmax_at_0, + ncausal, + cur_kv_len, + attn_mask_prec, + precision); }); // attn_w * V @@ -1054,11 +1054,11 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, for (size_t pq = 0; pq < q_len; pq++) { for (size_t h = h_group * h_each_group_len; h < (h_group + 1) * h_each_group_len; h++) { attn_acc_value(buf_attn_score.ptr(ithr, b, pq, h), - buf_attn_w.ptr(b, h, pq)[pv], - v, - S, - p + 0, - p + 1); + buf_attn_w.ptr(b, h, pq)[pv], + v, + S, + p + 0, + p + 1); } } parallel_it_step(pv, kv_len, b, B, h_group, h_group_num); @@ -1093,86 +1093,36 @@ void mha_single_token(const ov::intel_cpu::PlainTensor& query, if (query.get_precision() == ov::element::bf16) { if (present_key.get_precision() == ov::element::u8) { mha_single_token_kernel(query, - present_key, - present_value, - alibi_mask, - attention_mask, - beams, - output_emb, - buf_attn_w, - buf_attn_score, - has_out_transpose, - auto_causal, - d_scale, - past_k_scale_zp, - past_v_scale_zp, - head_sum); + present_key, + present_value, + alibi_mask, + attention_mask, + beams, + output_emb, + buf_attn_w, + buf_attn_score, + has_out_transpose, + auto_causal, + d_scale, + past_k_scale_zp, + past_v_scale_zp, + head_sum); } else { mha_single_token_kernel(query, - present_key, - present_value, - alibi_mask, - attention_mask, - beams, - output_emb, - buf_attn_w, - buf_attn_score, - has_out_transpose, - auto_causal, - d_scale, - past_k_scale_zp, - past_v_scale_zp, - head_sum); - } - } else if (query.get_precision() == ov::element::f32) { - if (present_key.get_precision() == ov::element::u8) { - mha_single_token_kernel(query, - present_key, - present_value, - alibi_mask, - attention_mask, - beams, - output_emb, - buf_attn_w, - buf_attn_score, - has_out_transpose, - auto_causal, - d_scale, - past_k_scale_zp, - past_v_scale_zp, - head_sum); - } else if (present_key.get_precision() == ov::element::f16) { - mha_single_token_kernel(query, - present_key, - present_value, - alibi_mask, - attention_mask, - beams, - output_emb, - buf_attn_w, - buf_attn_score, - has_out_transpose, - auto_causal, - d_scale, - past_k_scale_zp, - past_v_scale_zp, - head_sum); - } else { - mha_single_token_kernel(query, - present_key, - present_value, - alibi_mask, - attention_mask, - beams, - output_emb, - buf_attn_w, - buf_attn_score, - has_out_transpose, - auto_causal, - d_scale, - past_k_scale_zp, - past_v_scale_zp, - head_sum); + present_key, + present_value, + alibi_mask, + attention_mask, + beams, + output_emb, + buf_attn_w, + buf_attn_score, + has_out_transpose, + auto_causal, + d_scale, + past_k_scale_zp, + past_v_scale_zp, + head_sum); } } else if (query.get_precision() == ov::element::f16) { #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) @@ -1196,8 +1146,90 @@ 
void mha_single_token(const ov::intel_cpu::PlainTensor& query, OPENVINO_THROW("Unsupported precision: ", query.get_precision()); } #else - OPENVINO_THROW("Unsupported precision: ", query.get_precision()); + if (present_key.get_precision() == ov::element::u8) { + mha_single_token_kernel(query, + present_key, + present_value, + alibi_mask, + attention_mask, + beams, + output_emb, + buf_attn_w, + buf_attn_score, + has_out_transpose, + auto_causal, + d_scale, + past_k_scale_zp, + past_v_scale_zp, + head_sum); + } else { + mha_single_token_kernel(query, + present_key, + present_value, + alibi_mask, + attention_mask, + beams, + output_emb, + buf_attn_w, + buf_attn_score, + has_out_transpose, + auto_causal, + d_scale, + past_k_scale_zp, + past_v_scale_zp, + head_sum); + } #endif + } else if (query.get_precision() == ov::element::f32) { + if (present_key.get_precision() == ov::element::u8) { + mha_single_token_kernel(query, + present_key, + present_value, + alibi_mask, + attention_mask, + beams, + output_emb, + buf_attn_w, + buf_attn_score, + has_out_transpose, + auto_causal, + d_scale, + past_k_scale_zp, + past_v_scale_zp, + head_sum); + } else if (present_key.get_precision() == ov::element::f16) { + mha_single_token_kernel(query, + present_key, + present_value, + alibi_mask, + attention_mask, + beams, + output_emb, + buf_attn_w, + buf_attn_score, + has_out_transpose, + auto_causal, + d_scale, + past_k_scale_zp, + past_v_scale_zp, + head_sum); + } else { + mha_single_token_kernel(query, + present_key, + present_value, + alibi_mask, + attention_mask, + beams, + output_emb, + buf_attn_w, + buf_attn_score, + has_out_transpose, + auto_causal, + d_scale, + past_k_scale_zp, + past_v_scale_zp, + head_sum); + } } else { OPENVINO_THROW("Unsupported precision: ", query.get_precision()); } @@ -1205,4 +1237,4 @@ void mha_single_token(const ov::intel_cpu::PlainTensor& query, } // namespace XARCH } // namespace Cpu } // namespace Extensions -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp index bffe0ee3761dd5..60c6a24ec5f2fa 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp @@ -751,14 +751,14 @@ inline void multiply_scalar(float* a, float* a_dst, const float val, const size_ while (i + vec_len_f32_avx512 <= size) { v_a = _mm512_loadu_ps(a + i); v_a = _mm512_mul_ps(v_a, v_scale); - _mm512_storeu_ps(a_dst + i, v_a); + mm512_uni_storeu_ps(a_dst + i, v_a); i += vec_len_f32_avx512; } if (i < size) { __mmask16 mask = (1 << (size - i)) - 1; v_a = _mm512_maskz_loadu_ps(mask, a + i); v_a = _mm512_mul_ps(v_a, v_scale); - _mm512_mask_storeu_ps(a_dst + i, mask, v_a); + mm512_uni_storeu_tail_ps(a_dst + i, v_a, size - i); i += (size - i); } @@ -768,14 +768,14 @@ inline void multiply_scalar(float* a, float* a_dst, const float val, const size_ while (i + vec_len_f32_avx2 <= size) { v_a = _mm256_loadu_ps(a + i); v_a = _mm256_mul_ps(v_a, v_scale); - _mm256_storeu_ps(a_dst + i, v_a); + mm256_uni_storeu_ps(a_dst + i, v_a); i += vec_len_f32_avx2; } if (i < size) { auto mask = get_mask(size - i); v_a = _mm256_maskload_ps(a + i, mask); v_a = _mm256_mul_ps(v_a, v_scale); - _mm256_maskstore_ps(a_dst + i, mask, v_a); + mm256_uni_storeu_tail_ps(a_dst + i, v_a, size - i); i += (size - i); } @@ -793,11 +793,12 @@ inline void multiply_scalar(float* a, float* 
a_dst, const float val, const size_
     }
 }
 
-inline void multiply_scalar(float* a, ov::bfloat16* a_dst, const float val, const size_t size) {
+template <typename T, typename = typename std::enable_if<(std::is_same<T, ov::bfloat16>::value || std::is_same<T, ov::float16>::value), bool>::type>
+inline void multiply_scalar(float* a, T* a_dst, const float val, const size_t size) {
+    size_t i = 0;
 #if defined(HAVE_AVX512F)
     auto v_scale = _mm512_set1_ps(val);
     __m512 v_a = {0};
-    size_t i = 0;
     while (i + vec_len_f32_avx512 <= size) {
         v_a = _mm512_loadu_ps(a + i);
         v_a = _mm512_mul_ps(v_a, v_scale);
@@ -808,10 +809,12 @@ inline void multiply_scalar(float* a, ov::bfloat16* a_dst, const float val, cons
         __mmask16 mask = (1 << (size - i)) - 1;
         v_a = _mm512_maskz_loadu_ps(mask, a + i);
         v_a = _mm512_mul_ps(v_a, v_scale);
-        mm512_uni_mask_storeu_ps(a_dst + i, mask, v_a);
+        mm512_uni_storeu_tail_ps(a_dst + i, v_a, size - i);
+
+        i += (size - i);
     }
 #else
-    for (size_t i = 0; i < size; i++) {
+    for (; i < size; i++) {
         a_dst[i] = a[i] * val;
     }
 #endif
@@ -898,6 +901,7 @@ inline void attn_softmax_kernel(float* a,
                                 float alibi_slope) {
     using func_fp32_type = void (*)(float*, float, const float*, const float*, const uint8_t*, bool, size_t, float, float&);
     using func_bf16_type = void (*)(float*, float, const float*, const ov::bfloat16*, const uint8_t*, bool, size_t, float, float&);
+    using func_f16_type = void (*)(float*, float, const float*, const ov::float16*, const uint8_t*, bool, size_t, float, float&);
     static constexpr func_fp32_type funcs_fp32[] = {
         scale_add2_reduce_max<false, false, false>,
         scale_add2_reduce_max<false, false, true>,
@@ -918,12 +922,24 @@ inline void attn_softmax_kernel(float* a,
         scale_add2_reduce_max<true, true, false>,
         scale_add2_reduce_max<true, true, true>
     };
+    static constexpr func_f16_type funcs_f16[] = {
+        scale_add2_reduce_max<false, false, false>,
+        scale_add2_reduce_max<false, false, true>,
+        scale_add2_reduce_max<false, true, false>,
+        scale_add2_reduce_max<false, true, true>,
+        scale_add2_reduce_max<true, false, false>,
+        scale_add2_reduce_max<true, false, true>,
+        scale_add2_reduce_max<true, true, false>,
+        scale_add2_reduce_max<true, true, true>
+    };
     int dispatch = (alibi ? 0b100 : 0) | (attn_mask ? 0b010 : 0) | (causal_mask ? 0b001 : 0);
     float max = std::numeric_limits<float>::lowest();
     if (attn_mask_prec == ov::element::f32) {
         funcs_fp32[dispatch](a, scale, alibi, static_cast<const float*>(attn_mask), causal_mask, select_nfltmax_at_0, len, alibi_slope, max);
-    } else {
+    } else if (attn_mask_prec == ov::element::bf16) {
         funcs_bf16[dispatch](a, scale, alibi, static_cast<const ov::bfloat16*>(attn_mask), causal_mask, select_nfltmax_at_0, len, alibi_slope, max);
+    } else {
+        funcs_f16[dispatch](a, scale, alibi, static_cast<const ov::float16*>(attn_mask), causal_mask, select_nfltmax_at_0, len, alibi_slope, max);
     }
     float sum = 0.0f;
@@ -936,11 +952,16 @@ inline void attn_softmax_kernel(float* a,
         // apply causal mask to final result instead of attn_score
         if (total_size > len)
             memset(static_cast<float*>(a_dst) + len, 0, sizeof(float) * (total_size - len));
-    } else {
+    } else if (dst_precision == ov::element::bf16) {
         multiply_scalar(a, static_cast<ov::bfloat16*>(a_dst), scalar, len);
         // apply causal mask to final result instead of attn_score
         if (total_size > len)
             memset(static_cast<ov::bfloat16*>(a_dst) + len, 0, sizeof(ov::bfloat16) * (total_size - len));
+    } else {
+        multiply_scalar(a, static_cast<ov::float16*>(a_dst), scalar, len);
+        // apply causal mask to final result instead of attn_score
+        if (total_size > len)
+            memset(static_cast<ov::float16*>(a_dst) + len, 0, sizeof(ov::float16) * (total_size - len));
     }
 }
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
@@ -1022,4 +1043,4 @@ inline void attn_softmax_kernel(ov::float16* a,
 } // namespace XARCH
 } // namespace Cpu
 } // namespace Extensions
-} // namespace ov
\ No newline at end of file
+} // namespace ov
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp
index e729fac66dd257..2895a272b982b5 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.cpp
@@ -38,32 +38,54 @@ BrgemmKernel::BrgemmKernel(size_t M,
     // blocking M
     M_blk = matmulOptimalM;
     M_tail = M % M_blk;
-    brgVnniFactor = 4 / inType.size();
-    if (inType != ov::element::bf16 && inType != ov::element::f32)
-        THROW_ERROR("brgemm kernel only supports bf16, f32");
+    if (!one_of(inType, ov::element::bf16, ov::element::f16, ov::element::f32))
+        THROW_ERROR("brgemm kernel only supports f16, bf16, f32");
+    bool is_f32 = inType == ov::element::f32;
+
     bool is_bf16 = inType == ov::element::bf16;
     if (is_bf16 && !mayiuse(avx512_core_bf16))
         THROW_ERROR("brgemm bf16 kernel could only be used above avx512_bf16");
-    bool isAMXSupported = is_bf16 && mayiuse(avx512_core_amx);
+    bool is_f16 = inType == ov::element::f16;
+    if (is_f16 && !mayiuse(avx512_core_fp16))
+        THROW_ERROR("brgemm f16 kernel could only be used above avx512_f16");
+
+    srcType = weiType = inType;
+    // If isa is avx512_core_fp16, f16 is supported by upconverting to f32
+    is_avx_f16_only = inType == ov::element::f16 && mayiuse(avx512_core_fp16) && !mayiuse(avx512_core_amx_fp16);
+    if (is_avx_f16_only) {
+        srcType = ov::element::f32;
+        weiType = ov::element::f32;
+    }
+    brgVnniFactor = 4 / weiType.size();
+
+    /*
+            AVX    AMX
+    fp32     Y      N
+    bf16     Y      Y
+    fp16     Y      Y
+    */
+    bool isAMXSupported = (is_bf16 && mayiuse(avx512_core_amx)) || (is_f16 && mayiuse(avx512_core_amx_fp16));
+    bool isBrgWithAMX = isAMXSupported && !is_avx_f16_only;
+
     size_t vlen;
     if (mayiuse(avx512_core))
         vlen = cpu_isa_traits<avx512_core>::vlen;
     else
         vlen = cpu_isa_traits<avx2>::vlen;
     // blocking N
-    N_blk = is_bf16 ? 32 : std::max(N, vlen / inType.size());
+    N_blk = !is_f32 ?
32 : std::max(N, vlen / inType.size()); N_tail = N % N_blk; // blocking K - K_blk = isAMXSupported ? 32 : K; + K_blk = isBrgWithAMX ? 32 : K; K_tail = K % K_blk; - if (isAMXSupported && K_tail) { + if (isBrgWithAMX && K_tail) { K_tail = rnd_up(K_tail, 2); } // copied K must be round up by vlen / inType.size(), otherwise copy B kernel may access wrong memory - packedBSize = rnd_up(K, vlen / inType.size()) * rnd_up(N, N_blk) * inType.size(); + packedBSize = rnd_up(K, vlen / weiType.size()) * rnd_up(N, N_blk) * weiType.size(); size_t brg0BaseIdx = std::numeric_limits::max(); for (size_t m = 0; m < 2; m++) { for (size_t k = 0; k < 2; k++) { @@ -78,18 +100,18 @@ BrgemmKernel::BrgemmKernel(size_t M, brgemmCtx.M = M_; brgemmCtx.N = N_; brgemmCtx.K = K_; - brgemmCtx.LDA = k ? K_blk : lda; - brgemmCtx.LDB = (is_bf16 || b_transposed) ? rnd_up(N, N_blk) : ldb; // bf16/b_transposed needs copy + brgemmCtx.LDA = k ? K_blk : (is_avx_f16_only ? K : lda); // f16 use f32 internally + brgemmCtx.LDB = (!is_f32 || b_transposed) ? rnd_up(N, N_blk) : ldb; // bf16/fp16/b_transposed needs copy brgemmCtx.LDC = ldc; - brgemmCtx.dt_in0 = static_cast(DnnlExtensionUtils::ElementTypeToDataType(inType)); - brgemmCtx.dt_in1 = static_cast(DnnlExtensionUtils::ElementTypeToDataType(inType)); + brgemmCtx.dt_in0 = static_cast(DnnlExtensionUtils::ElementTypeToDataType(srcType)); + brgemmCtx.dt_in1 = static_cast(DnnlExtensionUtils::ElementTypeToDataType(weiType)); brgemmCtx.beta = beta; // don't create brgemm kernels for empty tiles if (M_ != 0 && K_ != 0 && N_ != 0) { if (brg0BaseIdx == std::numeric_limits::max()) brg0BaseIdx = getBrgIdx(m, k, n); - init_brgemm(brgemmCtx, brgKernels[getBrgIdx(m, k, n)], isAMXSupported); + init_brgemm(brgemmCtx, brgKernels[getBrgIdx(m, k, n)], isBrgWithAMX); } } } @@ -97,12 +119,19 @@ BrgemmKernel::BrgemmKernel(size_t M, auto& brgemmCtx0 = brgCtxs[brg0BaseIdx]; - if (brgemmCtx0.is_with_amx && K_tail) { - init_brgemm_copy_a(brgCopyAKernel, K, K_blk, K_tail, K_blk, brgemmCtx0.dt_in0, false, lda * inType.size()); - packedASize = M_blk * rnd_up(K, K_blk) * inType.size(); + if ((brgemmCtx0.is_with_amx && K_tail) || is_avx_f16_only) { + init_brgemm_copy_a(brgCopyAKernel, + K, + K_blk, + K_tail, + is_avx_f16_only ? K : K_blk, + brgemmCtx0.dt_in0, + false, + lda * inType.size()); + packedASize = M_blk * rnd_up(K, brgemmCtx0.LDA) * srcType.size(); } - if (brgemmCtx0.is_with_amx || inType == ov::element::bf16 || b_transposed) { + if (brgemmCtx0.is_with_amx || !is_f32 || b_transposed) { size_t b_stride = 0; b_stride = ldb * inType.size(); // K should use the original K @@ -136,10 +165,20 @@ void BrgemmKernel::init_brgemm(brgemmCtx& ctx, const bool is_int8 = one_of(ctx.dt_in0, data_type::u8, data_type::s8) && one_of(ctx.dt_in1, data_type::u8, data_type::s8); cpu_isa_t isa; - if (mayiuse(avx512_core)) { - isa = use_amx ? isa_undef - : ctx.dt_in0 == dnnl_data_type_t::dnnl_bf16 ? avx512_core_bf16 - : (is_int8 ? 
avx512_core_vnni : avx512_core); + if (use_amx) { + isa = isa_undef; + } else if (mayiuse(avx512_core)) { + if (ctx.dt_in0 == dnnl_data_type_t::dnnl_bf16 && mayiuse(avx512_core_bf16)) { + isa = avx512_core_bf16; + } else if (ctx.dt_in0 == dnnl_data_type_t::dnnl_f16 && mayiuse(avx512_core_fp16)) { + isa = avx512_core_fp16; + } else { + if (is_int8) { + isa = avx512_core_vnni; + } else { + isa = avx512_core; + } + } } else { isa = cpu_isa_t::avx2; } @@ -161,7 +200,7 @@ void BrgemmKernel::init_brgemm(brgemmCtx& ctx, ctx.K, nullptr); if (status != dnnl_success) { - THROW_ERROR("cannot be executed due to invalid brgconv params"); + THROW_ERROR("cannot be executed due to invalid brgemm params"); } if (use_amx && b_accumulate) { @@ -193,6 +232,7 @@ void BrgemmKernel::init_brgemm(brgemmCtx& ctx, } brgKernel.reset(brgKernel_); } + void BrgemmKernel::init_brgemm_copy_a( std::unique_ptr& brgCopyKernel, size_t K, @@ -214,13 +254,15 @@ void BrgemmKernel::init_brgemm_copy_a( brgCopyKernelConf.s8s8_compensation_required = false; brgCopyKernelConf.wei_zp_type = dnnl::impl::cpu::x64::none; brgCopyKernelConf.src_zp_type = dnnl::impl::cpu::x64::none; - brgCopyKernelConf.src_dt = dt_in0; + brgCopyKernelConf.src_dt = is_avx_f16_only ? dnnl_data_type_t::dnnl_f32 : dt_in0; brgCopyKernelConf.copy_A_src_stride = copy_A_src_stride; - brgCopyKernelConf.a_dt_sz = DnnlExtensionUtils::sizeOfDataType(static_cast(dt_in0)); + // copy_a_kernel assumes that in/out tensor has same data type except f16 + // copy_a_kernel has special path for f16: assuming input(f16) -> output(f32) + brgCopyKernelConf.a_dt_sz = is_avx_f16_only ? sizeof(ov::float16) : DnnlExtensionUtils::sizeOfDataType(static_cast(dt_in0)); // copied A has the same precision of original - brgCopyKernelConf.tr_a_dt_sz = DnnlExtensionUtils::sizeOfDataType(static_cast(dt_in0)); + brgCopyKernelConf.tr_a_dt_sz = is_avx_f16_only ? sizeof(float) : DnnlExtensionUtils::sizeOfDataType(static_cast(dt_in0)); brgCopyKernelConf.transposed_A = transpose; - brgCopyKernelConf.isa = avx512_core_amx; + brgCopyKernelConf.isa = is_avx_f16_only ? avx512_core_fp16 : avx512_core_amx; create_brgemm_matmul_copy_a(brgCopyKernel, &brgCopyKernelConf); } @@ -238,8 +280,8 @@ void BrgemmKernel::init_brgemm_copy_b( bool transpose, size_t copy_B_wei_stride) { brgemm_matmul_conf_t brgCopyKernelConf; - brgCopyKernelConf.src_dt = dt_in0; - brgCopyKernelConf.wei_dt = dt_in1; + brgCopyKernelConf.src_dt = is_avx_f16_only ? dnnl_data_type_t::dnnl_f32 : dt_in0; + brgCopyKernelConf.wei_dt = is_avx_f16_only ? dnnl_data_type_t::dnnl_f32 : dt_in1; brgCopyKernelConf.orig_wei_dt = dt_in1; brgCopyKernelConf.wei_n_blk = N_blk; brgCopyKernelConf.wei_tag = transpose ? dnnl_ba : dnnl_ab; @@ -255,17 +297,23 @@ void BrgemmKernel::init_brgemm_copy_b( brgCopyKernelConf.K_blk = K; brgCopyKernelConf.K_tail = 0; brgCopyKernelConf.N_chunk_elems = brgCopyKernelConf.N_blk; - brgCopyKernelConf.b_dt_sz = + // f16 is computed by upconverting. in(f16) -> out(f32) + brgCopyKernelConf.b_dt_sz = is_avx_f16_only ? sizeof(ov::float16) : DnnlExtensionUtils::sizeOfDataType(static_cast(brgCopyKernelConf.src_dt)); - brgCopyKernelConf.tr_b_dt_sz = + brgCopyKernelConf.tr_b_dt_sz = is_avx_f16_only ? sizeof(float) : DnnlExtensionUtils::sizeOfDataType(static_cast(brgCopyKernelConf.src_dt)); brgCopyKernelConf.req_wei_vnni_downconvert = false; if (is_with_amx) { - brgCopyKernelConf.isa = avx512_core_amx; + brgCopyKernelConf.isa = dt_in0 == dnnl_data_type_t::dnnl_f16 ? 
avx512_core_amx_fp16 : avx512_core_amx; brgCopyKernelConf.s8s8_compensation_required = false; } else { - brgCopyKernelConf.isa = dt_in0 == dnnl_data_type_t::dnnl_bf16 ? avx512_core_bf16 : avx512_core_vnni; + if (inType == ov::element::f16) { + brgCopyKernelConf.isa = mayiuse(avx512_core_fp16) ? avx512_core_fp16 : avx2_vnni_2; + } else { + brgCopyKernelConf.isa = dt_in0 == dnnl_data_type_t::dnnl_bf16 ? avx512_core_bf16 : avx512_core_vnni; + } + brgCopyKernelConf.s8s8_compensation_required = false; } brgCopyKernelConf.has_zero_point_a = false; @@ -283,7 +331,7 @@ void BrgemmKernel::copy_buffer_b(void* b, void* scratch_b) { for (size_t nb = 0; nb < div_up(N, N_blk); nb++) { auto N_stride = b_transposed ? ldb : 1; auto pCopyKernel0In = ptr_b + nb * N_blk * inType.size() * N_stride; - auto pCopyKernel0Out = ptr_scartch_b + nb * N_blk * brgVnniFactor * inType.size(); + auto pCopyKernel0Out = ptr_scartch_b + nb * N_blk * brgVnniFactor * weiType.size(); auto ctx = jit_brgemm_matmul_copy_b_t::ctx_t(); @@ -306,15 +354,13 @@ void BrgemmKernel::executeGemm(bool is_M_tail, void* a, void* b, void* c, void* auto ptr_C = reinterpret_cast(c); auto ptr_scartch_a = reinterpret_cast(scratch_a); auto ptr_scartch_b = reinterpret_cast(b); - uint8_t* ptr_a_tail = nullptr; size_t brgIdx0 = getBrgIdx(0, 0, 0); // The step for matrix A over main K dimension size_t K0_step0 = brgCtxs[brgIdx0].K; auto cur_M_blk = is_M_tail ? M_tail : M_blk; if (brgCopyAKernel) { - // only copy tailed data; - size_t K_offset = K < K_blk ? 0 : K0_step0 * inType.size(); + size_t K_offset = is_avx_f16_only ? 0 : (K < K_blk ? 0 : K0_step0 * srcType.size()); auto pCopyKernelIn = ptr_A + K_offset; auto pCopyKernelOut = ptr_scartch_a; @@ -331,8 +377,6 @@ void BrgemmKernel::executeGemm(bool is_M_tail, void* a, void* b, void* c, void* ctx.current_K_blk = K % K_blk; (*brgCopyAKernel)(&ctx); - - ptr_a_tail = pCopyKernelOut; } size_t count_N = 0; for (size_t n = 0; n < 2; n++) { @@ -341,17 +385,17 @@ void BrgemmKernel::executeGemm(bool is_M_tail, void* a, void* b, void* c, void* size_t mIdx = is_M_tail ? 1 : 0; auto& brgemmCtx = brgCtxs[getBrgIdx(mIdx, k, n)]; if (brgemmCtx.K != 0 && brgemmCtx.N != 0 && brgemmCtx.M != 0) { - auto local_a_ptr = k > 0 ? ptr_a_tail : ptr_A; - auto B_stride = (k * count_K + n * count_N * brgVnniFactor) * inType.size(); + auto local_a_ptr = is_avx_f16_only ? ptr_scartch_a : (k > 0 ? ptr_scartch_a : ptr_A); + auto B_stride = (k * count_K + n * count_N * brgVnniFactor) * weiType.size(); auto weight_ptr = ptr_scartch_b + B_stride; auto C_stride = n * count_N * ov::element::f32.size(); auto out_ptr = ptr_C + C_stride; callBrgemm(brgemmCtx, - brgKernels[getBrgIdx(mIdx, k, n)], - local_a_ptr, - weight_ptr, - out_ptr, - wsp); + brgKernels[getBrgIdx(mIdx, k, n)], + local_a_ptr, + weight_ptr, + out_ptr, + wsp); // stride K, N if body kernel is executed. 
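                    // (count_K/count_N captured here feed the B_stride computed at the top of this loop
                    //  when the tail kernels run on the later k/n iterations)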
if (k == 0) { count_K = brgemmCtx.K * brgemmCtx.LDB; @@ -373,17 +417,17 @@ void BrgemmKernel::executeGemm(void* a, void* b, void* c, void* wsp, void* scrat for (size_t mb = 0; mb < div_up(M, M_blk); mb++) { const bool is_M_tail = (M - mb * M_blk < M_blk); - auto ptr_a = ptr_A + (mb * M_blk * lda) * inType.size(); + auto ptr_a = ptr_A + (mb * M_blk * lda) * srcType.size(); auto ptr_c = ptr_C + (mb * M_blk * ldc) * ov::element::f32.size(); executeGemm(is_M_tail, ptr_a, scratch_b, wsp, ptr_c, scratch_a); } } void BrgemmKernel::callBrgemm(brgemmCtx& ctx, - std::unique_ptr& brgKernel, - const void* pin0, - const void* pin1, - void* pout, - void* wsp) { + std::unique_ptr& brgKernel, + const void* pin0, + const void* pin1, + void* pout, + void* wsp) { if (ctx.is_with_amx) amx_tile_configure(ctx.palette); if (ctx.is_with_comp) { @@ -398,4 +442,4 @@ void BrgemmKernel::callBrgemm(brgemmCtx& ctx, } } // namespace intel_cpu -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.hpp index 513b484ab0b963..38384f2aceae83 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/brgemm_kernel.hpp @@ -59,6 +59,9 @@ class BrgemmKernel { size_t packedBSize = 0; size_t packedASize = 0; ov::element::Type inType; + ov::element::Type weiType; + ov::element::Type srcType; + bool is_avx_f16_only = false; bool b_accumulate = false; static constexpr size_t MHA_BRGEMM_KERNELS_NUM = 8; static constexpr size_t matmulOptimalM = 32; diff --git a/src/plugins/intel_cpu/src/nodes/paged_attn.cpp b/src/plugins/intel_cpu/src/nodes/paged_attn.cpp index 2272fa481d5471..6bf7d3099a85d9 100644 --- a/src/plugins/intel_cpu/src/nodes/paged_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/paged_attn.cpp @@ -190,6 +190,8 @@ ov::element::Type PagedAttention::getRuntimePrecision() const { // bf16 should be enabled only when platform supports if (rtPrecision == ov::element::bf16 && ov::with_cpu_x86_bfloat16()) { rtPrecision = ov::element::bf16; + } else if (rtPrecision == ov::element::f16 && ov::with_cpu_x86_avx512_core_fp16()) { + rtPrecision = ov::element::f16; } else { rtPrecision = ov::element::f32; } diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp index 016fa90398aa4b..eecba2acff260b 100644 --- a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp @@ -261,7 +261,7 @@ struct MHAKernel { void prepare_brgemm_prim(dnnl::stream strm, PlainTensor& query, PlainTensor& present_key, bool has_out_transpose) { auto in_type = precision_of::value; - auto qkv_dt = in_type == ov::element::f32 ? 
dt::f32 : dt::bf16; + auto qkv_dt = DnnlExtensionUtils::ElementTypeToDataType(in_type); auto B = query.size(0); auto H = query.size(1); auto q_len = query.size(2); @@ -354,13 +354,13 @@ struct MHAKernel { size_t h_each_group_len = H / Hk; const size_t m_block_size = qk_gemm_ptr->get_mblk_size(); auto m_blocks = (q_len + m_block_size - 1) / m_block_size; - bool is_bf16 = precision_of::value == ov::element::bf16; + bool is_xf16 = precision_of::value == ov::element::bf16 || precision_of::value == ov::element::f16; // packed k, v parallel_for2d(B, Hk, [&](size_t b, size_t h) { T* k_ptr = &present_key.at({b, h, 0, 0}); T* v_ptr = &present_value.at({b, h, 0, 0}); qk_gemm_ptr->copy_buffer_b(k_ptr, &qk_scratch_b.at({b, h, 0})); - if (is_bf16) + if (is_xf16) wv_gemm_ptr->copy_buffer_b(v_ptr, &wv_scratch_b.at({b, h, 0})); }); @@ -420,12 +420,12 @@ struct MHAKernel { } auto* w_ptr = reinterpret_cast(weight_score.ptr(ithr, h, 0, 0)); float* fp32_out_ptr; - if (is_bf16) { + if (is_xf16) { fp32_out_ptr = has_out_transpose ? &fp32_out.at({b, m_start, h, 0}) : &fp32_out.at({b, h, m_start, 0}); } else { fp32_out_ptr = has_out_transpose ? &output_emb.at({b, m_start, h * head_size}) : &output_emb.at({b, h, m_start, 0}); } - T* v_ptr = is_bf16 ? &wv_scratch_b.at({b, h / h_each_group_len, 0}) + T* v_ptr = is_xf16 ? &wv_scratch_b.at({b, h / h_each_group_len, 0}) : &present_value.at({b, h / h_each_group_len, 0, 0}); wv_gemm_ptr->executeGemm(m_cnt < m_block_size, w_ptr, @@ -433,12 +433,12 @@ struct MHAKernel { fp32_out_ptr, wsp.data() + tid * wsp_size_per_thread, wv_scratch_a ? &wv_scratch_a.at({tid, 0}) : nullptr); - if (is_bf16) { + if (is_xf16) { if (has_out_transpose) { attn_memcpy2d_kernel(&fp32_out.at({b, m_start, h, 0}), &output_emb.at({b, m_start, h * head_size}), ov::element::f32, - ov::element::bf16, + precision_of::value, fp32_out.stride(1), output_emb.stride(1), head_size, @@ -447,7 +447,7 @@ struct MHAKernel { attn_memcpy2d_kernel(&fp32_out.at({b, h, m_start, 0}), &output_emb.at({b, h, m_start, 0}), ov::element::f32, - ov::element::bf16, + precision_of::value, 0, 0, m_cnt * head_size, @@ -1068,28 +1068,35 @@ void ScaledDotProductAttention::createPrimitive() { auto builder = [&](const ScaledDotProductAttentionKey& key) -> std::shared_ptr { std::shared_ptr executor = nullptr; - if (rtPrecision == ov::element::bf16) { #ifdef OPENVINO_ARCH_X86_64 + if (rtPrecision == ov::element::bf16) { executor = std::make_shared>(context); -#endif + } else if (rtPrecision == ov::element::f16) { + if (with_cpu_x86_avx512_core_fp16()) { + executor = std::make_shared>(context); + } else { + executor = std::make_shared>(context); + } } else { -#if defined(OV_CPU_WITH_ACL) - if (rtPrecision == ov::element::f16) - executor = std::make_shared>(context); - else - executor = std::make_shared>(context); -#elif defined(OV_CPU_WITH_MLAS) +#ifdef OV_CPU_WITH_MLAS executor = std::make_shared>(context); -#elif defined(OPENVINO_ARCH_X86_64) +#else if (with_cpu_x86_avx512_core()) { executor = std::make_shared>(context); } else { executor = std::make_shared>(context); } -#else - executor = std::make_shared>(context); #endif } +#elif defined(OV_CPU_WITH_ACL) + if (rtPrecision == ov::element::f16) { + executor = std::make_shared>(context); + } else { + executor = std::make_shared>(context); + } +#else + executor = std::make_shared>(context); +#endif return executor; }; diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 
abf1ad8f283205..0e683482a97934 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -238,6 +238,17 @@ bool Transformations::fuse_type_to_fq(const std::shared_ptr& node, con return true; } +bool Transformations::fuse_type_to_pa(const std::shared_ptr& node, const precisions_map& precisions) { + auto pa = ov::as_type_ptr(node); + if (!pa) + return false; + // PagedAttentionExtension's 2nd output type should be kept f32. + // The reason is that the pagedattention node in CPU plugin hardcodes 2nd output type as f32. + // So, set f32 to the 2nd output type, which can avoid extra data type conversion during transformation. + pa->set_out_type(1, ov::element::f32); + return true; +} + bool Transformations::fuse_type_to_convert(const std::shared_ptr& node, const precisions_map& precisions) { auto convert = ov::as_type_ptr(node); if (!convert) @@ -391,7 +402,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis #if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) type_to_fuse_map fuse_map = {{ov::opset1::FakeQuantize::get_type_info_static(), fuse_type_to_fq}}; #else - type_to_fuse_map fuse_map = {}; + type_to_fuse_map fuse_map = {{ov::op::PagedAttentionExtension::get_type_info_static(), fuse_type_to_pa}}; #endif const bool keep_precision_sensitive_in_fp32 = true; CPU_REGISTER_PASS_COMMON(manager, diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h index 0b6a437f667747..33c26ab8aea9e4 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.h @@ -48,6 +48,7 @@ class Transformations { static bool fuse_type_to_convert(const std::shared_ptr& node, const precisions_map& precisions); static bool fuse_type_to_fq(const std::shared_ptr& node, const precisions_map& precisions); + static bool fuse_type_to_pa(const std::shared_ptr& node, const precisions_map& precisions); }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/concat_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/concat_sdp.cpp index eb6fdc2a6bfc3f..8a9212f8998f94 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/concat_sdp.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/concat_sdp.cpp @@ -37,6 +37,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConcatSDPTest, ConcatSDPTest, ::testing::Combine(::testing::Values(ElementType::f16), ::testing::ValuesIn(inputShapes), + ::testing::Values(false), ::testing::Values(true, false)), ConcatSDPTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.cpp index f4abaa03b7c28b..f5a7bfacfac99f 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.cpp @@ -28,8 +28,9 @@ namespace test { std::string ConcatSDPTest::getTestCaseName(const testing::TestParamInfo& obj) { ElementType inType; std::vector inputShapes; - bool hasShapeof; - std::tie(inType, inputShapes, hasShapeof) = obj.param; + bool forceKVU8; + bool hasShapeOf; + std::tie(inType, inputShapes, 
forceKVU8, hasShapeOf) = obj.param; std::ostringstream result; result << "IS="; for (const auto& shape : inputShapes) { @@ -46,21 +47,24 @@ std::string ConcatSDPTest::getTestCaseName(const testing::TestParamInfo(gatherK); shapeof_v = std::make_shared(gatherV); } @@ -107,20 +111,20 @@ void ConcatSDPTest::SetUp() { pastv_assign->set_friendly_name("pastv_w"); ResultVector results{std::make_shared(add)}; - if (hasShapeOf) { + if (m_hasShapeOf) { results.push_back(std::make_shared(shapeof_k)); results.push_back(std::make_shared(shapeof_v)); } SinkVector sinks{pastk_assign, pastv_assign}; function = std::make_shared(results, sinks, inputParams, "ConcatSDP"); targetDevice = ov::test::utils::DEVICE_CPU; - functionRefs = function->clone(); pass::Manager manager; // decompose ScaledDotProductAttention manager.register_pass(); manager.run_passes(functionRefs); } + void ConcatSDPTest::generate_inputs(const std::vector& targetInputStaticShapes) { std::vector shapes(4); shapes[0] = targetInputStaticShapes[0]; @@ -129,6 +133,7 @@ void ConcatSDPTest::generate_inputs(const std::vector& targetInputSta shapes[3] = targetInputStaticShapes[1]; SubgraphBaseTest::generate_inputs(shapes); } + template void strided_iota(IT first, size_t n, T value, T stride) { for (size_t i = 0; i < n; i++) { @@ -136,6 +141,7 @@ void strided_iota(IT first, size_t n, T value, T stride) { value += stride; } } + void ConcatSDPTest::generate(int idx, const std::vector& targetInputStaticShapes) { inputs.clear(); auto create_input = [this] (std::shared_ptr param, ov::Shape shape, float val) { @@ -169,16 +175,19 @@ void ConcatSDPTest::generate(int idx, const std::vector& targetInputS create_input(function->get_parameters()[3], targetInputStaticShapes[1], idx + 4.0f); create_input(function->get_parameters()[4], ov::Shape{targetInputStaticShapes[0][0]}, idx + 0.0f); } + void ConcatSDPTest::prepare() { compile_model(); inferRequest = compiledModel.create_infer_request(); ASSERT_TRUE(inferRequest); } + void ConcatSDPTest::reset() { for (auto&& state : inferRequest.query_state()) { state.reset(); } } + std::vector ConcatSDPTest::run_test(std::shared_ptr model) { function = model; prepare(); @@ -201,6 +210,12 @@ std::vector ConcatSDPTest::run_test(std::shared_ptr model } TEST_P(ConcatSDPTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ElementType inType; + std::vector inputShapes; + bool forceKVU8; + bool hasShapeOf; + std::tie(inType, inputShapes, forceKVU8, hasShapeOf) = this->GetParam(); + auto actualOutputs = run_test(function); if (!hasShapeOf) { CheckNumberOfNodesWithType(compiledModel, "ScaledDotProductAttention", 1); @@ -216,9 +231,14 @@ TEST_P(ConcatSDPTest, CompareWithRefs) { } } } + + // the range of our result will exceed f16 max value and there may be 'inf'. 
In softmax, there is a step: + // v - max(v), if v is inf, the result of 'v-max(v)' will be nan + // use f32 as reference if (inType == ElementType::f16) { configuration["INFERENCE_PRECISION_HINT"] = "f32"; } + auto expectedOutputs = run_test(functionRefs); CheckNumberOfNodesWithType(compiledModel, "ScaledDotProductAttention", 0); for (size_t i = 0; i < actualOutputs.size(); i++) { diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.hpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.hpp index 56fad11f53e600..ac59e48f496b3b 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.hpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.hpp @@ -34,7 +34,7 @@ namespace test { template void strided_iota(IT first, size_t n, T value, T stride); -typedef std::tuple, bool> ConcatSDPTestParams; +typedef std::tuple, bool, bool> ConcatSDPTestParams; class ConcatSDPTest : public testing::WithParamInterface, @@ -46,7 +46,8 @@ class ConcatSDPTest : void prepare(); void reset(); std::vector run_test(std::shared_ptr model); - bool hasShapeOf; + bool m_forceKVU8; + bool m_hasShapeOf; protected: void generate_inputs(const std::vector& targetInputStaticShapes) override; void SetUp() override; diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp index bc73de76999daf..d05e7840562191 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_multiple_query_sdp.cpp @@ -18,6 +18,7 @@ namespace test { using InputShapeAndTransposeOrder = std::pair, std::vector>; using ConcatMultiQuerySDPParams = std::tuple; // Subgraph: @@ -52,8 +53,10 @@ class ConcatMultiQuerySDPTest : public testing::WithParamInterface& obj) { ElementType qkvType; InputShapeAndTransposeOrder inputShapeAndOrders; - bool hasShapeof; - std::tie(qkvType, inputShapeAndOrders, hasShapeof) = obj.param; + bool forceKVU8; + bool hasShapeOf; + std::tie(qkvType, inputShapeAndOrders, forceKVU8, hasShapeOf) = obj.param; + ElementType kvCacheType = forceKVU8 ? 
ov::element::Type_t::u8 : qkvType; std::ostringstream result; std::vector& inputShapes = inputShapeAndOrders.first; std::vector& transposeOrder = inputShapeAndOrders.second; @@ -71,8 +74,9 @@ class ConcatMultiQuerySDPTest : public testing::WithParamInterfaceGetParam(); + std::tie(qkvType, inputShapeAndOrders, forceKVU8, hasShapeOf) = this->GetParam(); std::vector& inputShapes = inputShapeAndOrders.first; std::vector& transposeOrder = inputShapeAndOrders.second; targetDevice = ov::test::utils::DEVICE_CPU; rel_threshold = 1e-2f; configuration[ov::hint::inference_precision.name()] = ov::element::f32; - if (qkvType == ElementType::bf16) { - configuration[ov::hint::inference_precision.name()] = ov::element::bf16; + if (qkvType == ElementType::bf16 || qkvType == ElementType::f16) { + configuration[ov::hint::inference_precision.name()] = ov::element::Type(qkvType).get_type_name(); rel_threshold = 0.01f; } + if (forceKVU8) + configuration["KV_CACHE_PRECISION"] = "u8"; init_input_shapes(inputShapes); ov::ParameterVector inputParams; // q,k,v @@ -229,6 +236,10 @@ class ConcatMultiQuerySDPTest : public testing::WithParamInterface(t.data()), t.get_size(), val, 0.1f); inputs.insert({param, t}); + } else if (param->get_element_type() == element::f16) { + ov::Tensor t{ov::element::f16, shape}; + strided_iota(static_cast(t.data()), t.get_size(), val, 0.1f); + inputs.insert({param, t}); } else { ov::Tensor t{ov::element::bf16, shape}; strided_iota(static_cast(t.data()), t.get_size(), val, 0.1f); @@ -269,6 +280,10 @@ class ConcatMultiQuerySDPTest : public testing::WithParamInterface b.get_name(); + }); for (std::string name : {"pastk", "pastv"}) { auto itr = std::find_if(states.begin(), states.end(), [&](const ov::VariableState& state) { return name == state.get_name(); @@ -290,17 +305,20 @@ class ConcatMultiQuerySDPTest : public testing::WithParamInterfaceGetParam(); - if (qkvType == ElementType::bf16 && !ov::with_cpu_x86_bfloat16()) - GTEST_SKIP(); + std::tie(qkvType, inputShapeAndOrders, forceKVU8, hasShapeOf) = this->GetParam(); auto actualOutputs = run_test(function); CheckNumberOfNodesWithType(compiledModel, "ScaledDotProductAttention", 1); CheckNumberOfNodesWithType(compiledModel, "Concatenation", 0); CheckNumberOfNodesWithType(compiledModel, "Reorder", 0); CheckNumberOfNodesWithType(compiledModel, "Transpose", 1); CheckNumberOfNodesWithType(compiledModel, "Gather", 0); + // use f32 as reference + if (qkvType == ElementType::f16) { + configuration["INFERENCE_PRECISION_HINT"] = "f32"; + } auto expectedOutputs = run_test(functionRefs); CheckNumberOfNodesWithType(compiledModel, "ScaledDotProductAttention", 0); for (size_t i = 0; i < actualOutputs.size(); i++) { @@ -384,8 +402,9 @@ const std::vector inputShapeAndReorders = {{ INSTANTIATE_TEST_SUITE_P(smoke_ConcatMultiQuerySDPTest, ConcatMultiQuerySDPTest, - ::testing::Combine(::testing::Values(ElementType::f32, ElementType::bf16), + ::testing::Combine(::testing::Values(ElementType::f32, ElementType::bf16, ElementType::f16), ::testing::ValuesIn(inputShapeAndReorders), + ::testing::Values(true, false), ::testing::Values(true, false)), ConcatMultiQuerySDPTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_sdp.cpp index f9971a7fe9ce16..57927434524891 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_sdp.cpp +++ 
b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_sdp.cpp @@ -37,8 +37,10 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConcatSDPTest, ConcatSDPTest, ::testing::Combine(::testing::Values(ElementType::f32), ::testing::ValuesIn(inputShapes), + ::testing::Values(true, false), ::testing::Values(true, false)), ConcatSDPTest::getTestCaseName); + } // namespace } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_transpose_sdp_transpose.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_transpose_sdp_transpose.cpp index 839370d3a97728..65bc379c78b540 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_transpose_sdp_transpose.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_transpose_sdp_transpose.cpp @@ -253,6 +253,10 @@ class ConcatSDPTransposeTest : public ConcatSDPTransposeTestBase { outputs.push_back(copy); } auto states = inferRequest.query_state(); + // k, v may be in any order + std::sort(states.begin(), states.end(), [] (VariableState& a, VariableState& b) { + return a.get_name() > b.get_name(); + }); for (std::string name : {"pastk", "pastv"}) { auto itr = std::find_if(states.begin(), states.end(), [&](const ov::VariableState& state) { return name == state.get_name(); diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/concat_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/concat_sdp.cpp new file mode 100644 index 00000000000000..93c99048fec349 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/concat_sdp.cpp @@ -0,0 +1,47 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "openvino/opsets/opset13.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp" + +#include "custom/subgraph_tests/src/classes/concat_sdp.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "utils/cpu_test_utils.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +using namespace CPUTestUtils; + +namespace ov { +namespace test { + +namespace { +const std::vector> inputShapes = { + // greedy search + { + // B, H, L1, S + {{1, 8, -1, 64}, {{1, 8, 10, 64}, {1, 8, 1, 64}, {1, 8, 1, 64}, {1, 8, 20, 64}, {1, 8, 1, 64}}}, + // B, H, L0, S + {{1, 8, -1, 64}, {{1, 8, 0, 64}, {1, 8, 10, 64}, {1, 8, 11, 64}, {1, 8, 12, 64}, {1, 8, 32, 64}}}, + }, + // beam search + { + // B, H, L1, S + {{-1, 8, -1, 64}, {{4, 8, 10, 64}, {4, 8, 1, 64}, {4, 8, 1, 64}, {4, 8, 1, 64}, {4, 8, 1, 64}}}, + // B, H, L0, S + {{-1, 8, -1, 64}, {{4, 8, 0, 64}, {4, 8, 10, 64}, {4, 8, 11, 64}, {4, 8, 12, 64}, {4, 8, 13, 64}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_ConcatSDPTest, + ConcatSDPTest, + ::testing::Combine(::testing::Values(ElementType::bf16, ElementType::f16), + ::testing::ValuesIn(inputShapes), + ::testing::Values(true, false), + ::testing::Values(true, false)), + ConcatSDPTest::getTestCaseName); + +} // namespace + +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 2a8f49b5dcfe0e..e7c006ab97427f 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -358,6 +358,8 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(smoke_VariableState/OVInferRequestVariableStateTest.*)"); // Issue: 141705 retVector.emplace_back(R"(.*smoke_arm_Deconv_2D_Planar_FP16/DeconvolutionLayerCPUTest.*INFERENCE_PRECISION_HINT=f16.*)"); + // Issue: 154882 + retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*)"); #endif #if defined(OPENVINO_ARCH_ARM) @@ -529,6 +531,8 @@ std::vector disabledTestPatterns() { if (!ov::with_cpu_x86_avx512_core_fp16()) { // Skip fp16 tests for paltforms that don't support fp16 precision retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)"); + retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*)"); + retVector.emplace_back(R"(.*ConcatSDPTest.*f16.*)"); } #elif defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM) if (!ov::intel_cpu::hasHardwareSupport(ov::element::f16)) { @@ -560,6 +564,7 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)"); retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)"); retVector.emplace_back(R"(.*smoke_Snippets_MHAEnforceBF16.*)"); + retVector.emplace_back(R"(.*ConcatSDPTest.*bf16.*)"); } // [150842] Need to support dynamic K dimension of BF16|INT8 MatMul on AMX systems if (ov::with_cpu_x86_avx512_core_amx()) { diff --git a/src/plugins/intel_cpu/tests/unit/brgemm_executor_test.cpp b/src/plugins/intel_cpu/tests/unit/brgemm_executor_test.cpp index 35a29f97452d4b..9ae58561d4dfcd 100644 --- a/src/plugins/intel_cpu/tests/unit/brgemm_executor_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/brgemm_executor_test.cpp @@ -28,18 +28,19 @@ void run_test(ov::element::Type rtPrec) { size_t K = 33; ov::intel_cpu::BrgemmKernel gemm(M, N, K, K, N, N, false, rtPrec); size_t nthr = 8; - bool is_bf16 = (rtPrec == ov::element::bf16); + bool is_f32 = (rtPrec == ov::element::f32); std::vector a_data(M * K, (1.0f/33)); std::vector b_data(K * N, 4.0f); std::vector c_data(nthr * M * N, 0.0f); std::vector wsp(nthr * 4 * 1024, 0.0f); - std::vector b_scracth(gemm.get_scratch_b_size(), 0.0f); - std::vector a_scracth(gemm.get_scratch_a_size(), 0.0f); - if (is_bf16) - gemm.copy_buffer_b(b_data.data(), b_scracth.data()); + std::vector a_scratch(gemm.get_scratch_a_size(), 0.0f); + std::vector b_scratch(gemm.get_scratch_b_size(), 0.0f); + if (!is_f32) { + gemm.copy_buffer_b(b_data.data(), b_scratch.data()); + } auto m_block_size = gemm.get_mblk_size(); auto m_blocks = (M + gemm.get_mblk_size() - 1) / m_block_size; - T* b_ptr = is_bf16 ? b_scracth.data() : b_data.data(); + void* b_ptr = !is_f32 ? 
static_cast(b_scratch.data()) : static_cast(b_data.data()); ov::parallel_for2d(nthr, m_blocks, [&](size_t i, size_t m_blk) { auto m_start = m_blk * m_block_size; auto m_end = std::min(m_start + m_block_size, M); @@ -49,7 +50,7 @@ void run_test(ov::element::Type rtPrec) { b_ptr, c_data.data() + i * M * N + m_start * N, wsp.data() + i * 4 * 1024, - a_scracth.data()); + a_scratch.data()); }); ov::parallel_for(nthr, [&](size_t i){ for (size_t m = 0; m < M; m++) { @@ -73,9 +74,13 @@ TEST_P(BrgemmKernelTest, simpleGemmTest) { GTEST_SKIP(); if (rtPrec == ov::element::f32 && !ov::with_cpu_x86_avx512_core()) GTEST_SKIP(); + if (rtPrec == ov::element::f16 && !ov::with_cpu_x86_avx512_core_fp16()) + GTEST_SKIP(); if (rtPrec == ov::element::bf16) { run_test(rtPrec); + } else if (rtPrec == ov::element::f16) { + run_test(rtPrec); } else { run_test(rtPrec); } @@ -83,6 +88,6 @@ TEST_P(BrgemmKernelTest, simpleGemmTest) { INSTANTIATE_TEST_SUITE_P(BrgemmKernelUnitTest, BrgemmKernelTest, - ::testing::Values(ov::element::f32, ov::element::bf16), + ::testing::Values(ov::element::f32, ov::element::bf16, ov::element::f16), BrgemmKernelTest::getTestCaseName); } // namespace brgemmUnitTest From dc7f4aee28f12afd73138fa7fae7452c2fef7802 Mon Sep 17 00:00:00 2001 From: Andrzej Kopytko Date: Tue, 15 Oct 2024 09:22:08 +0200 Subject: [PATCH 012/112] [DOCS] Fixed tests for Ovms folder path (#27049) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- docs/articles_en/about-openvino/performance-benchmarks.rst | 2 +- .../openvino-ecosystem/openvino-security-add-on.rst | 2 +- docs/articles_en/openvino-workflow.rst | 2 +- .../openvino-workflow/running-inference/stateful-models.rst | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index aa60c44a2ad5c8..40b94210f6c43d 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -18,7 +18,7 @@ Performance Benchmarks This page presents benchmark results for `Intel® Distribution of OpenVINO™ toolkit `__ -and :doc:`OpenVINO Model Server <../ovms_what_is_openvino_model_server>`, for a representative +and :doc:`OpenVINO Model Server <../openvino-workflow/model-server/ovms_what_is_openvino_model_server>`, for a representative selection of public neural networks and Intel® devices. The results may help you decide which hardware to use in your applications or plan AI workload for the hardware you have already implemented in your solutions. Click the buttons below to see the chosen benchmark data. diff --git a/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst b/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst index ea76392be4e2e6..2d5598a5eb8e9d 100644 --- a/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst +++ b/docs/articles_en/documentation/openvino-ecosystem/openvino-security-add-on.rst @@ -17,7 +17,7 @@ In this release, one person performs the role of both the Model Developer and th Overview ######## -The OpenVINO™ Security Add-on works with the :doc:`OpenVINO™ Model Server <../../ovms_what_is_openvino_model_server>` on Intel® architecture. 
Together, the OpenVINO™ Security Add-on and the OpenVINO™ Model Server provide a way for Model Developers and Independent Software Vendors to use secure packaging and secure model execution to enable access control to the OpenVINO™ models, and for model Users to run inference within assigned limits. +The OpenVINO™ Security Add-on works with the :doc:`OpenVINO™ Model Server <../../openvino-workflow/model-server/ovms_what_is_openvino_model_server>` on Intel® architecture. Together, the OpenVINO™ Security Add-on and the OpenVINO™ Model Server provide a way for Model Developers and Independent Software Vendors to use secure packaging and secure model execution to enable access control to the OpenVINO™ models, and for model Users to run inference within assigned limits. The OpenVINO™ Security Add-on consists of three components that run in Kernel-based Virtual Machines (KVMs). These components provide a way to run security-sensitive operations in an isolated environment. A brief description of the three components are as follows. Click each triangled line for more information about each. diff --git a/docs/articles_en/openvino-workflow.rst b/docs/articles_en/openvino-workflow.rst index 0dda91f91fb552..942d6ed4b13a96 100644 --- a/docs/articles_en/openvino-workflow.rst +++ b/docs/articles_en/openvino-workflow.rst @@ -89,7 +89,7 @@ OpenVINO uses the following functions for reading, converting, and saving models | Deploy a model locally, reading the file directly from your application and utilizing about-openvino/additional-resources available to the system. | Deployment on a local system uses the steps described in the section on running inference. -| :doc:`Deployment Option 2. Using Model Server ` +| :doc:`Deployment Option 2. Using Model Server ` | Deploy a model remotely, connecting your application to an inference server and utilizing external about-openvino/additional-resources, with no impact on the app's performance. | Deployment on OpenVINO Model Server is quick and does not require any additional steps described in the section on running inference. diff --git a/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst b/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst index 249fc8c4884cc1..86788b20249a3f 100644 --- a/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst +++ b/docs/articles_en/openvino-workflow/running-inference/stateful-models.rst @@ -140,4 +140,4 @@ sequences. You can find more examples demonstrating how to work with states in other articles: * `LLM Chatbot notebook <../../notebooks/stable-zephyr-3b-chatbot-with-output.html>`__ -* :doc:`Serving Stateful Models with OpenVINO Model Server <../../ovms_docs_stateful_models>` +* :doc:`Serving Stateful Models with OpenVINO Model Server <../../openvino-workflow/model-server/ovms_docs_stateful_models>` From 06eb81475a6274707f01a7a9ce1ad85c2f277df2 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 15 Oct 2024 09:57:03 +0200 Subject: [PATCH 013/112] [CPU] Optimize MemoryInput/Output for empty shapes (#27015) ### Details: Add a short path for "empty" state tensors processing in the MemoryInput and MemoryOutput nodes. 
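The fast path, condensed from the `MemoryInput::runDynamic` change in this patch (error handling and the `MemoryOutput` side omitted; names are the plugin's own):

```cpp
// When the assigned state tensor has a zero dimension and no init subgraph
// needs to run, there is nothing to share or copy: drop the shared memory
// block and just publish the empty shape on the output port.
const auto& shape = assignedMem->getShape();
if (shape.hasZeroDims() && !needInitGraphProcessing()) {
    memBlock->reset();
    redefineOutputMemory(0, shape.getStaticDims());
    return;  // skips descriptor sharing and the src->dst load below
}
```

`MemoryOutput::runDynamic` gets the symmetric treatment: the external descriptor is redefined only when the state shape actually changes, and the data copy is skipped entirely for zero-dim shapes.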
### Tickets: - CVS-152850 - CVS-153035 --- src/plugins/intel_cpu/src/node.cpp | 9 ++-- src/plugins/intel_cpu/src/nodes/memory.cpp | 55 +++++++++++++++------- 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 34e48dea50cbfa..7c23d55fc4147a 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -655,7 +655,7 @@ std::vector Node::getChildEdgesAtPort(int inputNum) const { if (!edge) OPENVINO_THROW("Node ", getName(), " contains dead weak ptr"); if (edge->getInputNum() == inputNum) - res.push_back(edge); + res.emplace_back(std::move(edge)); } return res; } @@ -793,11 +793,10 @@ void Node::redefineOutputMemory(const std::vector &newOutputShapes) void Node::redefineOutputMemory(const size_t port, const VectorDims& new_output_shape) { const auto edges = getChildEdgesAtPort(port); + static const VectorDims single_element_shape = {1}; + // avoid 0D shape incompatible - auto new_shape = new_output_shape; - if (new_shape.empty()) { - new_shape.push_back(1); - } + const auto& new_shape = new_output_shape.empty() ? single_element_shape : new_output_shape; const auto& curr_desc = edges[0]->getMemory().getDesc(); if (curr_desc.getShape().isStatic() && curr_desc.getShape().getStaticDims() == new_shape) { diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index e66b148c6f99ee..74a3b670dad126 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -300,21 +300,27 @@ void MemoryOutput::runStatic(dnnl::stream strm) { void MemoryOutput::runDynamic(dnnl::stream strm) { //first we have to resize the output memory auto inputMem = getSrcMemoryAtPort(0); - const auto& newDims = inputMem->getStaticDims(); - OPENVINO_ASSERT(extMemDesc, - "MemoryOutput ", - getName(), - " uninitialized assigned memory"); - - auto newExternDesc = extMemDesc->cloneWithNewDims(newDims); OPENVINO_ASSERT(assignedMem, "MemoryOutput ", getName(), " uninitialized assigned memory"); - assignedMem->redefineDesc(newExternDesc); - runStatic(strm); + const auto& newShape = inputMem->getShape(); + const auto& stateShape = assignedMem->getShape(); + + if (stateShape.isDynamic() || stateShape.getStaticDims() != newShape.getStaticDims()) { + OPENVINO_ASSERT(extMemDesc, + "MemoryOutput ", + getName(), + " uninitialized assigned memory"); + auto newExternDesc = extMemDesc->cloneWithNewDims(newShape.getStaticDims()); + assignedMem->redefineDesc(newExternDesc); + } + + if (!newShape.hasZeroDims()) { // no need to copy data for empty tensor + runStatic(strm); + } } bool MemoryOutputStub::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { @@ -593,31 +599,44 @@ void MemoryInput::runDynamic(dnnl::stream strm) { getName(), " assigned state has null memory ptr"); - // check whether we can share memory block - const auto& stateDims = assignedMem->getStaticDims(); - const bool hasZeroDims = std::count(std::begin(stateDims), std::end(stateDims), 0) > 0; - auto internDesc = getBaseMemDescAtOutputPort(0)->cloneWithNewDims(stateDims, hasZeroDims); - OPENVINO_ASSERT(memBlock, "MemoryInput ", getName(), " has uninitialized memory block."); + // check whether we can share memory block + const auto& shape = assignedMem->getShape(); + const bool hasZeroDims = shape.hasZeroDims(); + const bool processInitGraph = needInitGraphProcessing(); + const auto& stateDims = shape.getStaticDims(); + + if (hasZeroDims && 
!processInitGraph) { + // fast track as we don't really need to share memory and transfer any data for empty tensors + memBlock->reset(); + redefineOutputMemory(0, stateDims); + return; + } + + auto dst = getDstMemoryAtPort(0); + auto currentOutputDesc = dst->getDescPtr(); + + auto internDesc = currentOutputDesc->isDefined() && (currentOutputDesc->getShape().getStaticDims() == stateDims) + ? currentOutputDesc + : getBaseMemDescAtOutputPort(0)->cloneWithNewDims(stateDims, hasZeroDims); + if (internDesc->isCompatible(assignedMem->getDesc())) { memBlock->setMemBlock(assignedMem->getMemoryBlock()); } else { memBlock->reset(); } - const bool processInitGraph = needInitGraphProcessing(); //reshape output const auto& newDims = processInitGraph ? getSrcMemoryAtPort(0)->getStaticDims() : stateDims; - redefineOutputMemory({newDims}); + redefineOutputMemory(0, newDims); //copy data when necessary auto src = processInitGraph ? getSrcMemoryAtPort(0) : assignedMem; - auto dst = getDstMemoryAtPort(0); if (src->getData() != dst->getData()) { dst->load(*src); } @@ -847,6 +866,6 @@ void MemoryInputSDPA::resolveInPlaceEdges(Edge::LOOK look) { } } -} // namespace node +} // namespace node } // namespace intel_cpu } // namespace ov From 96032f3b532e7686739d767bccd811c1056978ee Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Tue, 15 Oct 2024 10:07:44 +0200 Subject: [PATCH 014/112] [Core/Ref] Fix Proposal swap_xy selection (#26986) ### Details: - Fixes Proposal references implementation image shape setup. - Adds appropriate test. ### Tickets: - CVS-100566 --- .../include/openvino/reference/proposal.hpp | 14 +- .../functional/op_reference/proposal.cpp | 217 +++++++++++++----- 2 files changed, 161 insertions(+), 70 deletions(-) diff --git a/src/core/reference/include/openvino/reference/proposal.hpp b/src/core/reference/include/openvino/reference/proposal.hpp index f80faafc5efd27..a2d727b6156aea 100644 --- a/src/core/reference/include/openvino/reference/proposal.hpp +++ b/src/core/reference/include/openvino/reference/proposal.hpp @@ -319,6 +319,11 @@ static void proposal_exec(const T* class_probs, const Shape& output_shape, const Shape& out_probs_shape, const op::v0::Proposal::Attributes& attrs) { + const auto batch_num = static_cast(class_probs_shape[0]); + const auto coordinates_offset = attrs.framework == "tensorflow" ? 0.f : 1.f; + const auto initial_clip = attrs.framework == "tensorflow"; + const auto swap_xy = attrs.framework == "tensorflow"; + const T* p_bottom_item = class_probs; const T* p_d_anchor_item = bbox_deltas; T* p_roi_item = output; @@ -328,8 +333,8 @@ static void proposal_exec(const T* class_probs, const unsigned int bottom_H = static_cast(class_probs_shape[2]); const unsigned int bottom_W = static_cast(class_probs_shape[3]); // input image height and width - const T img_H = image_shape[0]; - const T img_W = image_shape[1]; + const T img_H = image_shape[swap_xy ? 1 : 0]; + const T img_W = image_shape[swap_xy ? 0 : 1]; // scale factor for H and W, depends on shape of image_shape // can be split into H and W {image_height, image_width, scale_height, // scale_width} @@ -350,11 +355,6 @@ static void proposal_exec(const T* class_probs, std::vector anchors = generate_anchors(attrs, anchor_count); - unsigned int batch_num = static_cast(class_probs_shape[0]); - float coordinates_offset = attrs.framework == "tensorflow" ? 
0.0f : 1.0f; - bool initial_clip = attrs.framework == "tensorflow"; - bool swap_xy = attrs.framework == "tensorflow"; - for (unsigned int batch_idx = 0; batch_idx < batch_num; ++batch_idx) { std::fill(roi_indices.begin(), roi_indices.end(), 0); num_rois = 0; diff --git a/src/plugins/template/tests/functional/op_reference/proposal.cpp b/src/plugins/template/tests/functional/op_reference/proposal.cpp index aa49a6b7330166..435a279588af07 100644 --- a/src/plugins/template/tests/functional/op_reference/proposal.cpp +++ b/src/plugins/template/tests/functional/op_reference/proposal.cpp @@ -88,10 +88,6 @@ struct ProposalV4Params { const int feature_stride, const int pre_nms_topn, const int post_nms_topn, - const size_t image_shape_num, - const size_t image_h, - const size_t image_w, - const size_t image_z, const std::vector& ratios, const std::vector& scales, const size_t batch_size, @@ -101,19 +97,22 @@ struct ProposalV4Params { const ov::element::Type& iType, const std::vector& clsScoreValues, const std::vector& bboxPredValues, + const std::vector& inputInfoValues, const std::vector& proposalValues, const std::vector& probsValues, + const std::string& framework, const std::string& test_name = "") : inType(iType), outType(iType), clsScoreData(CreateTensor(iType, clsScoreValues)), bboxPredData(CreateTensor(iType, bboxPredValues)), + imageInfoData(CreateTensor(iType, inputInfoValues)), refProposalData(CreateTensor(Shape{batch_size * post_nms_topn, 5}, iType, proposalValues)), refProbsData(CreateTensor(Shape{batch_size * post_nms_topn}, iType, probsValues)), testcaseName(test_name) { clsScoreShape = Shape{batch_size, anchor_num * 2, feat_map_height, feat_map_width}; bboxPredShape = Shape{batch_size, anchor_num * 4, feat_map_height, feat_map_width}; - imageShapeShape = Shape{image_shape_num}; + imageInfoShape = Shape{inputInfoValues.size()}; attrs.base_size = min_bbox_size; attrs.min_size = min_bbox_size; @@ -129,25 +128,19 @@ struct ProposalV4Params { attrs.normalize = false; attrs.box_size_scale = 1.0f; attrs.box_coordinate_scale = 1.0f; - attrs.framework = ""; + attrs.framework = framework; attrs.infer_probs = true; - - std::vector inputShapeValues; - inputShapeValues.push_back(static_cast(image_h)); - inputShapeValues.push_back(static_cast(image_w)); - inputShapeValues.push_back(static_cast(image_z)); - imageShapeData = CreateTensor(iType, inputShapeValues); } ov::op::v4::Proposal::Attributes attrs; ov::PartialShape clsScoreShape; ov::PartialShape bboxPredShape; - ov::PartialShape imageShapeShape; + ov::PartialShape imageInfoShape; ov::element::Type inType; ov::element::Type outType; ov::Tensor clsScoreData; ov::Tensor bboxPredData; - ov::Tensor imageShapeData; + ov::Tensor imageInfoData; ov::Tensor refProposalData; ov::Tensor refProbsData; std::string testcaseName; @@ -192,7 +185,7 @@ class ReferenceProposalV4LayerTest : public testing::TestWithParam& obj) { @@ -200,9 +193,11 @@ class ReferenceProposalV4LayerTest : public testing::TestWithParam CreateFunction(const ProposalV4Params& params) { const auto class_probs_param = std::make_shared(params.inType, params.clsScoreShape); const auto bbox_deltas_param = std::make_shared(params.inType, params.bboxPredShape); - const auto image_shape_param = std::make_shared(params.inType, params.imageShapeShape); + const auto image_shape_param = std::make_shared(params.inType, params.imageInfoShape); const auto Proposal = std::make_shared(class_probs_param, bbox_deltas_param, image_shape_param, params.attrs); return std::make_shared(Proposal->outputs(), @@ 
-235,21 +230,21 @@ std::vector generateProposalV1Params() { std::vector proposalV1Params{ ProposalV1Params( - 0.7f, - 16, - 16, - 6000, - 10, // iou_threshold, min_nnox_size, feature_stride,pre_nms_topn, post_nms_topn - 3, - 210, - 350, - 1, // image_shape_num, image_h, image_w, image_z + 0.7f, // iou_threshold + 16, // min_nnox_size + 16, // feature_stride + 6000, // pre_nms_topn + 10, // post_nms_topn + 3, // image_shape_num + 210, // image_h + 350, // image_w + 1, // image_z {0.5f}, // ratios {32.0f}, // scales - 1, - 1, - 10, - 10, // batch_size, anchor_num, feat_map_height, feat_map_width + 1, // batch_size + 1, // anchor_num + 10, // feat_map_height + 10, // feat_map_width IN_ET, std::vector{ 0.000240f, 0.003802f, 0.111432f, 0.000503f, 0.007887f, 0.144701f, 0.399074f, 0.004680f, // 0 @@ -351,22 +346,18 @@ std::vector generateProposalV4Params() { using T = typename element_type_traits::value_type; std::vector proposalV4Params{ - ProposalV4Params( - 0.7f, - 16, - 16, - 6000, - 10, // iou_threshold, min_nnox_size, feature_stride,pre_nms_topn, post_nms_topn - 3, - 210, - 350, - 1, // image_shape_num, image_h, image_w, image_z + ProposalV4Params{ + 0.7f, // iou_threshold + 16, // min_bbox_size + 16, // feature_stride + 6000, // pre_nms_topn + 10, // post_nms_topn {0.5f}, // ratios {32.0f}, // scales - 1, - 1, - 10, - 10, // batch_size, anchor_num, feat_map_height, feat_map_width + 1, // batch_size + 1, // anchor_num + 10, // feat_map_height + 10, // feat_map_width IN_ET, std::vector{ 0.000240f, 0.003802f, 0.111432f, 0.000503f, 0.007887f, 0.144701f, 0.399074f, 0.004680f, // 0 @@ -447,6 +438,7 @@ std::vector generateProposalV4Params() { 0.026623f, 0.117951f, -0.076234f, -0.811997f, 0.01301f, 0.020042f, 0.173756f, -0.036191f, -0.068887f, 0.0229f, 0.245465f, 0.214282f, -0.011054f, 0.132813f, 0.241014f, -0.148763f, }, + std::vector{210, 350, 1}, std::vector{ 0.000000f, 0.000000f, 0.000000f, 349.000000f, 209.000000f, // 0 0.000000f, 0.000000f, 0.000000f, 237.625443f, 209.000000f, // 5 @@ -470,36 +462,135 @@ std::vector generateProposalV4Params() { 0.0008570f, 0.0002190f, 0.0000000f, - }), + }, + ""}, + ProposalV4Params{ + 0.7f, // iou_threshold + 16, // min_bbox_size + 16, // feature_stride + 6000, // pre_nms_topn + 10, // post_nms_topn + {0.5f}, // ratios + {32.0f}, // scales + 1, // batch_size + 1, // anchor_num + 10, // feat_map_height + 10, // feat_map_width + IN_ET, + std::vector{ + 0.000240f, 0.003802f, 0.111432f, 0.000503f, 0.007887f, 0.144701f, 0.399074f, 0.004680f, // 0 + 0.139741f, 0.002386f, 0.030003f, 0.276552f, 0.000267f, 0.022971f, 0.287953f, 0.050235f, // 8 + 0.002580f, 0.206311f, 0.000146f, 0.009656f, 0.175462f, 0.000147f, 0.014718f, 0.272348f, // 16 + 0.065199f, 0.003286f, 0.185335f, 0.003720f, 0.025932f, 0.251401f, 0.001465f, 0.090447f, // 24 + 0.488469f, 0.092259f, 0.019306f, 0.379091f, 0.005311f, 0.010369f, 0.087615f, 0.042003f, // 32 + 0.073871f, 0.416763f, 0.044282f, 0.069776f, 0.313032f, 0.000457f, 0.017346f, 0.089762f, // 40 + 0.000820f, 0.103986f, 0.367993f, 0.026315f, 0.035701f, 0.299252f, 0.000135f, 0.017825f, // 48 + 0.150119f, 0.000076f, 0.050511f, 0.269601f, 0.026680f, 0.003541f, 0.189765f, 0.000051f, // 56 + 0.004315f, 0.193150f, 0.000032f, 0.007254f, 0.185557f, 0.051526f, 0.000657f, 0.117579f, // 64 + 0.000115f, 0.010179f, 0.293187f, 0.000025f, 0.006505f, 0.175345f, 0.032587f, 0.000469f, // 72 + 0.098443f, 0.000121f, 0.009600f, 0.322782f, 0.000032f, 0.004543f, 0.166860f, 0.044911f, // 80 + 0.000187f, 0.102691f, 0.000242f, 0.005502f, 0.107865f, 0.000191f, 
0.005336f, 0.086893f, // 88 + 0.078422f, 0.000345f, 0.079096f, 0.000281f, 0.016388f, 0.214072f, 0.000107f, 0.012027f, // 96 + 0.192754f, 0.049531f, 0.000386f, 0.149893f, 0.000374f, 0.016965f, 0.204781f, 0.000163f, // 104 + 0.016272f, 0.215277f, 0.032298f, 0.000857f, 0.133426f, 0.000614f, 0.020215f, 0.165789f, // 112 + 0.000225f, 0.036951f, 0.262195f, 0.087675f, 0.004596f, 0.147764f, 0.000219f, 0.010502f, // 120 + 0.163394f, 0.000152f, 0.023116f, 0.241702f, 0.081800f, 0.002197f, 0.146637f, 0.000193f, // 128 + 0.012017f, 0.133497f, 0.000375f, 0.028605f, 0.309179f, 0.065962f, 0.005508f, 0.155530f, // 136 + 0.000186f, 0.004540f, 0.079319f, 0.000799f, 0.031003f, 0.303045f, 0.051473f, 0.017770f, // 144 + 0.206188f, 0.000202f, 0.004291f, 0.061095f, 0.001109f, 0.018094f, 0.156639f, 0.026062f, // 152 + 0.005270f, 0.148651f, 0.000026f, 0.007300f, 0.096013f, 0.000383f, 0.022134f, 0.129511f, // 160 + 0.080882f, 0.003416f, 0.129922f, 0.000037f, 0.010040f, 0.130007f, 0.000116f, 0.014904f, // 168 + 0.171423f, 0.082893f, 0.000921f, 0.154976f, 0.000142f, 0.016552f, 0.209696f, 0.000227f, // 176 + 0.022418f, 0.228501f, 0.111712f, 0.001987f, 0.158164f, 0.001200f, 0.027049f, 0.308222f, // 184 + 0.001366f, 0.038146f, 0.287945f, 0.072526f, 0.016064f, 0.257895f, 0.000595f, 0.016962f, // 192 + }, + std::vector{ + 0.006756f, -0.055635f, 0.030843f, 0.007482f, 0.009056f, -0.041824f, 0.119722f, 0.168988f, + 0.002822f, 0.039733f, 0.109005f, 0.245152f, -0.013196f, -0.018222f, -0.170122f, -0.374904f, + -0.005455f, -0.034059f, -0.006787f, 0.072005f, -0.017933f, -0.007358f, 0.034149f, 0.123846f, + 0.128319f, 0.016107f, -0.615487f, -1.235094f, -0.024253f, -0.019406f, 0.134142f, 0.157853f, + -0.021119f, 0.007383f, 0.089365f, 0.092854f, 0.062491f, 0.002366f, 0.122464f, -0.003326f, + 0.015468f, -0.034088f, 0.079009f, 0.075483f, 0.011972f, 0.042427f, 0.106865f, 0.158754f, + 0.071211f, -0.034009f, 0.007985f, -0.441477f, 0.009046f, -0.028515f, 0.095372f, 0.119598f, + -0.007553f, -0.0072f, 0.105072f, 0.084314f, 0.23268f, -0.02906f, -0.408454f, -1.13439f, + 0.016202f, -0.037859f, 0.130873f, 0.129652f, 0.002064f, -0.011969f, 0.171623f, 0.050218f, + 0.113831f, 0.028922f, 0.017785f, 0.059708f, 0.037658f, -0.011245f, 0.097197f, 0.137491f, + 0.024218f, 0.04739f, 0.091978f, 0.217333f, 0.088418f, -0.004662f, -0.095168f, -0.397928f, + 0.02639f, -0.008501f, 0.068487f, 0.108465f, 0.020069f, 0.018829f, 0.040206f, 0.068473f, + 0.226458f, -0.072871f, -0.672384f, -1.447558f, 0.039598f, 0.017471f, 0.187288f, 0.08409f, + 0.017152f, -0.00516f, 0.183419f, 0.068469f, 0.063944f, 0.160725f, -0.022493f, -0.132291f, + 0.010542f, 0.036318f, 0.074042f, -0.013323f, 0.00808f, 0.060365f, 0.120566f, 0.21866f, + 0.046324f, 0.088741f, 0.029469f, -0.517183f, 0.00917f, 0.011915f, 0.053674f, 0.140168f, + 0.0033f, 0.022759f, -0.006196f, 0.063839f, 0.083726f, -0.088385f, -0.57208f, -1.454211f, + 0.020655f, 0.010788f, 0.134951f, 0.109709f, 0.015445f, -0.015363f, 0.109153f, 0.051209f, + 0.024297f, 0.139126f, -0.12358f, -0.127979f, 0.004587f, 0.004751f, 0.047292f, 0.027066f, + 0.011003f, 0.069887f, 0.117052f, 0.267419f, 0.039306f, 0.077584f, 0.02579f, -0.496149f, + -0.005569f, 0.015494f, -0.011662f, 0.105549f, -0.007015f, 0.031984f, -0.075742f, 0.0852f, + 0.023886f, -0.053107f, -0.325533f, -1.329066f, 0.004688f, 0.034501f, 0.089317f, 0.042463f, + 0.004212f, -0.015128f, 0.00892f, 0.028266f, 0.009997f, 0.157822f, 0.020116f, -0.142337f, + 0.008199f, 0.046564f, 0.083014f, 0.046307f, 0.006771f, 0.084997f, 0.141935f, 0.228339f, + -0.020308f, 0.077745f, -0.018319f, -0.522311f, 
0.010432f, 0.024641f, 0.020571f, 0.097148f, + 0.002064f, 0.035053f, -0.121995f, 0.012222f, -0.030779f, 0.100481f, -0.331737f, -1.257669f, + -0.013079f, 0.021227f, 0.159949f, 0.120097f, 0.005765f, -0.012335f, -0.005268f, 0.042067f, + -0.043972f, 0.102556f, 0.180494f, -0.084721f, -0.011962f, 0.031302f, 0.112511f, 0.027557f, + -0.002085f, 0.082978f, 0.149409f, 0.195091f, -0.033731f, 0.019861f, -0.064047f, -0.471328f, + -0.004093f, 0.016803f, 0.044635f, 0.058912f, -0.018735f, 0.035536f, -0.050373f, -0.002794f, + -0.086705f, 0.038435f, -0.301466f, -1.071246f, -0.028247f, 0.018984f, 0.254702f, 0.141142f, + -0.017522f, 0.014843f, 0.079391f, 0.079662f, -0.051204f, 0.048419f, 0.235604f, -0.185797f, + -0.019569f, 0.02678f, 0.162507f, 0.046435f, -0.004606f, 0.08806f, 0.18634f, 0.193957f, + -0.024333f, -0.01298f, -0.17977f, -0.65881f, -0.003778f, 0.007418f, 0.065439f, 0.104549f, + -0.027706f, 0.03301f, 0.057492f, 0.032019f, -0.135337f, 0.000269f, -0.250203f, -1.181688f, + -0.027022f, -0.006755f, 0.206848f, 0.129268f, -0.003529f, 0.013445f, 0.181484f, 0.139955f, + -0.036587f, 0.065824f, 0.288751f, -0.110813f, -0.015578f, 0.044818f, 0.17756f, 0.006914f, + 0.002329f, 0.068982f, 0.189079f, 0.184253f, 0.00301f, -0.039168f, -0.010855f, -0.393254f, + 0.000028f, 0.001906f, 0.07217f, 0.063305f, -0.026144f, 0.028842f, 0.139149f, 0.023377f, + 0.023362f, 0.023559f, -0.145386f, -0.863572f, -0.015749f, -0.021364f, 0.172571f, 0.078393f, + -0.037253f, 0.014978f, 0.221502f, 0.189111f, -0.048956f, 0.085409f, 0.325399f, -0.058294f, + -0.028495f, 0.021663f, 0.19392f, 0.02706f, 0.006908f, 0.065751f, 0.176395f, 0.138375f, + 0.012418f, -0.031228f, -0.008762f, -0.427345f, -0.013677f, -0.002429f, 0.069655f, 0.019505f, + -0.036763f, 0.022528f, 0.201062f, 0.022205f, 0.024528f, 0.06241f, -0.076237f, -0.840695f, + -0.007268f, -0.027865f, 0.211056f, 0.074744f, -0.053563f, 0.006863f, 0.301432f, 0.192879f, + -0.021944f, 0.100535f, 0.19031f, -0.133746f, -0.006151f, 0.023944f, 0.13561f, -0.03259f, + 0.000618f, 0.063736f, 0.180904f, 0.12393f, 0.001275f, -0.0306f, -0.032822f, -0.496515f, + 0.009757f, 0.014602f, 0.004532f, -0.039969f, -0.015984f, 0.047726f, 0.099865f, 0.003163f, + 0.026623f, 0.117951f, -0.076234f, -0.811997f, 0.01301f, 0.020042f, 0.173756f, -0.036191f, + -0.068887f, 0.0229f, 0.245465f, 0.214282f, -0.011054f, 0.132813f, 0.241014f, -0.148763f, + }, + std::vector{210, 350, 1, 1}, + std::vector{0.f, 11.9688f, 4.02532f, 204.528f, 182.586f, 0.f, 33.7915f, 48.4886f, 210.f, + 238.505f, 0.f, 0.f, 0.f, 204.428f, 337.029f, 0.f, 72.611f, 9.87545f, + 203.687f, 212.299f, 0.f, 5.08432f, 4.19913f, 208.719f, 249.225f, 0.f, 23.6503f, + 57.8165f, 210.f, 350.f, 0.f, 84.8804f, 9.47241f, 156.822f, 243.003f, 0.f, + 101.663f, 15.5542f, 166.083f, 327.839f, 0.f, 13.9738f, 0.f, 210.f, 128.482f, + 0.f, 77.8929f, 29.663f, 186.561f, 313.287f + + }, + std::vector< + T>{0.309179, 0.308222, 0.303045, 0.241702, 0.192754, 0.165789, 0.15553, 0.154976, 0.146637, 0.129511}, + "tensorflow"}, }; return proposalV4Params; } std::vector generateProposalV1CombinedParams() { - const std::vector> proposalTypeParams{ - generateProposalV1Params(), - generateProposalV1Params(), - generateProposalV1Params(), - generateProposalV1Params()}; + std::vector> proposalTypeParams{generateProposalV1Params(), + generateProposalV1Params(), + generateProposalV1Params(), + generateProposalV1Params()}; std::vector combinedParams; - - for (const auto& params : proposalTypeParams) { - combinedParams.insert(combinedParams.end(), params.begin(), params.end()); - } + for (auto& params : 
proposalTypeParams) + std::move(params.begin(), params.end(), std::back_inserter(combinedParams)); return combinedParams; } std::vector generateProposalV4CombinedParams() { - const std::vector> proposalTypeParams{ - generateProposalV4Params(), - generateProposalV4Params(), - generateProposalV4Params(), - generateProposalV4Params()}; + std::vector> proposalTypeParams{generateProposalV4Params(), + generateProposalV4Params(), + generateProposalV4Params(), + generateProposalV4Params()}; std::vector combinedParams; - - for (const auto& params : proposalTypeParams) { - combinedParams.insert(combinedParams.end(), params.begin(), params.end()); - } + for (auto& params : proposalTypeParams) + std::move(params.begin(), params.end(), std::back_inserter(combinedParams)); return combinedParams; } From 03773f749eab0082b5d0f1ce7efec31e50106a06 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Tue, 15 Oct 2024 09:15:39 +0100 Subject: [PATCH 015/112] [CI] [GHA] Use the default `gcc` in Ubuntu 20 (#26993) ### Tickets: - *154652* --------- Co-authored-by: Mikhail Ryzhov --- .github/dockerfiles/docker_tag | 2 +- .github/dockerfiles/ov_build/ubuntu_20_04_arm64/Dockerfile | 2 +- .github/dockerfiles/ov_build/ubuntu_20_04_x64/Dockerfile | 7 ------- .../ov_build/ubuntu_20_04_x64_nvidia/Dockerfile | 7 ------- 4 files changed, 2 insertions(+), 16 deletions(-) diff --git a/.github/dockerfiles/docker_tag b/.github/dockerfiles/docker_tag index 8e5386a30ec997..56faa37d1da67f 100644 --- a/.github/dockerfiles/docker_tag +++ b/.github/dockerfiles/docker_tag @@ -1 +1 @@ -pr-26656 \ No newline at end of file +pr-26993 \ No newline at end of file diff --git a/.github/dockerfiles/ov_build/ubuntu_20_04_arm64/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_20_04_arm64/Dockerfile index c7d0e95164f414..7653fe6abb7434 100644 --- a/.github/dockerfiles/ov_build/ubuntu_20_04_arm64/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_20_04_arm64/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update && \ libhdf5-dev \ # For Java API default-jdk \ - # Compiler + # Compiler, required for multi-isa build gcc-10 \ g++-10 \ && \ diff --git a/.github/dockerfiles/ov_build/ubuntu_20_04_x64/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_20_04_x64/Dockerfile index 53829ad50b2975..1620e674ef67d5 100644 --- a/.github/dockerfiles/ov_build/ubuntu_20_04_x64/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_20_04_x64/Dockerfile @@ -30,9 +30,6 @@ RUN apt-get update && \ python3.9-distutils \ # For Java API default-jdk \ - # Compiler \ - gcc-10 \ - g++-10 \ && \ rm -rf /var/lib/apt/lists/* @@ -42,10 +39,6 @@ RUN chmod +x /install_build_dependencies.sh && \ /install_build_dependencies.sh && \ rm -rf /var/lib/apt/lists/* -# Set gcc-10 as a default compiler -RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 30 && \ - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 30 - # Install sscache ARG SCCACHE_VERSION="v0.7.5" ENV SCCACHE_HOME="/opt/sccache" \ diff --git a/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile b/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile index 5df369bbb6398a..0a4d7ef90aa115 100644 --- a/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile +++ b/.github/dockerfiles/ov_build/ubuntu_20_04_x64_nvidia/Dockerfile @@ -35,9 +35,6 @@ RUN apt-get update && \ python3.11-distutils \ # For Java API default-jdk \ - # Compiler \ - gcc-10 \ - g++-10 \ && \ rm -rf /var/lib/apt/lists/* @@ -47,10 +44,6 @@ RUN chmod +x /install_build_dependencies.sh && \ 
/install_build_dependencies.sh && \ rm -rf /var/lib/apt/lists/* -# Set gcc-10 as a default compiler -RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 30 && \ - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 30 - # Install sscache ARG SCCACHE_VERSION="v0.7.5" ENV SCCACHE_HOME="/opt/sccache" \ From 01ceeb81694ffe95896aed4a48ea28c84d4bdb56 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 15 Oct 2024 16:21:02 +0800 Subject: [PATCH 016/112] [CPU]Fix heap buffer overflow in DenormalNullifyCheck (#27047) ### Details: - *try to fix the SEH abort in windows test* ### Tickets: - *CVS-150527* --- .../custom/subgraph_tests/src/common/denormal_check.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/denormal_check.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/denormal_check.cpp index b98d4c61a1fb43..39fe70ebd87df4 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/denormal_check.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/denormal_check.cpp @@ -36,8 +36,8 @@ void SetUp() override { targetStaticShapes.push_back({inpShape}); targetDevice = ov::test::utils::DEVICE_CPU; - const auto elemsCount = shape_size(inpShape); const auto rtPrc = ov::element::f32; + const auto elemsCount = shape_size(inpShape) * rtPrc.size(); ov::ParameterVector params {std::make_shared(rtPrc, ov::Shape(inpShape))}; pConstStorage.reset(new ov::AlignedBuffer(elemsCount, alignment)); From 2d00d75ab57e0f064ce6915049d8ab649bc2cc60 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 15 Oct 2024 12:44:56 +0400 Subject: [PATCH 017/112] [OVC][PT FE] Cover leftovers for torch.export.ExportedProgram support (#27042) **Details:** Cover leftovers for ExportedProgram support **Ticket:** TBD Signed-off-by: Kazantsev, Roman --- tools/ovc/openvino/tools/ovc/convert.py | 3 +- .../moc_frontend/pytorch_frontend_utils.py | 57 ++++++++++--------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/tools/ovc/openvino/tools/ovc/convert.py b/tools/ovc/openvino/tools/ovc/convert.py index 782fa25ab2dd8b..77693ad4be2ca1 100644 --- a/tools/ovc/openvino/tools/ovc/convert.py +++ b/tools/ovc/openvino/tools/ovc/convert.py @@ -27,7 +27,7 @@ def convert_model( Framework-agnostic parameters: :param input_model: - Model object in original framework (PyTorch, Tensorflow) or path to model file. + Model object in original framework (PyTorch, TensorFlow) or path to model file. 
Supported formats of input model: @@ -35,6 +35,7 @@ def convert_model( torch.nn.Module torch.jit.ScriptModule torch.jit.ScriptFunction + torch.export.ExportedProgram TF tf.compat.v1.Graph diff --git a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py index 0119a541494cb9..d3b77c9a61f566 100644 --- a/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +++ b/tools/ovc/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py @@ -89,37 +89,38 @@ def get_pytorch_decoder_for_model_on_disk(argv, args): else: input_model = argv.input_model - if isinstance(input_model, (str, pathlib.Path)): - # attempt to load scripted model - try: - inputs = prepare_torch_inputs(example_inputs) - model = torch.jit.load(input_model) - model.eval() - decoder = TorchScriptPythonDecoder( - model, - example_input=inputs, - shared_memory=args.get("share_weights", True), - module_extensions=extract_module_extensions(args)) + if not isinstance(input_model, (str, pathlib.Path)): + return False + + # attempt to load scripted model + try: + inputs = prepare_torch_inputs(example_inputs) + model = torch.jit.load(input_model) + model.eval() + decoder = TorchScriptPythonDecoder( + model, + example_input=inputs, + shared_memory=args.get("share_weights", True), + module_extensions=extract_module_extensions(args)) + argv.input_model = decoder + argv.framework = 'pytorch' + return True + except: + pass + # attempt to load exported model + try: + exported_program = torch.export.load(input_model) + if hasattr(torch, "export") and isinstance(exported_program, (torch.export.ExportedProgram)): + from packaging import version + if version.parse(torch.__version__) >= version.parse("2.2"): + exported_program = exported_program.run_decompositions() + gm = exported_program.module() + decoder = TorchFXPythonDecoder(gm, dynamic_shapes=True) argv.input_model = decoder argv.framework = 'pytorch' return True - except: - pass - if isinstance(input_model, (str, pathlib.Path)): - # attempt to load exported model - try: - exported_program = torch.export.load(input_model) - if hasattr(torch, "export") and isinstance(exported_program, (torch.export.ExportedProgram)): - from packaging import version - if version.parse(torch.__version__) >= version.parse("2.2"): - exported_program = exported_program.run_decompositions() - gm = exported_program.module() - decoder = TorchFXPythonDecoder(gm, dynamic_shapes=True) - argv.input_model = decoder - argv.framework = 'pytorch' - return True - except: - pass + except: + pass return False From 6beeb762f2234a9f08e26b4b7ae048aee906942f Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Tue, 15 Oct 2024 10:52:17 +0200 Subject: [PATCH 018/112] [DOCS] tiny NPU adjustment-mstr (#27033) --- .../learn-openvino/llm_inference_guide/genai-guide-npu.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst index a77527db114bc7..4585ca97488023 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst @@ -102,7 +102,7 @@ Use the following code snippet to change the default settings: .. 
code-block:: python - pipeline_config = { "MAX_PROMPT_LEN": 1500, "MIN_RESPONSE_LEN": 500 } + pipeline_config = { "MAX_PROMPT_LEN": 1024, "MIN_RESPONSE_LEN": 512 } pipe = ov_genai.LLMPipeline(model_path, "NPU", pipeline_config) .. tab-item:: C++ @@ -110,7 +110,7 @@ Use the following code snippet to change the default settings: .. code-block:: cpp - ov::AnyMap pipeline_config = { { "MAX_PROMPT_LEN", 1500 }, { "MIN_RESPONSE_LEN", 500 } }; + ov::AnyMap pipeline_config = { { "MAX_PROMPT_LEN", 1024 }, { "MIN_RESPONSE_LEN", 512 } }; ov::genai::LLMPipeline pipe(model_path, "NPU", pipeline_config); From d8d1149f4d87425bbf6573dd02cbb1fb5367b0dc Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Tue, 15 Oct 2024 11:02:05 +0200 Subject: [PATCH 019/112] Initial py3.8 removal (#26468) ### Details: - *item1* - *...* ### Tickets: - CVS-151787 --------- Co-authored-by: Alina Kladieva --- .github/github_org_control/configs.py | 4 ++-- cmake/developer_package/ncc_naming_style/requirements_dev.txt | 1 - scripts/setupvars/setupvars.bat | 2 +- scripts/setupvars/setupvars.ps1 | 2 +- scripts/setupvars/setupvars.sh | 2 +- .../src/openvino/preprocess/torchvision/requirements.txt | 2 +- tests/constraints.txt | 1 - tests/layer_tests/requirements.txt | 4 ++-- thirdparty/open_model_zoo | 2 +- 9 files changed, 9 insertions(+), 11 deletions(-) diff --git a/.github/github_org_control/configs.py b/.github/github_org_control/configs.py index 872638bb657fdf..3df12803c77de0 100644 --- a/.github/github_org_control/configs.py +++ b/.github/github_org_control/configs.py @@ -14,8 +14,8 @@ from pathlib import Path -if sys.version_info[:2] < (3, 8): - raise Exception("Python version must be >= 3.8") +if sys.version_info[:2] < (3, 9): + raise Exception("Python version must be >= 3.9") class ConfigException(Exception): diff --git a/cmake/developer_package/ncc_naming_style/requirements_dev.txt b/cmake/developer_package/ncc_naming_style/requirements_dev.txt index a304b713cb3a2c..724ea2bf15721d 100644 --- a/cmake/developer_package/ncc_naming_style/requirements_dev.txt +++ b/cmake/developer_package/ncc_naming_style/requirements_dev.txt @@ -1,4 +1,3 @@ -clang==12.0.1; python_version == '3.8' clang==12.0.1; python_version == '3.9' clang==14.0; python_version == '3.10' clang==14.0; python_version == '3.11' diff --git a/scripts/setupvars/setupvars.bat b/scripts/setupvars/setupvars.bat index fac3e7f66c4ed4..8a09d974ecb295 100644 --- a/scripts/setupvars/setupvars.bat +++ b/scripts/setupvars/setupvars.bat @@ -67,7 +67,7 @@ set "PATH=%OPENVINO_LIB_PATHS%;%PATH%" :: Check if Python is installed set PYTHON_VERSION_MAJOR=3 -set MIN_REQUIRED_PYTHON_VERSION_MINOR=8 +set MIN_REQUIRED_PYTHON_VERSION_MINOR=9 set MAX_SUPPORTED_PYTHON_VERSION_MINOR=13 python --version 2>NUL diff --git a/scripts/setupvars/setupvars.ps1 b/scripts/setupvars/setupvars.ps1 index 7dacef5df4306b..2f0f960c1a08e3 100644 --- a/scripts/setupvars/setupvars.ps1 +++ b/scripts/setupvars/setupvars.ps1 @@ -63,7 +63,7 @@ Write-Host "[setupvars] OpenVINO environment initialized" # Check if Python is installed $PYTHON_VERSION_MAJOR = 3 -$MIN_REQUIRED_PYTHON_VERSION_MINOR = 8 +$MIN_REQUIRED_PYTHON_VERSION_MINOR = 9 $MAX_SUPPORTED_PYTHON_VERSION_MINOR = 13 try diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh index 3b4fb9407f9090..422bc4a035dd8b 100755 --- a/scripts/setupvars/setupvars.sh +++ b/scripts/setupvars/setupvars.sh @@ -100,7 +100,7 @@ if command -v lsb_release >/dev/null 2>&1; then fi PYTHON_VERSION_MAJOR="3" -MIN_REQUIRED_PYTHON_VERSION_MINOR="8" 
+MIN_REQUIRED_PYTHON_VERSION_MINOR="9" MAX_SUPPORTED_PYTHON_VERSION_MINOR="13" check_python_version () { diff --git a/src/bindings/python/src/openvino/preprocess/torchvision/requirements.txt b/src/bindings/python/src/openvino/preprocess/torchvision/requirements.txt index 23ba17d4918e71..201d5085bd1583 100644 --- a/src/bindings/python/src/openvino/preprocess/torchvision/requirements.txt +++ b/src/bindings/python/src/openvino/preprocess/torchvision/requirements.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cpu torch>=1.13 -torchvision; platform_machine == 'arm64' and python_version >= '3.8' +torchvision; platform_machine == 'arm64' and python_version >= '3.9' torchvision; platform_machine != 'arm64' pillow>=9.0 \ No newline at end of file diff --git a/tests/constraints.txt b/tests/constraints.txt index f09da0d3b409e9..053e9e93855aba 100644 --- a/tests/constraints.txt +++ b/tests/constraints.txt @@ -6,7 +6,6 @@ Jinja2>=2.11.2 pandas>=1.3.5 pymongo>=3.12.0 PyYAML>=5.4.1 -scipy>=1.7; python_version <= "3.8" scipy>=1.11.1; python_version >= "3.9" sympy>=1.10 wheel>=0.38.1 diff --git a/tests/layer_tests/requirements.txt b/tests/layer_tests/requirements.txt index 6799b32036df97..cb8e71f0c7fe7f 100644 --- a/tests/layer_tests/requirements.txt +++ b/tests/layer_tests/requirements.txt @@ -4,9 +4,9 @@ numpy onnxruntime requests torch -torchvision; platform_machine == 'arm64' and python_version >= '3.8' +torchvision; platform_machine == 'arm64' and python_version >= '3.9' torchvision; platform_machine != 'arm64' -sympy; platform_machine == 'arm64' and python_version >= '3.8' +sympy; platform_machine == 'arm64' and python_version >= '3.9' sympy; platform_machine != 'arm64' transformers packaging diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo index f798fd62d66c27..e7df86da686d2e 160000 --- a/thirdparty/open_model_zoo +++ b/thirdparty/open_model_zoo @@ -1 +1 @@ -Subproject commit f798fd62d66c273c757ab9c6038a47a364b726d0 +Subproject commit e7df86da686d2e1600282422e54f66c2fecea160 From 652615597ef0fc0cdd7de492337911da7d3f058b Mon Sep 17 00:00:00 2001 From: tadamczx <156996781+tadamczx@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:39:46 +0200 Subject: [PATCH 020/112] [DOCS] Removed OMZ from the build (#27052) --- docs/CMakeLists.txt | 12 ------------ docs/documentation_build_instructions.md | 1 - 2 files changed, 13 deletions(-) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index eedfe078cbd552..2dfb6bb8d04e81 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -8,7 +8,6 @@ set(ENABLE_CPP_API OFF CACHE BOOL "Build with C/C++ API.") set(ENABLE_PYTHON_API OFF CACHE BOOL "Build with Python API.") set(ENABLE_GENAI_API OFF CACHE BOOL "Build with GenAI API.") set(ENABLE_NOTEBOOKS OFF CACHE BOOL "Build with openvino notebooks.") -set(ENABLE_OMZ OFF CACHE BOOL "Build with open_model_zoo.") set(ENABLE_OVMS OFF CACHE BOOL "Build with ovms.") set(OVMS_DOCS_DIR "" CACHE PATH "Path to model server documentation dir.") @@ -90,17 +89,6 @@ function(build_docs) list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "FINISHED preprocessing OVMS") endif() - if(${ENABLE_OMZ}) - list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "STARTED preprocessing OMZ") - list(APPEND commands - COMMAND ${Python3_EXECUTABLE} ${OpenVINO_SOURCE_DIR}/thirdparty/open_model_zoo/ci/prepare-documentation.py ${CMAKE_BINARY_DIR}/open_model_zoo) - list(APPEND commands COMMAND ${Python3_EXECUTABLE} ${FILE_HELPER_SCRIPT} - --filetype=md - 
--input_dir=${CMAKE_BINARY_DIR}/open_model_zoo - --output_dir=${SPHINX_SOURCE_DIR} - --exclude_dir=${SPHINX_SOURCE_DIR}) - list(APPEND commands COMMAND ${CMAKE_COMMAND} -E cmake_echo_color --green "FINISHED preprocessing OMZ") - endif() # Preprocess docs add_custom_target(preprocess_docs diff --git a/docs/documentation_build_instructions.md b/docs/documentation_build_instructions.md index 490da1b1029bd3..d9219454b86a19 100644 --- a/docs/documentation_build_instructions.md +++ b/docs/documentation_build_instructions.md @@ -45,5 +45,4 @@ Depending on the needs, following variables can be added to first cmake call: - building C/C++ API: `-DENABLE_CPP_API=ON` - building Python API: `-DENABLE_PYTHON_API=ON` - building Notebooks: `-DENABLE_NOTEBOOKS=ON` -- building OMZ: `-DENABLE_OMZ=ON` - building OVMS: `-DENABLE_OVMS=ON -DOVMS_DOCS_DIR=` From 8effdd465125e00dbe753046a54bdff3fa752ea8 Mon Sep 17 00:00:00 2001 From: Vishniakov Nikolai Date: Tue, 15 Oct 2024 12:46:57 +0200 Subject: [PATCH 021/112] [OV JS][DOCS] Edits in JS API docs (#26875) ### Details: - Add Node.js API contributing guide - Edits to the `openvino-node` readme - Minor documentation fixes ### Tickets: - 149666 --------- Co-authored-by: Sebastian Golebiewski --- CONTRIBUTING.md | 1 + src/bindings/js/docs/CODESTYLE.md | 6 +++ src/bindings/js/docs/README.md | 11 ++--- src/bindings/js/docs/code_examples.md | 20 +++++--- src/bindings/js/docs/test_examples.md | 11 +++-- src/bindings/js/node/CONTRIBUTING.md | 67 +++++++++++++++++++++++++++ src/bindings/js/node/README.md | 30 ++++++++++-- src/bindings/js/node/package.json | 5 +- 8 files changed, 127 insertions(+), 24 deletions(-) create mode 100644 src/bindings/js/node/CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7169ebc2ba2c9b..c30ce12665ab33 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -115,6 +115,7 @@ Choose the component your Good First Issue is related to. You can run tests to m - [C API](https://github.com/openvinotoolkit/openvino/tree/master/src/bindings/c) - [Core](https://github.com/openvinotoolkit/openvino/tree/master/src/core) - [Python API](https://github.com/openvinotoolkit/openvino/tree/master/src/bindings/python) +- [Node.js API](https://github.com/openvinotoolkit/openvino/tree/master/src/bindings/js/node) ##### Frontends - [IR Frontend](https://github.com/openvinotoolkit/openvino/tree/master/src/frontends/ir) diff --git a/src/bindings/js/docs/CODESTYLE.md b/src/bindings/js/docs/CODESTYLE.md index 0ebfd322767b57..2441663d6cc424 100644 --- a/src/bindings/js/docs/CODESTYLE.md +++ b/src/bindings/js/docs/CODESTYLE.md @@ -1,9 +1,14 @@ # Code Style Guide +Node.js bindings contain two parts: C++ and TypeScript/JavaScript. + This article presents the coding standards for JavaScript and TypeScript parts of **openvino-node** package. The following rules will help maintain code quality and consistency throughout the codebase. +For C++ codestyle rules, refer to [this document](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/coding_style.md). + Make sure your IDE has ESLint plugin installed. Its rules are specified in the [.eslint-global.js file](../.eslintrc-global.js). Keep in mind that your PR will not be approved if it does not meet the following requirements. + ## General Rules ### 1. Semicolons @@ -89,6 +94,7 @@ Make sure your IDE has ESLint plugin installed. 
Its rules are specified in the [ - Special case for the `catch` keyword: No space after `catch` - **Enforced By**: `keyword-spacing: ['error', { overrides: { catch: { after: false } } }]` + ## Additional Resources For further details on each rule, refer to the [ESLint documentation](https://eslint.org/docs/rules/). diff --git a/src/bindings/js/docs/README.md b/src/bindings/js/docs/README.md index bada676878847f..f0c70cf4dd9aed 100644 --- a/src/bindings/js/docs/README.md +++ b/src/bindings/js/docs/README.md @@ -2,10 +2,10 @@ ## Folders -- `./docs` - documentation -- `./node` - openvino-node npm package +- [./docs](../docs/) - documentation +- [./node](../node/) - openvino-node npm package -## openvino-node Package Developer Documentation +## `openvino-node` Package Developer Documentation ### Components @@ -28,7 +28,6 @@ ```bash cmake \ -DCMAKE_BUILD_TYPE=Release \ - -DENABLE_FASTER_BUILD=ON \ -DCPACK_GENERATOR=NPM \ -DENABLE_SYSTEM_TBB=OFF -UTBB* \ -DENABLE_TESTS=OFF \ @@ -75,9 +74,9 @@ [OpenVINO™ Node.js Bindings Examples of Usage](../../../../samples/js/node/README.md) -## Contribution +## Contributing -If you want to contribute to the project, refer to the [code style rules](./CODESTYLE.md) and [contribution guide](../../../../CONTRIBUTING.md) first. +Your contributions are welcome! Make sure to read the [Contribution Guide](https://github.com/openvinotoolkit/openvino/blob/master/src/bindings/js/node/CONTRIBUTING.md) to learn how you can get involved. ## See Also diff --git a/src/bindings/js/docs/code_examples.md b/src/bindings/js/docs/code_examples.md index 13bfa14812d54b..08d92e7307dbfe 100644 --- a/src/bindings/js/docs/code_examples.md +++ b/src/bindings/js/docs/code_examples.md @@ -1,22 +1,24 @@ # How to extend the OpenVINO™ JavaScript API code -## Build the OpenVINO™ JavaScript API +## Build the OpenVINO™ JavaScript API + For detailed build instructions, refer to the [OpenVINO™ JavaScript API documentation](./README.md). + ## Project's naming conventions + When implementing the C++ sources for the JavaScript API, it is essential to adhere to the OpenVINO naming conventions described in the [OpenVINO Coding Style Guide](../../../../docs/dev/coding_style.md). In summary, the naming style employs `Snake Case` for methods, functions, and variables, while `Camel Case` is used for class names. Additionally, the naming of entities in the C++ sources should closely mirror their equivalents in the C++ API to maintain consistency. For methods that are exposed to JavaScript, the naming convention transitions to `Camel Case`, aligning with common JavaScript practices. As an example, a method in the C++ API named `get_element_type` would be represented in the JavaScript API as `getElementType()`. + ## node-addon-api module [node addon api](https://github.com/nodejs/node-addon-api) is used to create OpenVINO JavaScript API for Node.js. The quickest way to learn is to follow the official [examples](https://github.com/nodejs/node-addon-examples). It is recommended to check out the tutorial on [how to create a JavaScript object from a C++ object](https://github.com/nodejs/node-addon-examples/tree/main/src/2-js-to-native-conversion/object-wrap-demo/node-addon-api). - - - ## Adding a new class and method + To introduce a new `MyTensor` class that interacts with the `ov::Tensor` class, follow these steps: - The class should facilitate construction from an ov::Tensor instance and allow initialization from a JavaScript element type and shape. 
- It should also provide a getElementType method that retrieves the ov::Tensor element type. @@ -25,7 +27,7 @@ Begin by creating a header file for the `MyTensor` class in the OpenVINO reposit ```cpp class MyTensor : public Napi::ObjectWrap { public: - // Constructor for the wrapper class + // Constructor for the wrapper class MyTensor(const Napi::CallbackInfo& info); // It returns a JavaScript class definition @@ -75,12 +77,15 @@ add_library(${PROJECT_NAME} SHARED ) ``` + ### Argument validation and conversion When binding JavaScript arguments with C++ functions, it is crucial to validate and convert the arguments appropriately. The template `ov::js::validate` function is a utility that facilitates this process. It is particularly useful for handling different overloads of functions and ensuring standardized error messages when arguments do not match expected signatures. Before implementing a new conversion function, such as `js_to_cpp`, review the existing [helper methods](../../node/include/helper.hpp) to see if one already meets your requirements. + ### New class initialization + When a new class is introduced to the `openvino-node` module, it must be initialized upon module loading. This is done in the [addon.cpp](../../src/addon.cpp) file. The initialization process registers the class with the Node.js environment so that it can be used within JavaScript code. ```cpp Napi::Object init_module(Napi::Env env, Napi::Object exports) { @@ -100,6 +105,7 @@ struct AddonData { ``` ### Document the new functionality + The last step is to add the TypeScript type definitions and describe the new functionality. ```typescript /** @@ -132,9 +138,9 @@ export interface NodeAddon { Now that coding is finished, remember to rebuild the project and test it out. -To learn how to test your code, refer to the guide on [how to test OpenVINO™ JavaScript API.](./test_examples.md) +To learn how to test your code, refer to the guide on [how to test OpenVINO™ JavaScript API.](./test_examples.md) ## See also * [OpenVINO™ README](../../../../README.md) * [OpenVINO™ bindings README](../../README.md) - * [Developer documentation](../../../../docs/dev/index.md) \ No newline at end of file + * [Developer documentation](../../../../docs/dev/index.md) diff --git a/src/bindings/js/docs/test_examples.md b/src/bindings/js/docs/test_examples.md index b8ff0c8ff7c9d0..0e75cb56f3a700 100644 --- a/src/bindings/js/docs/test_examples.md +++ b/src/bindings/js/docs/test_examples.md @@ -1,6 +1,6 @@ # How to test the OpenVINO™ JavaScript API -## Build the OpenVINO™ JavaScript API +## Build the OpenVINO™ JavaScript API For detailed build instructions, refer to the [OpenVINO™ JavaScript API documentation](./README.md). @@ -17,14 +17,14 @@ npm run test To run specific test files, you can pass one or more glob patterns: ```shell -node --test "tests/unit/core.test.js" "tests/unit/*model.test.js" +node --test "tests/unit/core.test.js" "tests/unit/*model.test.js" ``` Before executing individual test files, a one-time setup is required. If you have not previously executed `npm run test`, initiate the setup by running the following command: ```shell npm run test_setup -``` +``` More information on running tests from the command line can be found in the [Node.js documentation]( https://nodejs.org/docs/latest/api/test.html#running-tests-from-the-command-line). @@ -45,11 +45,11 @@ It is recommended to run the code style check each time new tests are added. 
## Writing OpenVINO™ JavaScript API tests + ### Before start Follow and complete [Examples of OpenVINO™ JavaScript API code](./code_examples.md). - ### Adding new test-case in the correct place Each new test should verify the correct behavior of the new functionality (e.g. class, method). @@ -57,7 +57,8 @@ Unit test files are located in the `/src/bindings/js/node/tests/u Always add tests to the correct locations and create new files only when necessary. *Remember to include the license on top of each new file*. -### Test writing guidelines + +### Test writing guidelines Each test file starts with a `describe` block to group all tests related to a specific class or module. The name of the `describe` block should match the name of the class or module being tested, for example *ov.Core tests*. Within the `describe` block, individual tests are defined using `test` or `it` blocks, with the name of the test reflecting what is being tested. If multiple tests relate to the same method, they can be grouped within a nested `describe` block. diff --git a/src/bindings/js/node/CONTRIBUTING.md b/src/bindings/js/node/CONTRIBUTING.md new file mode 100644 index 00000000000000..aacef418aeed2d --- /dev/null +++ b/src/bindings/js/node/CONTRIBUTING.md @@ -0,0 +1,67 @@ +# Contributing to OpenVINO™ Node.js API + +Your commitment to this project is greatly appreciated and the following guide is intended to help you contribute. + +Make sure to read [main contribution guide](https://github.com/openvinotoolkit/openvino/blob/master/CONTRIBUTING.md) first. It covers most topics related to contributing to OpenVINO. + + +## TLDR + +1. Decide what you want to change. +2. Create your fork of the OpenVINO repository. +3. Create a branch with a meaningful name for your changes. +4. Align the code style, commit the changes, and run tests. +5. Create a Pull Request, which clearly describes what has been changed and why. +6. Go through the Code Review. +7. Get your awesome code merged! + +Read the section below for more details. + + +## How to Decide What to Change + +In case of minor fixes, like changing variable names, additional parameter checks, etc., go to the next step. + +However, if you want to bring significant changes, for example, the extension of architecture or a big part of functionality, that involves a large amount +of source code, open [an issue](https://github.com/openvinotoolkit/openvino/issues/new?assignees=octocat&labels=enhancement%2Cfeature&projects=&template=feature_request.yml&title=%5BFeature+Request%5D%3A+) first and discuss your idea with +codeowners. It will prevent you from doing extra work. + +You can also take one of the well-described tasks from the [Good First Issue](https://github.com/orgs/openvinotoolkit/projects/3/views/14) section. It can be a great start to contributing with codeowners' support! + + +## Let's code + +Get familiar with Node.js API architecture and code samples. +Refer to the [guide](../docs/code_examples.md), which will help you understand the component structure and the code style. + +The environment setup and build instructions can be found in [Building the Node.js API](https://github.com/openvinotoolkit/openvino/blob/master/src/bindings/js/docs/README.md#openvino-node-package-developer-documentation). + +Run tests! If you add a new functionality, make sure that it is covered by tests first. +Read [the guide](../docs/test_examples.md) for more details about the tests and their runs. +Many CI checks will run after getting a Code Review. 
Make sure that +all checks have passed. CI checks are composed of both functional tests and code-style checks and may fail because of warnings/errors in both stages. + +Remember to follow [our codestyle](../docs/CODESTYLE.md). +By following the provided guide and using an automatic code style checking tool, like +**eslint** and **clang-format-9**, you will save some time and help with the code review of proposed changes. + + +## Description of the Pull Request + +Append all PR titles with the `[OV JS]` tag. Provide any relevant details in the description, as it will definitely help with the review. The minimum requirement is a compact, bulleted list of proposed changes. + +Use the following template: +``` +*Describe what is the purpose of this PR* + +### Details: +- *Describe your changes.* +- ... + +``` + + +## License + +By contributing to the OpenVINO project, you agree that your contributions will be +licensed under the terms of the [LICENSE](https://github.com/openvinotoolkit/openvino/blob/master/LICENSE). diff --git a/src/bindings/js/node/README.md b/src/bindings/js/node/README.md index e2c38f2a18e516..c927bd0b360ed4 100644 --- a/src/bindings/js/node/README.md +++ b/src/bindings/js/node/README.md @@ -1,8 +1,14 @@ # OpenVINO™ Node.js Bindings -Use OpenVINO JavaScript API for your Node.js application. +Use OpenVINO to deploy deep learning models easily in Node.js applications. -## Usage +## Introduction + +OpenVINO™ is an open-source toolkit designed for high-performance deep learning inference. +The Node.js API provides bindings to a subset of the OpenVINO Runtime APIs. +The Node.js bindings enable JavaScript developers to use the capabilities of OpenVINO in their applications. + +## Quick Start Install the **openvino-node** package: ```bash @@ -14,15 +20,21 @@ Use the **openvino-node** package: const { addon: ov } = require('openvino-node'); ``` +Refer to the complete description of the `addon` API in the [documentation](https://docs.openvino.ai/2024/api/nodejs_api/addon.html). + +See the [samples](https://github.com/openvinotoolkit/openvino/blob/master/samples/js/node/README.md) for more details on how to use it. + ## Usage in Electron applications To use the package in development of Electron applications on Windows, make sure that **Desktop development with C++** component from [Build Tools for Visual Studio](https://aka.ms/vs/17/release/vs_BuildTools.exe) is installed. -## Build From Sources +## Supported Platforms -For more details, refer to the [OpenVINO™ JavaScript API Developer Documentation](https://github.com/openvinotoolkit/openvino/blob/master/src/bindings/js/docs/README.md#openvino-node-package-developer-documentation) +- Windows x86 +- Linux x86/ARM +- MacOS x86/ARM ## Documentation & Samples @@ -31,11 +43,19 @@ For more details, refer to the [OpenVINO™ JavaScript API Developer Documentati ## Live Sample -You can run this sample in the browser; no installation is required. +You can run the following sample in the browser; no installation is required. [Codesandbox](https://codesandbox.io/) is a free online service with limited resources. For optimal performance and more control, it is recommended to run the sample locally. 
- [hello-classification-sample](https://codesandbox.io/p/devbox/openvino-node-hello-classification-sample-djl893) +## Build From Sources + +For more details, refer to the [OpenVINO™ JavaScript API Developer Documentation](https://github.com/openvinotoolkit/openvino/blob/master/src/bindings/js/docs/README.md#openvino-node-package-developer-documentation) + +## Contributing + +Contributions are always welcome! Read the [Contribution Guide](https://github.com/openvinotoolkit/openvino/blob/master/src/bindings/js/node/CONTRIBUTING.md) to learn how you can get involved. + ## See Also * [OpenVINO™ README](https://github.com/openvinotoolkit/openvino/blob/master/README.md) diff --git a/src/bindings/js/node/package.json b/src/bindings/js/node/package.json index d00633c93b062a..8bc6bbd4bb1d46 100644 --- a/src/bindings/js/node/package.json +++ b/src/bindings/js/node/package.json @@ -48,5 +48,8 @@ "remote_path": "./repositories/openvino/nodejs_bindings/{version}/{platform}/", "package_name": "openvino_nodejs_bindings_{platform}_{version}_{arch}.tar.gz", "host": "https://storage.openvinotoolkit.org" - } + }, + "keywords": [ + "OpenVINO" + ] } From b2cd7aa2c548cf25634d515a4159c9618565b531 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 15 Oct 2024 12:49:45 +0200 Subject: [PATCH 022/112] [CPU] Support degenerate matrices in MatMul (#26830) ### Details: According to the empty sum convention, the degenerate case of matrix multiplication, where the collapsing dimension is zero (e.g. A[6, 0] x B[0, 5] = C[6, 5]), results in a matrix filled with zeroes. This PR adds special processing for this degenerate case. ### Tickets: - CVS-152850 --- src/plugins/intel_cpu/src/nodes/matmul.cpp | 21 +++ src/plugins/intel_cpu/src/nodes/matmul.h | 2 + src/plugins/intel_cpu/src/nodes/memory.cpp | 2 +- .../instances/common/matmul.cpp | 4 +- .../subgraph_tests/lora_pattern.cpp | 21 +++ .../include/subgraph_tests/lora_pattern.hpp | 23 +++ .../subgraph/lora_pattern.hpp | 44 ++++++ .../src/subgraph/lora_pattern.cpp | 143 ++++++++++++++++++ 8 files changed, 257 insertions(+), 3 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/lora_pattern.cpp create mode 100644 src/tests/functional/plugin/shared/include/subgraph_tests/lora_pattern.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/lora_pattern.hpp create mode 100644 src/tests/functional/shared_test_classes/src/subgraph/lora_pattern.cpp diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 50cb3353612996..92d8f356728ed9 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -543,6 +543,20 @@ void MatMul::prepareParams() { if (!src0MemPtr || !src0MemPtr->isDefined() || !src1MemPtr || !src1MemPtr->isDefined()) OPENVINO_THROW(errorPrefix, " has undefined input memory"); + // check for a degenerate case. In this context the degenerate case is a matrix multiplication where the + // collapsing dimension is zero, e.g., AB=C, where A has the shape [10, 0] and B has the shape [0, 20], + // consequently C has shape [10, 20]. In this scenario C is a null matrix (a matrix filled with zeroes) + // according to the empty sum convention. 
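+ // To make the convention concrete: each output element c(i, j) is the sum over k of a(i, k) * b(k, j); when the + // collapsing dimension is zero that sum has no terms, so every c(i, j) is an empty sum equal to zero, and the + // whole output can be produced by simply zero-filling the destination memory (see MatMul::execute() below).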
+ if (src0MemPtr->getDesc().getShape().hasZeroDims() && src1MemPtr->getDesc().getShape().hasZeroDims() && + !dstMemPtr->getDesc().getShape().hasZeroDims()) { + // todo: obviously we need a special executor that would process fused ops providing a correct result + OPENVINO_ASSERT(!withBiases && fusedWith.empty(), + "Matmul doesn't support a degenerate case when other ops are fused"); + //reset executor + execPtr.reset(); + return; + } + const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) OPENVINO_THROW(errorPrefix, " did not set preferable primitive descriptor"); @@ -646,6 +660,9 @@ void MatMul::prepareParams() { void MatMul::execute(dnnl::stream strm) { if (execPtr) { execPtr->exec(primArgs, strm); + } else if (hasEmptyInputTensors()) { + // this is a degenerate case, fill output with zeroes + getDstMemoryAtPort(0)->nullify(); } else { OPENVINO_THROW(errorPrefix, " doesn't have an initialized executor"); } @@ -691,6 +708,10 @@ const std::vector& MatMul::getDefaultImplPriority() { return priorities; } +bool MatMul::isExecutable() const { + return !hasEmptyOutputTensors(); +} + } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/matmul.h b/src/plugins/intel_cpu/src/nodes/matmul.h index 7b8f064e17260b..2e487148d0ec0c 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.h +++ b/src/plugins/intel_cpu/src/nodes/matmul.h @@ -43,6 +43,8 @@ class MatMul : public Node { const std::vector& getDefaultImplPriority() override; bool canBeExecutedInInt8() const override; + bool isExecutable() const override; + protected: AttrPtr initPrimitiveAttr() override; AttrPtr initPrimitiveAttr(const VectorDims& dims); diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 74a3b670dad126..88693ebfa49fdf 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -419,7 +419,7 @@ MemoryInputBase::~MemoryInputBase() { } MemoryOutputBase& MemoryInputBase::getOutputNode() { - OPENVINO_ASSERT(outputNode, "MemoryOutput ", getName(), " doesn't have sibling input"); + OPENVINO_ASSERT(outputNode, "MemoryInput ", getName(), " doesn't have sibling output"); return *outputNode; } diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/matmul.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/matmul.cpp index 934a0f4bc95f18..9b5d7287875d7c 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/common/matmul.cpp @@ -35,8 +35,8 @@ const std::vector IS = { const std::vector IS_Dynamic = { { { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} - {{-1, -1}, {{55, 12}, {33, 7}}}, // input 0 - {{-1, -1}, {{12, 55}, {7, 33}}} // input 1 + {{-1, -1}, {{55, 12}, {33, 7}, {33, 0}, {0, 33}}}, // input 0 + {{-1, -1}, {{12, 55}, {7, 33}, {0, 33}, {33, 0}}} // input 1 }, {false, false} }, diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/lora_pattern.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/lora_pattern.cpp new file mode 100644 index 00000000000000..d85ced5f07a92e --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/lora_pattern.cpp @@ -0,0 +1,21 @@ +// 
Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/lora_pattern.hpp" + +using namespace ov::test; + +namespace { + +INSTANTIATE_TEST_SUITE_P(smoke, + LoraPatternConvolution, + ::testing::Values(ov::test::utils::DEVICE_CPU), + LoraPatternBase::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke, + LoraPatternMatmul, + ::testing::Values(ov::test::utils::DEVICE_CPU), + LoraPatternBase::getTestCaseName); + +} // namespace diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/lora_pattern.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/lora_pattern.hpp new file mode 100644 index 00000000000000..8f9687b7b93b2a --- /dev/null +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/lora_pattern.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/subgraph/lora_pattern.hpp" + +namespace ov { +namespace test { + +TEST_P(LoraPatternMatmul, empty_tensors) { + targetStaticShapes = {{{{1, 20, K}}, {{N, K}}}}; + run_test_empty_tensors(); +} + +TEST_P(LoraPatternConvolution, empty_tensors) { + targetStaticShapes = {{{1, num_channels, 64, 64}}}; + run_test_empty_tensors(); +} + +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/lora_pattern.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/lora_pattern.hpp new file mode 100644 index 00000000000000..16764d37dcf688 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/lora_pattern.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +class LoraPatternBase : public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void run_test_empty_tensors(); + +protected: + static constexpr auto t4_name = "lora/MatMul.B"; + static constexpr auto t5_name = "lora/MatMul.alpha"; + static constexpr auto t6_name = "lora/MatMul.A"; + static constexpr auto netType = ov::element::f32; +}; + +class LoraPatternMatmul : public LoraPatternBase, public testing::WithParamInterface { +public: + void SetUp() override; + +protected: + static constexpr size_t K = 563ul; // Weights matrix K dimension + static constexpr size_t N = 2048ul; // Weights matrix N dimension +}; + +class LoraPatternConvolution : public LoraPatternBase, public testing::WithParamInterface { +public: + void SetUp() override; + +protected: + static constexpr size_t num_channels = 320ul; +}; + +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/tests/functional/shared_test_classes/src/subgraph/lora_pattern.cpp b/src/tests/functional/shared_test_classes/src/subgraph/lora_pattern.cpp new file mode 100644 index 00000000000000..6f74fd09b022a6 --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/subgraph/lora_pattern.cpp @@ -0,0 +1,143 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/subgraph/lora_pattern.hpp" + +#include "common_test_utils/node_builders/eltwise.hpp" +#include "common_test_utils/node_builders/convolution.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" +#include 
"shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + + +std::string LoraPatternBase::getTestCaseName(const testing::TestParamInfo& obj) { + auto device_name = obj.param; + return std::string{"targetDevice="} + device_name; //NOLINT +} + +constexpr ov::element::Type LoraPatternBase::netType; //redundant variable definition for C++ prior to C++17 + +void LoraPatternBase::run_test_empty_tensors() { + compile_model(); + inferRequest = compiledModel.create_infer_request(); + ASSERT_TRUE(inferRequest); + generate_inputs(targetStaticShapes.front()); + for (const auto& input : inputs) { + inferRequest.set_tensor(input.first, input.second); + } + + inferRequest.infer(); + auto outputs = function->outputs(); + + auto tx_result = inferRequest.get_tensor(outputs[0]); + auto tz_result = inferRequest.get_tensor(outputs[1]); + ov::test::utils::compare(tx_result, tz_result, 1e-4, 1e-4); +} + +void LoraPatternMatmul::SetUp() { + targetDevice = this->GetParam(); + + ov::PartialShape shape_x = {-1, -1, K}; + ov::PartialShape shape_w = {N, K}; + + auto param_y = std::make_shared(netType, shape_x); + auto param_w = std::make_shared(netType, shape_w); + + // "Main" matrix multiplication from the original transformer model + auto tx = std::make_shared(param_y, param_w, false, true); + + // LoRA parameters from states + auto variable_t4 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({N, -1}), netType, t4_name}); + auto t4 = std::make_shared(variable_t4); + auto t4_assign = std::make_shared(t4, variable_t4); + + auto variable_t5 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({1, -1}), netType, t5_name}); + auto t5 = std::make_shared(variable_t5); + auto t5_assign = std::make_shared(t5, variable_t5); + + auto variable_t6 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({-1, K}), netType, t6_name}); + auto t6 = std::make_shared(variable_t6); + auto t6_assign = std::make_shared(t6, variable_t6); + + // Apply LoRA parameters to the current activations + auto t5810 = std::make_shared(param_y, t6, false, true); + auto t5811 = std::make_shared(t5810, t5); + auto t5812 = std::make_shared(t5811, t4, false, true); + + // Mix LoRA part into normally computed activations after the "main" MatMul + auto tz = std::make_shared(tx, t5812); + + auto result_x = std::make_shared(tx); + auto result_z = std::make_shared(tz); + + function = std::make_shared(ov::ResultVector({result_x, result_z}), + ov::SinkVector({t4_assign, t5_assign, t6_assign}), + ov::ParameterVector({param_y, param_w})); +} + +void LoraPatternConvolution::SetUp() { + targetDevice = this->GetParam(); + + ov::PartialShape shape_x = {-1, num_channels, -1, -1}; + + auto param_y = std::make_shared(netType, shape_x); + + // Original Convolution that is modified by LoRA adapter later + auto tx = ov::test::utils::make_convolution(param_y, + netType, + {1, 1}, + {1, 1}, + {0, 0}, + {0, 0}, + {1, 1}, + ov::op::PadType::EXPLICIT, + num_channels); + + // LoRA parameters from states + auto variable_t4 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({num_channels, -1}), netType, t4_name}); + auto t4 = std::make_shared(variable_t4); + auto t4_assign = std::make_shared(t4, variable_t4); + + auto variable_t5 = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape({1, -1}), netType, t5_name}); + auto t5 = std::make_shared(variable_t5); + auto t5_assign = std::make_shared(t5, variable_t5); + + auto variable_t6 = std::make_shared( + 
From 296ab9a5bae883c91e9debdcb514b40b95975fad Mon Sep 17 00:00:00 2001
From: Andrzej Kopytko
Date: Tue, 15 Oct 2024 13:16:46 +0200
Subject: [PATCH 023/112] [Docs] Update files for ovms benchmarks (#27058)

### Details:
- *item1*
- *...*

### Tickets:
- *ticket-id*
---
 .../OV-2024.4-Performance-Data.xlsx             |  Bin 329057 -> 335026 bytes
 .../OV-2024.4-platform_list.pdf                 |  Bin 240967 -> 240925 bytes
 .../OV-2024.4-system-info-detailed.xlsx         |  Bin 83914 -> 83937 bytes
 .../data/graph-data-ovms.json                   | 1705 ++++++++---------
 .../benchmarks_files/graph-config.json          |   28 +-
 5 files changed, 845 insertions(+), 888 deletions(-)

diff --git a/docs/sphinx_setup/_static/benchmarks_files/OV-2024.4-Performance-Data.xlsx b/docs/sphinx_setup/_static/benchmarks_files/OV-2024.4-Performance-Data.xlsx
index 057e132d384167216531468e24277cbc53c072d1..9b53d90e0862dbdb9a1e614cf9ae1950ccc6c4f2 100644
GIT binary patch
delta 149881
[base85-encoded binary delta payload omitted]
z@LSXfQjMJ)r}5jidyMc~c-qrmjqC#ezZakPRFd|Or$#!QIX`pcbGT?kV;z2fw-xX};#L9J-S9C0;lwl4rU>E(QWE9a=9v_X&mXToS=!IYcy?non(>%Cr@F+ab@0IdnceFwG%NtBbUQ@93WBjlQtVQ_Mxn!?{vWKO0s7kn>5bg`RMs6 zo;$9!Fey6RTNIy%-34=CG&OTXOhC7K3}%i#!6)bAW5-Cs$3NP456ujJi=1yUYXN}e z6Q8UC_%;JD1Gb}n$P7dsDwUQFt(TtoG%8|l{?Q-ghx}-Vkt@Op+q^Pl!k^IbBREBf zYncm4`k?=CiuH-wRzj26Md5h9 zJLN}TN)Cr+Xc_XxDJSoL43EOycgP$SrAlKQ;%<|ltDvOC zY$ZP-4Go;fo^PS0B<(BLgLIhERH%{U2pwblNe1af%_Y^?+;JM|Z3jq-^sagEQl#e< zf!|`zmL`y1(na!AY3XntQI4?aBRpfLNssh~csryAn1`a{sgKit@CqE#7f^=|XvcjF zhO8n{R6@6{46Jue%NBo@$9BF9Yb`v|>+j^1_>2{;(NeqykMsqU0JSyR_oR}orj_Jk zmTMbN&LKUU6Y)r|Z?3_cvM8FtcewOO4`+t^5?8h+${WF-h&&lvyh)xupb4A!@JJ8# zLC58ZMuG_qpB(CcL+9T-RdBCF4J?gxIA4XwNr&r2TWpW|P?z2XAHs%#{<0LNua=l+fBTiswfMjjT}AnY<95B~2&-pkI8N zR7pBdo*Lio=s(&3^JZToDUMhV+jK)i|1t&Qz1$)4bUi&N0TXQ0{zKriZsskC%x zz4TP6Q4v%6&%TQbY-fV63r3XQHp&d?;TP1#1-`(BlMSgNJ6wKt zgd74$RSOUHwsUGIWkeBOyIU;?pDGhrbYuZbsbTMx`;YBRzwe#B{Y2T8an6;95k9tIRHVJ z1suRYJ^}|2@e3yuI{*=37`swVGkx2-k}~~Lc-9o+7Q?I!6~P{oQIvR=lD?LwN=pY2 zsJ)v!rZe`wBuw8qfCf;o$9|_*h@g3-ll|A-?(lGX*gp7w00030{{R30 z|No3q+j5&Q6#Ny2*Nkg(GdL46gE78hVvOy$dFd!d2E_s?Lf}iM|6W0yOq%wg@N#w~ z?U_Ai?}`*czF>sI9Y-XW08R))Jk0`-L0gW0#!kynR9Q+vk!}Dk1?O+8(^z#5)bmHZ zASku&N?|8+GQsDYM}p5`a>yz^Mtr!LQDa2TgQqDtlA(I3GR5NarCC%K)z;B&h04X` z1lD61Cblh01;G5;BAQnb{ ze6l;h*o7DupV)Pd|K!{Ecl6CUjF%u!VDXjspK*|9zTWjNN;IP@HB7dnvY+?;dA3lO z<(Z{=<@>HS?c6}MPrUFUjx;~WL&Y`p$Jf1eH5yetaBZ70~2=;g(ovBdc@4&F(I zrUQ4UeCg1+@-T>fzkO^j{bL-j4DM}z*%R}~n=Llxlpo@kh3EEPrqeTp45R!Y2Jlf! zjmS`!C3cHqTV|732%gw&S)-p_L+@LzWwkuT(ObG=bz4qfHCvY6^#;R^r&yZtt7-RH zoq`0;0ZUL`NEq(L(mLNv(E%mtd(8=LSVNYc3FjnxM=1m`WHr)cX-`CCub|#LqtJ~Y zFR3Kb3v&+YFxxUEj5C0^$gkipL>%A&MYgP~J-yd8)t->5AmeCvJ<#ereHd0714qIR zO~me(F_i=sx9Fk-5doKr(F72;BzOUS2Y<}rt&>Ov007<{000dD004MwFLQKxY-MvU zcx`OtSy^w}I1qkcp#MS8yQ3qPYa5n}IEkglBDRW_X(ZwJ<054o%_%_LIDHfv8)v{> zW5Ww3YV?RtLPE|m7KS zPABO)2@;VRAzP=yH{F*ZLx1@gfsO}#)6iPEWfAO?eP!FWX%(yYrg}$HNl}9JjDK51$($28 zEk+5UTRrT`0cBtLyo7+Z6(Y?+|E}e%8eUr13o(0Pq^Q;jb;AjwCq=T#=;saSm8LEY z{*5(4ews{|vZ+0-^mq2l7jxeX0`Q>1i7u_0AHi(TV@eDQ@4S%5C_B%BQe6=qhY*?c&~!p^ zEWk)PSuEwhU@Q2YGa(=yOeYaR3zi}(V_&hNyb@9*WJBK+OUPzLXd`KaH<#^!0UCeH z%cghlmxA0;9bGJ=u&G~B;|Y$)I_YB14J(c2Rur?(8hVnIRd0kY>LoXzd z5L5afL+-!vULM)RrRo)`QCU~XnpZwT>c}5;QVV~+a3*jlRR5*&tW}my%UTliS{C))Nqrh&p zz{P3}SyuLW&0dA%x`GyL>S z<(g@|x3YgbZOu8Y0$B?^)flf)!|tF~RehdD<_y$9*UwJ}U|z3Ik5#D# z#$QeQeMR7e?Q2K7L0@nZEl7V@azaC%7{g)o-?DShYYywkvP#Ab=hY|E1cc=`Lim-V z)g#`hil{T(Du+LU+p5YKH3@%M864)d2KAIhl}8$Nig;@*$ENZsMO48Bi{CGVit?i4 z)?ET_DfzUnuTR&kRYKIHrYerDL)L{daLL!JAVC>&5nrz<3RIH@ZCQV*7v1YbUwSx0 zohOqZ3gHuiTH`bi^ z=J)&A`_bKf;P!_0Y}kM6Ut_N~@@98EuYWtcy9-9PYv2A?x{Q0pRh+szg!;_sk24Av zc03n02mi1e^UczWhOY_%fL+EuatHQ}=h(gA+ID*V0UGv(gKMuBc#eBF=+ExGz$;<* z&b+d-EyvN9e-gYm79_!78ilS34V6bhN$^@DgjHz5+Fi^}{|A>Mn*tPn;&E}m^9KL` z?;8LB6aWAKcx*3oXkl_?WK(oMn8E_iKhq*=>yqF|M)H=x&^T z;B@==^Gg)Mnkmht_|$TL+fECZ@MXZo;#2F}!#NqXKpQFo8cM-FwKhz*K9B$Shhwb` z;8~$RwU)-LjyfIfFIhx&TdtVEZ{|`()Znjb(a|f#XrPyjnJDaZ9cR#qC>Jg8Wh@NZ z?T@?`#DYJ^?Bw;M*7#WS@v#}-F&&3SLqs>QrfSoMbE)8mR8OIQM74nKp(Zisi&#;E zH_@=>nwz8Jju{_!tdc~Lb@)iHsE-#xk2F(j)*4SjMm2*N|DC<~Ec}xOBV}lRf*)%m zBmQ>=X31co=xV9kkW)MUo_UhDH@@F!!y-7@^{pUu#TGOyd}=|}h-x7H6w);p()#tp zO@e7i4b!G$(Y9ZIO|9TG(KWpkDfoy)dx?nW{e4&}884QrxPZ|33bhJ+|9LLR{j|DTRIa>37fJh-;N`)?kVc~E8#rP`r z`#=8;yhlc^Cc&ivV!Ujvhv*0KxgBXz3=7utq`7 zAq|?&yK)uMh>|WD&WOLHg`g%q;t0N;-93PD*ww^-g zxy5$*czJVc%Qfvn>505^J93R7><*mkCN(|i_C}S0JH5npN}9<&Sn$oWxRkZf>{!w@ z-?haQ(ClOxqRme3vAKODOu)>k)70LcQ+sUf)?!mjm1p+YK}LcF?=`+nsr(-O_xGo- zjkD>0Ju1)au~YiYH8og&c#q{@@!5DBiKe#7#B1=|iK|O|>C2A*x&LsPW!u9puCBXe 
zFv~ZAH@cJmhGhiG)&>chFbB7b)Zynso4I-DSGArm$Y9}R(C_Yf{!@;Cq@$cBSiUq> zz7W&&HTRq;HOL|L)Z$w5>Q7(so37nbq3yeWmOj7z!qvZ?7oU`Cu8pE0z8JOd=@99L z1q%4ZRor_T@0Hf2XM@cM7lA#@ap;5R_R?m&Ckr=ot1h!ry-8hmY8vY6&L#j za&{vP((!ciKIPgEDdwLl40`2Q5=g)U!~CVdXJCO8{r7+T3+9ZP7#mD7&(APj=NyOr zfFWfI04EroPSfuu{Y2L81r&RZxnj6~y`Zy@1+b(l_(2uJJePB0{|C4;up>V@4%9b% zovaGlaK)$+5MfdPHCRJ9)3tpW9a#Cf!ro~||J$=``@3R>BH35P9RPs|Q_0vZP(R)x z*;rV!mb@nZ+j$+QPN3Sd;=s=DC^!$9Z%_b>*BZ+0G3;u?UQbA~l?57xu!?7YA@{=# z&^0<_uMF`jQQ3xjmQw?P4UyCcE})ADsl5d*GHxWSlngMy$gTy<5P_MNVT=%LY=6K5 z`EF7b(qCn)(0Pp!vp#PDcw5=zGo9p?IO)a)O7|XZ%5iJd?RV%*#)fQn9-Y-257lZoIWS(ke_ys@wZ)n37Hd0p5gq`%hLB$p2R|ZR+jxqk}|eS~6HH zNMp9c$w30OM3X8zo!lR6*5T_D;5rWBu*sI5-qCH7*;bd1XPO6`+A+C*4Y!FZyO+s| zP+=_7y&L96TIGSwUwcc;90byAtp3CYU3huY)mC*(|BoHk8y6sJr(M|JoY%InlWM+J z=x^esOR=59bGw?Rb_eaj3D=E+2 z39`H|!(sKP`bGJ6Ixdxev88Q0Na?neN5)s9^H!+SIrdWBxqT!Si*}E!vW?+1 zM5fnX1@rthCQID0^`A(@1+RRZcEG-8D_p?ZJ;b0K@N_}rMNJ50&+{s&WEO;M^I|9b zt16WfCGqLhh~iC?WuaX=^{NfO?h8<^TO$VvhXQ>3y;@e9P!xNA|v-`#47F< zJj;e=WHd{$Cc>6u?yH0Mmurw`TTg2uXT{3OAYCHj-u-J`1@Gd$Zz$XrRNYvX@BLf- zRmqZcCCku`01i7KJtrYFd8f1a%WgZSYY#c7-FDV5=St<@&r}`!gWdRlm(gkj7PmT$ z0Z#P^EoLDQHVFU#PnVH`0xo~)htT%GR+2_ANhj!}dlvhTmS|fWN>o!+9zB?Udy+lC z9%7HOCz&EC$+BZ7PSQP-Oxr(fi4;p@u^u0*%5Q$WNr-lZSsv4Lpj$0d*KiurC{AYs z{lmqf(bKh@qclQh$p0Nn`WmiiRdK(^aKJ4iqmR5+h#nSPU8^o)38Wz%GE3w zCWr$E@_C%)6}8anF#8V0Bn}zP>6EuZnzR+MYEeMU^4j&PhQOoCM!ge&CPeOl0IA$r33TVn<)+N7^tvH?_c3ETwOgDo? zPGS=CTe)m~_y%YPa|-;>-s4Y2%y2%?L(5&TzW(;+ucP;e7Z8`4bx5o-vJ4 zE;d2Sv+yuxIX^}@KSK=U&eDYz@KgAj5_&z*F(DueF-U`G2QE{031eHtaxx9bUwM!5J7y^nTmmnXXpd6D}sHO?~iTs?BI6908kvBpQaX^^jKEJW# zGo(mPXr$UYo(bgCe2e6CI#tb1)!YV>6&2XHl@|!LNi}|(;VIaGf&N318iXry7_D?* zq&mWUr6bIxP*jYOuPIq2`Ne=)LT6xCDt$pdA5LIn46%QHBZRL=OpeR3FbJ*{$M3LR zh+n-EYsssPsoBDZXAFn2u#o=JJCOgtaILvHlEr+e9iBP1_P2RFn`5TMxi-NZl>6^1 zlyMpf!4zV$vU0Vub|u?z!KJ8EkW+$gmkdo_axtYsBGe|Qf+KYUP$RMhPp3HK$2k}L z0X38(1#N#NvO5J5_WKno&=!KE_E-o~Wjs;A>4mu2#{_d+nu^kUWrXq(RJ~JpT@4qu z+t{{kG){hwX(Bs76Ej0B)=+X#N+7$$Q7m|cF4+si>EHO0hT*C)-t8V) zNLSN+7xv0zRzLofKK|fIFDVzknG62-=y`Xc2B8}s3Ao4NSxL$ALm+Ijam} zqJQ>3;o_6Bd^2$xgm4<{_@-?WgtusfJvtipzEJz~{R8;0JcbI|N>R&~!=>1srtdqE zRH*&4A|rOmzDkSOdnjAtG6Ygevv$>Z8VZj21t5O=sq^i-7#8fpjT9mHZ)*e-7JX9v zkwf)L3Z!v=suO56p2(4IuxzME^=Bgp>?awVeN?wH*2UQXe!>9NhZofkYxfA(Yq|u) zpGHW%EF1efj1-Y)XX*D`z5Svsg->WYc9!!FDS`^Vvy7>53eV*+3BMA_q(bR72H)4% z!`PB0+b-Mo-R_#Okv$5$`=-saN*ejag1K@k1Kvlw1lwOi!Fy9QSOQ$TAz_H7j$;{3 zyIpZ{FB*68-rl;*zuB)Mt#zIL9o#jpMg$f|ih8ehI*1Ni+# zN3dbxyl1)USqsHJXWvaXcGtnXa)o}#)@^MvsJbn|sp)I%D!68Aq(c*#MvM8f0%*-> z0~P{x;7>I|94pE5UY*+m9zk|kz2B>sgt*nqDACt}rUWqhKARUFL{o4oeq5qE#uoNx^&G~-nST7ne%=+(GmZVDG^- z-|4F~Hctf+sa}|-tb^KMl+8=bSAlHk$0V9`iF6z1Kc&h=izm7l79Fj)Gs5Y`i$CT^ z`7lcvjUP`%)BoI}TKFCLGC9WW{GzxuF^yA8Y5F1Zr|Z3^|<RufX|>4s!OKkFi@lSBj<=R=7XlC}FrlD+DL5u@LI9tF93 zH;^{G$#_Ye;B42>l8&x4;eg1*kvF{Y4}@~Wjg_;j#TcmIWkXH-=~ZUSI(p93v-W11WmT^2}M=03HD5PAI zN35mXN@c3x0u$2-HQS7+I#7v=XrQ*Qud%@PZMNXAusK5lTaR@Z=YaIBv>`SOPDg8& zjd~vRA`w~X^_uMSCqWLvR$q_6n(7k;9V1)FEh9KLo$Bo&$wSW6+ zSI6j?*CLm6&|XyXK9Q)tWOxmIwUk0D7Wky}DUC2HhJ%XS$~@tF2h?n;BgYVdH4^qi)JVM;Zk1 zLHx4UL*rtzZ=fTo_6$Rx)-Kwd(IFi@q_%=Vdc_IyW3Q1%nFkW+17fZuq=>K1*v%kxrx#BO6A2dZ} zf+Z8tqyn?5VD$BEOIsrU;*g(!S@g49Fa5x0WDDu+-sox70XUQW~0h zEyTXggGZuf+3N~-6AG_RRxI8A$7ofdWNeq#8zi>ZKBs~%-lzc+yB{5GM+AUD$Su^q zWICQIecl?NT_CqB3}1|y)Rc0X!YR8v7^CzvM_2)`O`nxRyLW&m0X-k!=1nCm!xxSz z!{*CUHvC~BdIMuqDk-y~WA*?~dH%qzmPZ6BeOZTDIP00HM~QQ@9nVH@WlRr$nNlxK z=p@rf^Gn`^j_ut^rWKU_81yA#O~J4h zDzO5%(A*3z*;k)NRXu<*9T+3F&6x~BR)UW!?5*A3BUvrbc0}fby!+GfVb0J9n=H)3 zD1bMs7i)ZdHMCLbSEHl7Ej3T4!Jkgr-a%z7EC}}IZ`%v4h40nDtwWuOVrffG>Qz3!wkuQdiS^oNPV{CFESu 
zt67B{1U^$$MQB%SD z9cgP_WavCS?DO_HVkvTqGXAHNL04g2E|`mC+6OY&1|TRg0G~5N8*FNdLe7nGHvODk zxahd11_c{?HILD$jn%5$?uX*M!up&sVTaS%HiF)&;XOh2g#Rui4A1@M2QE+|9SvB* ziyS7*c>KCQRX-#nN^s{xZ8Q0TxC+t39dvOSMbh= zdQ!9P9@Zqg&k@VJ-_x3F&R1eK@QuYUyP~X%)GSzg z8BDCWUD}_oG_{I)qz`Qm{m8*Qx;u0kNS$-0#&#cSf}S)UbK&nSaF}vki!iKGA!CZFR+(GJHSwJZvHvG*)g2b)T{vH7 z{E0^G*Ib7YdAM$Ug6q& z;UrSr_%(A4Gq3BO9K0(+w?^D;SRTv7#5;Y0C6gx(DiQCAv{Zfc9kBy-C=&b_TNSC#X&CSqXNRn~t zKj&)HOW2Jf$1l>>&kWli%~sNh0Z4L^V|z8&29Ux@i@X@y$5_EG3rUpdcl}pDm;h{6rE<_=)64oG7U)bRX z+t9+wa55AH;f`u?44JZ!wk)DN5ZY?#EF<}}m|q#1ECen5%^B%DjkEffnuEfjZaEhK zgkR|UU)QR5199wzqrbI&$QLLO8ymSbYnRG2?^23D)$uhKnJJ*rXN0rDpC}iu9_6RY zzi$=Kh^bp$8@}I*xgBlal$n#$Qj{HqM`3?SEon#hKi|FS(R{nIeg)FdM!?C^Q0E~b z)8d7|QCifqK(qztQC#;O2b5c3rlGxo;{YuGhqxS8d9dHRd%A)pX;~VyhMOtJxaWy^ zWb`1DnnjY_asHQQov1{O76m`GD6*l{qSVFJE7p@pk0cQByUiYR*=LhPN=1F$+ivp} zINW=hT4ZHsrkg6@z{_+mFvsVBn`MzX5W78p>OaYlWR^llPo@55wJ{B>y?Wgp3JIb> z_XasRrzQm8lP2y38(A@j3`0*?5HniI-AHE6K$Kl#Xcgn*2+#ARtWgj7AXj5uh zMd49Jj6n6Y*C>x2Ptd<3L0iiqOkRNkJCdNu-t~^K@l68WAJ_5dg?xvDBr|Zm%Ut9o zAJNJ!tFu@&hk?ts1*WalSj}W*|3u+4P!Bb%#w~Z z5rrH*x;)(WDeG&m$=wgcVrx&=QKTGrFIdSOsHBL)oshkqWT)Y5dl@Fwnrqv^*@}E= zykJK0c<5ym=EB!tYdz;gq={EQ5gtyYM93rPA;=p1F5%TwA4-VS@9b@E3&UoL?xXv; z@sk@yoKFBu$BKioplVaO)Eje{1-N>W%}@bCsa|4E8O>a40ICr@=2v@>&M#EF<_T&O z*po?!H^~y)lqPbVAm+;b$6WRm9$IMKa$@U?*E*yni@Ncnk{I@;DUehX5LITC^eZ+?*};KnD4g6zmE7n>`Ep8Vb|tA>`GD^Ely5unmEnoWVjv-R+`NbRl{r3uH*C?0fs44 zMDwS369~241MpP^zi|}=@{EM;p>QZqBsVtAU${wSUezdtyQ5P24XVV`zevL;LepGnwH_Z$~3b7l189o>ot;T{VXE#(ua5m*AauBRFpQzJ5p>8=<#@s8HkmL2&OXF-C!cDXQm*j@de=anfGB!4Txg zX;-gZVQ>?i@qnp0`!wH$igJEx5a96iGt(hizNk*A86QM_k}j+?48?VK+kN}$eXaO+ zV|`*{)r)+?$8roJ4{?NFUUm&3*d;mHg3Pc`lja+;O7}G9VpxI)m4*85O0k|+#IzU$dx19$)d^h6Q78ay} zn|!mbjx_yvYPt5v zq<(ZQSNeuVAXqZz-F6~SNHMGMRn+5H;}3PWl(yA3!j=}dpAB4S+;a4S8uKl7OXGh& zuXY_d09i`|{TG55tE<%Qsp~-l8_PneG>}kfK8jI~fz?DXZc0M5N?LrLM9EiRh>}%d z2C7xIL|}Hj(yO*h)wgvN4eMdMO)Z`3*;AdL)hz?l_~2{TcE0PPl|4FeuWYQy#=opi zoJvB8%3L^b=vS}WKKZZSAoG9K__gp*6-=>M2gJ5d=FeTW=bXN9&MkP=9@{K}LuHF3 zh+iUj>zPa#I=`t1{SoQ*h^SdLt+;%QU-92`<1;uM&idxB$~_e0Z2&&x_Nv7fh40Vk zLmW!-CIl zBoIl>f*YHMM0}<-DK(EKqyCV4^1{^X5LX6)6#a6|tR;*0Dhw3$g4iD14(DMRz~xEc z+L6nhhtSYnW|!BZyJOrEaB(o-us~Ngu3EotNdIx$U%^cr|B;}s{?s~tO+Q^1f=zpw z5UgG` zmJ@+2Z1AUJNk&!OEs{9%V(;})z^G$Onby`{(&4 z@m#0ftt$W_{d2u&9_~ryb-&Z)mwpp$sBaAe zom^PZr)Mp>Wl~Sy+z9na`!LZeD*}4_KJO}%PbxjeqSm%AGOC%cP|7Q#S618MWvGTo zA1DXGdtt6~wbLpjeTdId9sL|_cxCv=6+=2YuHVmu9hRbRuPY3V!^&W`H+*UBIAi+A zuuiM%g5YW?CG?-mpf1VX8zv#XA720x(rKnyflc}8sWE*N=s>FPS<(KyWRprBP1VJY`9ReeFN5#;5J5y zlq685u5{vDHZ8YC(x!X+d#tvpQKGl~O zE{ul-P7v*Ty(hW_DGR^aY>e2)(!*qOi2w7YDV zNkc66=m2Mdw^;1?=z8<%Lpc>8`k)0RPa4o@EubglJ@*-=8bd5r#f0*+I3=z+j^+)B zo?LDcqb;mG{rW)kGy+L674#cKPw9I*As~7(1<{k^HK*nA{?fn1iu_+V7{R)CqqZQ zuSq+?14pHl_L;w9p8xqBbV6WONxpS@^uA zo~of9?+Knpf6bgy%ingc;<;0%otE1+hYPg{dpYe1@WHB1P-0~gaT+~o3^RCOxZ7_c zs`zz82ZpMhVtA1+NDoc|kih+$nWz?JK-izi2T`r|EGlWj24M=Kr=WlIg#3@5ezPJk zW*)WBex|Cm7#!rb+L%v9OTthHmck#I8RZ{73fz`V5{eo%F(K^aCBOv?B)?PrqbK(N z=qW3MIzL&Sj?Ok~k>6HxCk7$EP%yNV&85@C_9ii28lVpqz2!~7EF)kcP-Xh|r6iDN zGPDpxPcg}^wq@(b@f_QR`N;NUQkg^lm!3wl{6cL?JZx;(hQ>CK38--ja(LCQ(eJ@=bba;cGPZ`1Z+w zBBE>HMS7s-yX2OuLYhUFJU$NZ#*EE<;xn`l=<3 zDiw{+_KxHporZsY)AbI`V5vp4w+@~eD9n5ZKAs;&3HCy8WWw33V+vI(sQ+-=gf^{c z(0k_|Q;U4+t|1&_B6#I-i$y=4jy-S$j=%?BetsLLGX1@aHDWH_K1WSwIG=FGbl&xIOnR*q~)Dm+z1N<#bDj zSB)*wLwM|hhyK(dbl{3N_oR1wO=H&$m(Do0cDekphLRwn2 zAbyGhrB?QO!kC~+khS=;p5q<(W^nkMq({RhPaj$A+k?>Hrgi4lp?LlgITy6Ulz->{ zm!6=zWwfpC)+Vqr|UG0rVOWog-8}ocu_Yco=T|#H8Fx{oWhm!2dJCkmY+Rqu#uZ+4mkV0>U zaQ^WW=rKluCGz#}{&8Uz6NGt`pnPErL=S5gIEK)at8*K>7i 
z=%i;Fuki=7lE<4eSWP(UAw1z4y0b$doBl7j)WzBw=u|4vk9MWvTQAy%FG>DP#_`4K zy76a@%CxEGq!xKBrYy6|5ppd$3SL3vnpnaf_Tn|qv8En(bY4}KTi@JJ3>FX>cjb80 zmt%onmZK0s$xEU+c&fx?jRlFnyvdxCJE%3O&Kw0I9$EU<7+6VN)L{zK4R)}XB`Us0 z=g1KXmQWVxOkpHOZ#uguAHpMk>~)d_hY_%SkC)9hsD^mz)_=FBF*j6%e~~`shSWkf zHC_`h90Vm*eW?HO6__n#?iAh62J=TU;2&UVWB%xtYzqe^Rnfv!2S#@n7nr#7h+QceP#W_1pmS&Dvr_lfD`wpO5Kero^ z^K}_1RYO+?w3Lz|nv@XK$(aUre@V?ac4G*>j{2HkNvt#S1n} zzZTY?SDm`rOnYVrhf}L%5z2zBsGvKA*m52V#isdD^@t7kTH#%W6d+%W1ptlr%T50L zT6ah}oHBb2;NS7<(tV5)PJoqyoy1vm_8yu zpx!rxLDkk1dsgQ?{{r=ZQH5zA-QZXN znaI$j!sY${xA`kJ8%fPBtiw1KB?sF<<>3(`ndJ3;A2aW7M9ADXMvwbeVSa)SDve-w z2c!TGF3#unR9AGE5Gq!pV@-}l0Po`(W$G=zn-Wh*K2nhrHS#uJXg2Lf$!~Z5hcpgy zF9vDr=q|5BiI80`-9{id+86RVqKUtF*Kv=x56+ic!?Mud#SgrzkcZAjW z0wdcfEDX$zdHMks=jMq;HUVbA^i-li#%*e@xV=QB4rYY5!!LscTse5yl0QA)8WH*= z+sj%cC@8K)Ij5IAJ<9PL3L_x!J*BZt`SGW|zF!46XAXdxKUQFo?YB5$I|No!;r*y-xRl^}Xqb@nj9IIzs(; ztd{*R|C%3t$t+wH?`>Q1yW7t*r~a%?*?1<&m=w;vVw0sEt(cqIN1unLMx0V1HEA+Z zsr2lEQcKxo9-Bt3mfZjwHEb3iIC z-$wMj{)&(uhon)XiN-m#Ea_p@|!X4fQanx1pwvMYKW%!d;R?ZU9`P7(GU10|H!Ai8t(fyQdvNa)W8>UKqv(rDd zYNd5cWfmiXjzW{co%(YIE{!PZJn_&!wW^eLS@h8LHoN{>eS)4sL0S`kETy5q54 zbb1`%4RLn;zJJP%FGpm)LN}2|#&iw|-BpyGko$gjc9j7jRge*<`qSLPB4)QLw82=8 z%cd1sq`M%^DQWy6F^Fck`2cNaW^hD>{9K4!)Q1lju7Re(?GpAMM`bNjccDGq&>M~OqBmdaIwoKY}l3Cepp-}K` z83wkVw#|-$YFhyGy*HiB_6}peY#%gYtBp1&+4Y?4aefI8|LBlhz37Z1kS{!!L0(JG z*@@jpUjEykWix4l9m=9Wfl6KrPgCi*m6~PwN_5+Ec?y0O-#wD?!GnR;&`P_+39ML01Cb-SQ88mrN_vw zc^>$IFMkSaG^PNWxk){}y9C8?wk_km%_ZeXF6$bboG>v{`uw|hx=!A|qD!0mxpbS| z#$bhIrUeh<+cp#{x5#L~=3KfC+Lt#?U#@&iRQ_LoadiCs!iN zROF&S_OVo$ZMjQ??eQ9DCiOh?H>>XOR3EFhllq5g7ZfJV_)B7))N}xasfh$ir!Naa zXXnLMbnHuB5mKX`UnReC@tiGhYy*UUCE8)$f@{mpEd0kWjf%s&=FU#ZRE@C<4xrf9 z-N7iOB#NJJ(m=d(b>fiDdZ%`5gFvcv5(t%bSDjpQFwNa0Czb5O+>*6l!u3##~v!1@fzpJe+3xh*SW93l$<;-vF;4eNrh z23O8;jR~~ zIfV*=+C-IpsU%IP-R*0rU>8m&s5MC>W7N-}iVq*-^+wC0_<#v%@H-=-xc0P+=!31L z_d5wbGV`Y1R#L_D$}Sh6<)aSWV%xO1j9%6=z{bR5Q>vgF2O_69C~~xq&c{>>)_Le2 zYL~E0;wnFPvQ1)azi`VK^ry<6pOTCR*jkjokm)|8tl>hHvgjQ~(`90;zqC|ht~9BL z(<1nq@#c4wo3c~oJc%&qJ{$(epl*K&chHbcjc<7{ey6rD$I}M9T9Nw%1jR%Ddig71 zyZ(=xpt#Ve5_W{ZA{eX5-pc>DrGZZ_gXUuo9z3=-_Tx|(s2(49E{n8rZ&l~VC?FZ9 zIAyh`x#ZE+(8nlrWw0@g6D4p9YKfcLhnmplhA}U(u zR+oPq``b`2D3JoFkDw6xAI+n{E5Q;dPoez$dc9U`xf+ZDO_MpTqA-rKl(8w0TkJLv z@!FwwSgluzmu+Ooy+u9=1`9|3EF_$A$5XUypzSfm-(yv1&4l_5>AWS}95Gv2h^@tB zA(V&v6|4X+`&v?Xx0qam89qYWph0KhQ#vMjY*qEK4R-`^=ceiyB^i|1X44aSy{*E@ zmPtc~>^F+O6uWZkJWYTes|{Z0o9UG9)?4FA6FU^4e^>@KCvTiE7a{&?z^qHs0k-lU z1TNJ0@Tf}6`s4549+xymrw8+ZWe4|6_qv)|t+8|S*BqlJ(y_(?o2&(&eN5_zcslrM zC`TI<%{XS@WuV8+B0^aux*sZzr}t(@!MA}{3vnZFBRT8=N6s|)yZm%LN4hxUZVBx! 
zPDJ3EcKbBAKe8dasN)MCHKZ9CxHal|57g+F(-kH$Ys0B8`w_Ud*)E8AfsI)FtB+;>a#LetC1MB{(nV zrofln3AF}QF77}xX(%p-b0|@J^q9>OPiXhNnh#C|#6B$E=s$ccDnug9f6&ZSXk`Y6 z7cnb=#8;CZ#(ef#hN7-KViH0G`B-5h(ea_;bb>T7(Ah19FrlsX_%0g?E)~re73WYC zW?W=)V^SZ6OW{tMFx$VK34{j_=X>kwUtey?Ge5J;{glfBz3t!9I7fNlLiOP3v~^o` zv>Pz(|A7=;1f9qO2&5R&)0N0jZQ?L`6M%BCQnckDOLoGmxoCv<0PN9(pytG-;CI;D zH~7DnFd`~GjF4rG*|djuGHcPSy@WWaKb;Kd@_mO{RASF^4n)m@HZWNYKLzc06Jl|} ztfT;0!NIbjwlnT4^4I3lYV{{%A&Z=LH`GU5d@d&2}OA z!TMP*HPd*MGd--nUgXk3De+^o`@Qo9TI5RdM2Wr$Hif&;~3mU44^bI}S2zT_&Z0>%lBBg%cXyM-pa4anSld z8dXuQTsY}Xl9Wz2I@8%n-><2=EyI(xRggyYCJNH1QelMIq&A?KSU%$>?-sHkEC+4m z?C{1v!_~BL9X&;C9g&~at07HuSlyxl;XCTR*ynlg1gpJ-u=_f0S-{mRZLR`bSM(Pl zO1CJq^jy^*J-1}yyQ}Q0T+zTSnK8Vcw%oVf)WgQ`R8|Lh-{09y0tNgRI+Cl+`kUwN z29_Mp=8>T9Z5)AH;kh)jz<(h1ci3gejgux4+Oy?!Ij0&5d}H#{La+IcuupDDyu!L{ zFl%&57p|xlP*h-m?L6oyK=L*HMypVGl|rfrud)V(kOKVx_C^HI%_Mk7fCIrlO�E@N4*{>i#bbm`-xDUpNg@wRIp{dvnveukNf6+QUcA@G^c1iP;bqfK}qIT?&14yFE z+Sfu_bj?|z8gnPmkPXV9y4Q+QY@>1b1*RI1B>$9P(h;PQst0aQm%&ULvl14=HR;S( zeEACq<8~k-s9ydXGgD_uFl6sz=dl`l_~<_|*mU3Q?7);ceX=ioM>j+RMf=vK7qzYR z-tTlivxM7z(Ql>r`lrrS06+F^Kn@JvG_-Eyh1 z=jHbRCX^+Y1xu3JeK6f>;Z2BUM&S=IU7Jk+p&8N~eeW`wzL$9P0LWMC7x*E!= zahI`fU1M6yShi*LO72qH?sx^Mr2(rfb!nEJfa#ZN))hX!hEu)pRtGdO3ItFU~bHVOm1ox!-xUi4eJy>@YdLa(hL(U_{>Rk=5? zg*)jhmN5a+s1Q&Ke;w?|No7dA5BGy?bwVo3qv}sDP5+bCQ*cq*E5QHB>fz)0+ak>h zlwk%c;L4MScyRrfJkF&qV41+R2!=>6QifQ(YzkjB_}^}Q|8r9F8XCd3$~*iJb!yhd zRJ4+=cWkj47;+;mD`YThSZ2V`G@=VzBpmlGslI}?^-Hh{viYD{WPn^p;LPgAh_RAj zYmV@z!zlXT7FPaOH?dx$du}X(Z|<_%XV~x?KN`7opxC^nn_nJA2|KSc;{@5h{={Y= zz+N_n1Bp~%5+XkvI5B?Zpl!Ba)3NQL(&5#YQD@}X|J-Z}luz~Sha>p9 zg&M-FK}QOy!?oDt z-S!E&_J}~h7IWAV3cy}4VU6n{U*_z4#oh&n7x5}@r8A+&Dt?+BRRMK?e4^UB6b$hD z>R4HHSK}T!Yu-F0b|J7YgzeH)|D1KnWiQbvnfe`2KBdhUM1Fc|r#0^?GTy*LCPucT zr6v*84JB;mjC7o~U;S&|38Yh3oFDd`|7&*D?TT+((1y%Gljc44Tpvv=qK~>NOI4+oO zTGj?Q9UvQ<%`-CYU@`QxonGiEPMk9~U=jO)PW4$i_Yqw^YasdFy#nz@3fvv4szxMqt*_tIa1~h z7>Yn z!>P)dSo7c|zt{LNcWI3{3S2i}t=81~qvXL@hip@S;e#cK_Sem|tPWz4i_LLm{Y=U; z%ja`4=@in;jy{l%5X-HHj4inFB}5zyF-`qMmrG5kQT2p%*?)~&Hzv?z)JA9Lf}Icn zY>wJz6)Xy`8merCUn2vf6k!edcs%D=2rF^s0qgennblu3mn!Fo&0bTDn#hLS2OP5Y zefH6dBjWMknV}r%QPSe72M78=6r~NKC>&zvp#3z0>ye*EyxuX;?D!iwtij66$i2g} zM)-BnBn8pAWY8{$>7odabiU(ooy0(9I%<3kr?|+GDVm?1Qpmf zh#?*Z{4e3n6!#I(l)Hx}r}YU@Id{>K09j6V@B#>>l2mCi(qSYt9+fgr3~T~Ye)tlNoOA9b4{(E*I7Gvt z=0`LI?PAmE<;#~_aiKA@4w(pr#q(brC5`BxwZpp#W)Mh>Q&nMeJKu#F0C3wms1%XX8(`8)y(YCJ2FZ&*yO|nai_wwPa3Gp!H2Gd zSG&>QH3P86GC)xE*!d23`+;<_iEUm6pU7uQCKY-v!CM00{B#b0JJ8a_KYvvbeM&bj zB;0122pxtD#xv>qg!%j;QfQBT$MDhm$xi`{m-hd16_Np&`>Cp^Z7fDVYH$=(Fl|TK z2&Xw2t>OX&1JS4NFUV5^(N9O=sy)mku^Tz~M)a(#ldKR(*0`2ir1*nS&)bSM)HN&i zWM_a2^!Q&iTp3pW{VzPO)%rO)#-m7DVfC}UTQ+L(|F}xuCz#Ze%8eF_OO~fUsMcl} zE7eA~^vKVG1PaUu`Z)Pw>2j#dH5t~@lLPpUWAs?lsJaM4Ks+Tyt^bI?O|VA`mOgjF z+ZX4u{3+r zR+ayf_w-MkvVhF}ntPeZHwZgE#cI?{KZ*kVDEX+EFMKsIjMvkf`}R9_@g$ZAIL%}K zTXo;DtNj+-W$LEia^CV{U&9U3(?w4=3ThbC3ooRR*R(+a)h^Po&9*v-tH3!=DFg=})qz!lj-aXc-~aQZw!o#{Ur2T@)2Xj6<;brDxV#;PzLfE!Y)2%| zq176;rZkN>(!#~WuAbOuA63hTtkPi*gM+UXDx+BuKQ3ox5(RR$0Gg1 zs!~N*OuaL~k=?KD?mV(=8n`$O*@^#8Lle*$S>cZz0mzeuM?p2g>dxh?H22* z?u{)?fWIg-FsZxG?`CJMs*ZLaOM8hDfsI!{gqJ zMsZXRTJ;wN4_Yp9RJKYUaa5eco2JscKD~)s6JWA^hnfS4oz=H2>1U1Y{Kj4I^=I>z z!-96;1yEZsX2-luX?O0zsT(Uh-Nt6e0w~(40Qa~fRjG258A9?q6a14(xVkqw4KzfG zu9SMs{?Q9+JDqTwtr%nZDUZACy4RwlcWjB|miht<>)O>In@3%?3O1=!EK@&Hd_62U zC)-2?Hb|I)YoAf-z9AL&A+yr|ew3HR)*(KR?lH`+lhdBuS^BL&N>-3+{ZX(O&uvs5 zka$P>IO`-64QP_f8pR0GD;IagOa1s;Zw*qX+!L1-Wr{^Y%<)}Ku?+Du9a)kiNWsHG z4^aKXly0cPv?`E!`;T&y^~HPl;V}Ja*{KoW%`%aqH||A{lgjh&Ap5v+)bKiD+O}#4 
z9-%<$6i0rGZjeb!&*cGT)547L&ondO7f>NEgHVI&C7vUt=3s7`jgXpG5Q42FN(ucv zIeOUW1A?9D<)``>W&$3bn1WczPqLc)(RK^S6H7KdN`DYKMJMO%g?t^-1ff$92%VBq zp?8Wv=wu#fNIW(^-(J+~`TM8RJlX^ArC+CVRp?VmD6Siugfwg1M!4Rk+9WGSRZ=G zc{zGt@lBCJ^KjKUzR4B!b4*oHK;alv;3s1V*83~@GnlgDPx+P!Axb?wUj zyuRtTdOQ`0R~@{gJ#;2ZzzKzed%=Q@`An#2)XLL^&Ki4}%g%s{rJJEqDM_OYw8b!E z)aDY~emw$ZUj2;H#)Nj-QB^Hoa3$)g~V>fCkJvE(rdiPMHByutDX{=NboGo{U z|5$0?*1?Af#7aK@Gp9KJXHH!@thQr)d@;JrgM+#V6KST@eGgy3iA2Zgm0!)1{$*?k zuTyN#lw7K!mU(Bgc~9vRAlOfA{(sE#Uv9Ijoc8A^&EipCDW|&s@wXGbD%UXsK z`1=g{<>Yo&zuZ+h*%Bw^A>^|c4~vzr$g^jyhb7?M=W`T)_JSp;6_R4zsG0oP_ow1N z_Z00nPvA{=2m`HBz#b#$wDY;cZ2S#1i(-jBQIoXbbPdyfJ8 zv02%8{V{UDMUOCumL4ipGi8;H6a$c#bRbn3YpCq@fL7Eqr?3( zCU?n*)V|)X)Qr7FCP68dGd$RB0#t3FW0Wq3%Q6Uivc40zfKVfyX?!T|Y~D&8f}U?} z)kBb(7(bwiDq{b=5G}LLR7H(DzrVBOj}$ax+M&(yGqozya)y$SDxXj2@*W9rWwkos zOYN;u_s$&V4zI3iL$f1Wsy90IH^pB6f5U!hc_g{w~Au4NeNIRC@@rYNZI%tIHB@Cbqd-A62bLJL)`iq+a*f{uhlU!1b^R= zg4mf;vrIY=x*7@M!bwDWH<6WK)8esljf$5KgCi=Xm$0$}q@c5|mEX!-wi0U7d}j9k zLxOYdD-w!z_$Nf>=sEWf?cBHWJvHfx70L7zP6p+n(D_7QimW+$t#T>5V{JZ}0gP2Z zaUqtGKse?YH2OCqm2;qSj?$nEVWJA%!#O#KV57IT z=9RHbWo1Tzq#CSSzY>j<<48wEqYqFrAQ>n^68&}U;c8_AO=(2QVe+s2Q=RjY=$9YH zGjt9?6$9Y0zC&N%c}rBcrHgu<5dS&)#CXKOr%GW_kZS<`y@S{5dN$~|V4SZ69DlZU z(&&od57g5!iXw6@WDdH!Ah@X*`0o-)OSTZ#)FK3S_VL0{YupL~q-sg5r;qnrQ2D0> zS<{GYK8jBVqlw1F?HwlDRHb%jYVkuHjU~XH%>y{Qew*i2NO9%Q=*)A)y{VX$P6Pc# z6|}1LZzwrm(CRj>yACp*QQBXX>JXV>&HoQy zZxt4G+;{KN-6h@KAl=;{9n#(1APfy6EiEY>BHi6e3rKf2(vtf(_&opj-Er*`=7i(9 z)_m5z?r*{{=nb_VuEQ+a_1Zj|dtS2rtmEe2J~K|!E1_qXv>65(cF~e<6VDk^)cuBw zT!mm4OI)b))4B6z?D|&t9QPT5NEQFzh*{5SYwoR`YiLVPUXy}Ck3%f?Xr}>>rBr8P zXWiKc87qH6AltIR5V{@Y%MG+>^*6bm6#`XK*3S+gr2Q}Bke&;~e46>Ik>ukv^)SCn zDaL6qp7=IrhNz(zMM5+)r|$1fOncxQgP8{NY;CtFzw&#&i+4SzK8JsIzVf$z4T0w& z7MpkV^kUhcB*3wKtjeRK(SB-pA3KfS+BuEZnmdU)9hr^l z^%W-P=&sI#i~CWpQ3wGP(S`Z^XqGSyWAR%O!hg6WGOMEM13Le0f;%h-^NhmU7jIrt zwGsT>+bn$Lfu}|VN~6%?eIa{GaCJL2k+0T#MI`e;}A~4Q>6E&YdtJ$j6bhL z&@EZfi`Eo~3!P>G9$ir=y?)tmw?Z2ll zp{7H1W~oN_a7gz(hP)I&zETNfUn(T2-f)-+3QXJ6CiNepwB!BEbP zMB%&zYQNci7`d~u(JAwve1|~mw*|C*gvzzuNE1f-jCRcw4R_qwF5pmgv4H-ws|P&} z+jb?QFyiKIG$SU!OZfyxaetji!`WEid`w8_a(Bg)O`A{ymKUSxRSc6E{P-Ow*;t{6 zQ@1!h3CK<;tAkg6(qeJ@j-AU@_D@o$=UKGd`72H|#p{MF+Pbvu4ievdT&8L$jBdIJ zt3ZTRK})(4`28xpW*LhxAb;k!9GAc0wlbnh%9Cwq4jzi?#9z=+hgjDP z*Ligq8iyQ*Q8ItE7&7&<;<-RxN^8s6fACuMv1oP#F{}Q#KO1P8K`!Qj;7QSWytg^2 z`}A~Ey)P(ap9~zMz_%!}VCmMQgrB#`sj3Lq6^F$!pfYBTvAxtBi8gH{R8PzVR{j~C zk$I($)`63Ub5DOwsHB=F^1raqDE?Ub!UxKzt(~b9GI+NoBq)V@V+pi=AO5v|3|@sl zX=h&${?Ssx$E?yhpZ}&pO`M%|cZ}rHX#c!Ab6p*g%KGU(GeZSaIl+x=c*V0zJUb|S zKb)G#FP&~dT0-9`E9_kGsg?KII9uioY~jxL`=zK;_P$xe5g9QVH4a|I_da@w84^hi zX%&5|7or*kFKL*?(76BW8YOrOBrI4a*u_AZw4PLGLJ$Yfzj(C*Rw?`g(|y2T`ewY; zlTsl`Mk8lNgE`oxuquv2T@xV(2SXw#=>I=GU?7BCW>S%^7f+iCrJ)?P7%>$5-d-QS zEL&i^9_Lo(1+&6t@Bx>@be?OvQh|>M`FV@weicxUrg7o<;bn1Bb_Y9P7U{{eczifrEohFB$g%jL#MNuDFw_52 zIyimWNxOnD1SGFPSvp;(Kgewg4cxG56BB)^h-@4>IwoUGT(5Y(F>(Eg)Rl)~-NjO&kYzi*d`-WY_&EqKi3Lg{OF%nHQ z4VQ6a3l3#Olnwx6C$yPwFint&cA@!ZILaES%tUrAVuCQeQ~QasT_asE!wu zu6%%E{nZM$?CgWdpelEnmr!fS&w(G|t(>t;2#^tCt}|FOTi8izc=EILHMxbk zG|ZWCRD(0IGm?btRV{2z#MdYzX(c~2oBlcyZC`!3+*xoa{?bv~f|x{u>#)3tT)5aD z(X4!c;$>p;>`9=nWb==x66|&O2BI2GR!xpk`_7MBG2xA|~pFjd@bXMYTS8a1F2*&Z2|`A|meWkn=%#XqHZCp<&<#omw8KS$?th};@% zO?^`CJTAIX+bbGU@OE@wOW$_vR*?Kx0Ll;R_VEcO|WtndeL7 zraz*WLh;p0b?4)YUu_B?hUAjdcL11f)S3U5-;C#R17&z?rDDz+4h5L?!M@J=&@NZm zS%08|1o(t(yo;(UF!6~n()sxXa3Kw9=!sKvOc_y>B$wu^LBh;GUe1hPjC@0>eZG0n zr~H;{txKxzRC`-F)2KQ1j=&Ks;{5Js(Q;j9JkSHCRvphCgSfN%9E{(vw#?5T{0OuB z2`TDc*JCsQ!C*9(Xrx+71X0Q31n3mh0i8l=N}7@^#$Q)p*Y^?E%X7Vh?*44zdI_)! 
zECtX8f_HsUz^?BdQa`ZkBYtB=p-)Ywvj3`6s6rJ!LqKEw#rmpKU?Jb+)~iBlbcnon zb^t~4C;V(UVhPKGau4U7cn*wwq&#V<~;$~jqF_MxMYwv$Lg}fD` zCC4u^^gSmi>1b&q9!Pma6OaxI#HAV_v*nWY5hOWa*mrITD<~SvHk%I&``(eKST%Qw z=;}Iz)&f0XURkXxg__{CTs((>&FwvW0rZ;xymLM8L#z&8XU)s=(4C;N*<%*dV-{O% z)zMN(#kiyZ<%tKiO`4OXp!1DBj8hz7XFV7uNK;elKt`BeQ8AZk=m-c!U zxw|`z{;HjCp|LDG`m{Kce4pKK(&-=E0L-7wwRHIas7WRtERg-+Jax)G7Atsu#C znI#{5ApKdXl54EkT>#EoPx@TApD4F~X{B=$8;bF(rP%%4_4<)Fc_I5e^_V z0v#O)IIaMJOLCa+DOS_ST(F3lTF!rL=YV|!GlY*ZxPq=WW>dpWqZ`OuQ7}|_9atw6 zyjuGo4;W4p#OoJ_7k}VPxhqhY{}rN0yd$B*@YL=`7zg%$Ld_JBfKY*9o6+KB$#M}8 zDqI(qqHWnsiDDe8C36^(Ad1C{vAokj9K;=|xw(mNktT`LNi8Gx}dM!yw=NkZ*{R zmVi&8uo};(U-9#F*2n$rOz{@GW~kM#A?#7vPW_w>h|hpe;Vamuph&8089u4tb*oqs z5QvNlpLWJ}Ucm&hw8c{h;VHdb6>gk{+#K|mats!!oRttvsmH@|GX0~}M}C$9lsYZO zE2XX#;15vhpd4-_Fs5!-lm4!oa@JFB0A2x^K1@Q(SEFB(QR%XUyBKHzqb0OX3ha<) z|JwrYwu*vVz<7R-SE)kBr`o!7TyD#O4L?H51B##DzSpX$Erh3AyGqE&&nH&KCxg)c zwSeK@+ce!(cJ&$g|FwX@+PY;!pnhMr9W`cFRiSu{Y5`yg=()rvuc{4D>J0yufH92z zIHx=U^e&%4J+>aI4DTO*fa2vg$sE5Q@wv9b!%bwk z|1N0IGWuHGwIuto+t}S-^vF{#C zaV1fPsV%V6W2T9p!>Av%S(k`AAs43r_9_NouQJ#TP)6tfoawHMra*elpEeqH0PNM! z*&A?aNDh>Sc1za)do>u9fEdjN3vwnzpNn6;{uEp>icwIA{}DOIC1htH_RdPa9lulj z2{HR$Ysk|>f8mo=9YR(R^1oWcF@z4%bZ}`1Qz%~l-_mdgy6#Ss5-1HVIkKF z?KV!A>hN>Os~*&#ejCt(D*ZVY9xc8@0ma|*j9OD#rwpIDnONlk5WB7$B4gT=yRRi- z8aWI*42~?TxQK{2EbT{CWuhDGYh;C_13n6=RRrOFOw;Ix$HByj1Ww_6joIIb4&)`f z`o$%68Fn}2yF7qQp?mOKyI85R)%gq__EWjnEet>*_e zh%Kq~AwUnR`jWUI<8Dn2`a8+z=RL*!+%C6^JQ`ETPMaQT^r}tpBaj z5Ru2gg<%gTEIDt9hDpN)&q^~26uP*fcvg5whJeB z7ia#v0GfOb2IK^PA+2nks1=#yNtz~jvncU_%|Ol$y(Ige#d$&?We@;g9q+udSAAqE zkiH*;{Jiag^85Pvh5+`;GAr2Encxk;UY(qrc|uVfyTQc70-1|LU=Op|tcnC+uf|aU z_NvD9#ZEWzmA$$G*eiuYRaSt#vc`R7uPPjizM>O2;tZ|-DN<~)4#+JYm!~yYm^voO z0fjDf{$ka0KECpiKspFAZdo9gB?!X#euTgVuvZY35Kg*v*1wJyj!TQuMrFw_wxr&7 zAu%}|b^A|Tt+8D%ZTNMabYgXNH0(e|p(J_BaBl*1Y`SfoD)as|M-FsZ?s>F*Xihwk zxq4X@XCahB8xj6^(;MPBOrafiHIj^if>o<5n-C|?(mo^h?ETn#03OSMx^zd0s`xmy zCDMuctU-@qKtKZxgf5SH#CDf$UHEKQi@$X$Cf8_wak#BjWPgQ4_fOAW+#(x7>Bx98 z`2-WdnQMTAZvS)JN8Z^i3pgNit-}>#BD)AeE5WDPiooW58H1cp;?GkFb_L~zve}a( z0vJ1+{rq3{&g4h{V<#13Jn``iFflbHDuk`#fy=ZbdPCKs@$L%0phKkuFoYtX3_ZzI zuR0U={E2-8&jC5c*7uY)_c*y*fs#TZ30Kr<3 zSxEa=9dUKW&ATtF`;LSl{NCjIPKGTsSUzf%yva?mM79qRrNBE<3W{g!~hrqeZoC{T}hX$%_*s zixV=xDo^}Ycqg_iD7I)qnW{?*sy*Hi#O%Nfnp}ksg=z;Av+9*KDK@cR_}GU1>3Xk z9W(=0(>0RjnNko4o;CIi)*yrz-i07Q%jKZi2*2LJn(C?|4kqrT&1vMC*Q|?BE248Q zB&)Sl>4oI>1L%un);YZA^P6e**@>#d54TKfVpydV%c3c5a+({Yqxx%|SL``&na0k9u zR^@1cl|Q2yjBiy7ADViq*o=&o!>rUVYC&;c!d+g{ut_$l>a3$(7Sn0y6j{HX<5?s z<<%G35BNf%z`jt+$rV=p8bchPfX>B38}iFchg;_qF29`t(sy!%A1N!%3bFgJJZ2`L z%7dk>Q*tp6jzK2Hmd490MX|$SG2}g<{L5bn+p4(lv7w6CVXJDp6?`bD72YZIe%@k& z`Z?Q;yUt>V0A2H|LARKrhb`Wc)^7EeNxS6_&14vsBKIz@oV~e8m-UM$*g$t&W5H!O zeo-x`CdVQ`Wd#PX(w6aavzv?vocs-8!v+7mXEHKIs{R;sGwHiU(nagNX$lCIzVXbo$vU16GFyOtp~}-+ z#HR+8uE7G)_aPmpgkVbA;@(AAIr;e zOf1`UPvHQ9m+Qaa#rJ=%U9Fh^(J<_0sYGIdq|`R01gO-s2i=e7u?1~QyfF~vE5$<~ z#b35ZTaG?oLEd8oB#X$ufsj|L-5yr?#rM&JA`zkn2Jhk0)cg7(Z7IxVE`5hX8NPLv z(-XCXB6ib*V{*+%B{rvOhqwrnl4}6^32hH_&(^;8i~K|T_TjkaD(Nl`{vV(-Wk^7y_D)&&es{}n>} z6BZ#IqVb>LmB6;e{Jk_>J8j{Xp4N5PMrE%Z_gCS+;FY@c8oZ>V%s~~-UiJ1;aIf=0 z@dF9&^%)=kd?0xJLXd^DWN?t(`b&K!3*cH%WkJQnR)!Z$3!k0W#z0z9W zVg3cLs{4U%<^MwO`~dW>{h^M}0L_>ZBva7lUd-`xfY!zrPsKd-5x7I(cpk9SJ(EUa%4nl47pg zSle4~lg+V^XSqy86LU4NWT(?He5tN5;KLYBFZC5gB%`;JWy-%DWs0ecHOY^W|P1$s3kFXY;XizlA1vgCvbr(Ln z;y%i;LJ`~WULP;tI}Tn04W&;bYUg|gOh#+w%^2-|=#5y~`s#1P&?g9hVKdQFgE#pL zRbRtyqulOAK98`s!^xgyyRI2n%HBsjb3Z`@ma-?}K-Jb*ftQ1`fRsH!BSAIf^tY0d z(FI$P1m4U|vC4m~>=S!@+jL+nJ1p4BPE!d#i<$aBq?s)YBY#qZQTWs0Ysj>s>)Rec 
z%1-$4m-9a<``>E8S1CJzS#USh&NVe_nh+T@73kj8qm6bhSvpTh*&2vdF*`&;Z>VpUO+64mxii1-Cq`e!6RYlpMj?T7e`z4|s_+94?udxahW1f#gU<>lj)X0yCyD!F` z9o5oA^2HB`RcE3v4L3lndVi=&TbX!n`M+Y7o&!$A`e~79i?tsasASxg@p1moBezk| zSSRa~YfH`4kC^RW4c^ePirtO+ATVmhqL)vx2gB}K(I8L_}Num;!$s?N{( z-{$VOi_Y5mFJ(7ADP`TProx|d$gT3TY_I~`KyW>nw3@XJn{bco+|0A@8@WF7U!)xw z0~8BN0^lovY1jY9w0nO!e{4=4L+FeWU8eI40g!g=AbsT^()&AYFnr}wgo~1}?A)U^ z=h>cUsnj~%y+7JkHI;kC(pPt)GI`*=yz2A#fMnAaFJj9j-pIu7;tZG@Q;$B+0^2|p zb<{kkwFW~&WZ2ql=)BMo>Q4|mpkidP1`Egp4T$BEpb&{2H;70fa z1;J$lGF{z&>%idR#ldj7Q!snAAHH|Tw!ZYxyynvM5NpoYmiY3h|3FmC@R+4|Zb1Eh z;xx15Xk3zya>oP4>;`IL3)pRE{8iX6 zs3UU$^*cvgy~u`La$C2sBCPp`&bVzT)`Uv2<5T@SgV@eyeCADp2Yu~>1wYO%+FNFo zlAGPJ15*QwAw@@0>3SyC-}X$fMeu3uTV7U!#IoS~TR^8j(4#Jc_~sz~K)R;N219So z1mg4~t`cL=eE^yEAMm60w*=3elHnUnI4)R0FP{+CNMx;~&V`2cQ(;(_%)0xdLbSkd z9{YF9GAR*6Jg{PvT-!#~iR1<{Q6v_J>*pPRckNHJR4u0aW4>Q8DpmX*wL(Zae+j8F z^h0Yg{2mBTHanfy&o8yYZ(VDQ{B0JxEazp-7j;s~;b@lrzlpW@y+WK_72w{Gp_L^b z0#iFqJZFM>#&64gKX zqv@5@3GxE$erXzq?R!LnB^Z29BjhSJqfd{cqy@N~G%QCpm{=ogp z-ZCS{*LIkyR_Ut+6In`z1)_kR4P=Al*E)s$sK?ti4kjQ)ecQUNv~SgcZgK(i2%O9rr5?Es54uqlUw@ofjx z;=MkRNvs`_P(ElfuePoYq>aM`)UZ1~{4ck_`;&bAS|Ov=a&cXOYn;7rFW) z&eoi)eXhZ+caZejhF*b`h3xPm|VTCDb7 z)o8XR18d?jFT6VPvWITTMIAg8^mYwQO{zTv8bevvoY;Kw2S&1#24jp%=6A!YTs)Y# z$(w7?7<$WM_gc2cBG-UlA@2QaV>n?<{hRTI3}_5>v))}G(*y2~YN0u1%crGeb$X#x z*+vu=cEGRTisX5xy0~#>I7SALBnuK_hL8@}R)2oy}^U z7Y)0+|M(TGXu#=-bqvt~>{n3zm!AHkU0(r9Ks$|1R>i4c()Ca*6l-@xXGrJW00}6U zc(xauzpP^Y7%ELUPD(D?u2ZfBsR@}GnPsj8y1abqe zc31HiUo${TygIz*(%nQT;!a)1zu%3KmFs3Po{@|L(v#Ftm1EOkbW{d5ke)Cou-(&paLaiC62eDyRYY>ln8-VsF+j;+t6+6dP_PG1(Q|ez-kBi=`d5bhMs)a=h{sR z+C)7H#tYQyHQg*z@4RSb$fJ(K<^dh|jaOr3SPw6f?}{MaRFH8(R@_+*c9$dJLXX+x zH-FdH4n9(fz!VxhCEj4^aRi!86_zF9c8JC2B%mjpyEByGQ+cpj=h9)e5lMygYFjO9 z``g)TmTkp`>y2~tYF*c7TdOE$7f&8nb)oSE9J-l5jzw?~*Dw&Cw$7Bd zA_@wfi_bXwYi#rQf~?ll*9T(R)>jklrGzu1%M4mu(X2if+0 zu(H#Z3a}1%3kNoU{=h9Y7%c`IR#jpMCJ6L#??e1PM5rr$)OwiYiVFNbUO%J)kBG)>8)_wLuCH7v@M;8TxIXHK@y#nYtaFm7`i< zif>?_vu56o)9uFA*eT|$`~#n(d7Y2^m(0!Y)nGp|d?(E&Sq?pvm><%8+<^v316b~% zg@Y)_s%Kr!s)Mfd(i%uV2N$n@9)^4Lg{$_Ci>WiT>c8c-9Un@U2W3ny5i73Vci+S*I;&WVs?8;QKgL_ zraBop)uolD-5P}K^oMaljB@MX%X=a!IhGftD{>LCL9X*f_{_fAs_G}B}Z!`__Xnq^Hv1SM&DH`~sLFcjk%yEW&+K`0S{THleH6dE_r^pr@7_IN*ItKtuN} z@yZs~G@!srx7;!RE4rQ&4$S&ZigAl|2wBNEb?Wo6#WNBcsN2hUFOYu?m<-Bj?1bmG zEacH~kf+EDB~}4dp_m@&^10naZU%7xMgCnNk$sivu@cRk z;Y#6Q6|)0NtRiHyz0PcWW;{iXdD~Bb&EUApmFNpkU>|ctE;8)by5aq1mIsFwUpd*r z>^P~@U*8;10)N=p)Gy0e?}>SvlM}PoTmm_5miQqvq_mZP0Q!ct=uK8T1 zqxYP~fDz;Z9J}4MT5n@74pR+1528#x(d+JYqNd&(T0+{ zeSUQtVyf-UNvc1Vx>VB|IMV1abS*nxDtwcyGWV+gSv~f0;@ruJXbb zl9h{Md00KNEB+G1iB9jn;!mc!3u##uybk?ybmIjCX-T+-^b;5i(oet4&`;ZzvCs0r z=n|tq==rUTi=@|ta5_4(jp~YLOS{sCXkX|Rr#IruG`@~}ZC1=}F< zg9cE-ki;OOl(Xi&_B(3IKLo8~YJofA2#}K0{w$-RP9y2qvl`1(ARFI=y3Z~7d&x4I zm%b8&fAm$z$~}TMjN3UZ3|tg1&fSaWfjqTC4b*|Ap*}yUu?89UXLyax=%2)JP>@H+ zX;J^|Frh%s3oT;ggE#}_Ln_|V?@wAO;ewD#mx=H@RMpUzeWU8=Ys*ZpVa&>1&sV)Br=) z9$4m7dD^@ryZ8Q9%_lC*2?uxI2bwe$zcxbfWbi*e+ME=OfJcE$LiXFk)~{?kI?x7k z_in;%5hIC*m$&~gXQU|x{hi+gMu83L5?Z?j(MYpe1Ydsq8RdPy1|9{5gX_R)b-=OS zL37}%bZJ)m=t7djMIfGittBw3*h;>ek(-c@=fI3iwUY}?UfUde^(lHT;3uE;ClZ=0 z^yMjtTL;w%Jml_#M(pOmX`1hjf0_{tX zhP~uRn`fpM3 zIYq(r714jK>i-MA`i~V<9fWNwTU;4qeKL>D|L+=A`g)DBus-{rA`m*LkU`Il+Pco3 z?pL@JR6m7_OQ1-W`_=u>Qaa&Wx^);0^tA1Kaq#B^X=*)`(c3ia3VXr~G5Iih#q;j= zKJYs>=fFEQBsYEFK=l;^ydJ#Y=mrAS&6hD|1MqqX!SC1rfl5Ni%9M0V>OX z@QjV5iFz0isQenO4X6az!Oz$*DB!h1(*1rYbJ}KpM@bADLst!eS6kJhU!g8xndAglwjWG)-ZP1VTXh*b$}AClf(;Zh^&x3d)NUK48v*)G27wfA!eSg zOkMnlX?=60qUKX8u)KWOZrCx*sB~f0n>d!6P_F-8u^Hd6S6LYA?oSS3#sG}!Xa**a 
zpKX_2lF>$w&ExoZP?N(ZQHZhI+Yw>F+*Rqjw;U;oI=p2L@@&nIAI~32Mrj{l;{8BC z3s`I=J>xCHjt%CnvJ8TM(m0e?DQzLW8W>7xN{6iJ`;z5m>rF+~M9q_>uqDb`6aAu~ ze{$ard@IPw_x&(Px$^tQ5&VkH&>r9fbxIji241lVj@9=M*8I|s;mrU(Mx}#du|FXx zqZKru#dyf3fAb8>z=}@d03C9}R^W8;u`3<|$EY&4QnrZcgL%KmIv8NVEnhW!fXGc4 z(bKT|7kW?;vhv1k3;F7|W(;N@a-1ZkqXlS(r1T|YLngo<=kvfMpRfM{Py|wcqS=M} z5kr8Nw+blLV?N787vm&1G=N{SIfy{c56ccNRQG5yv#Sk>NBj)p%x?me>ScsMgjy^< zB18FnV`1Cq^j`@XZ0+-fOB3qXfj|Y|?BGD9aQV^gzk$k>8R;cOomS>wpkkG#J)Xco zr>3Q^8Y*P}Z=gZ~2P(Nslx55{XHb|LShTJu50$VECRBEv|A3$z zZap{BKE=kWSaWF@-DP}1^-F?KxAi1BKnib#Oy223{eC-oD7@6s;FfZLAQdQ)e9Z*e zS7p?GD@FHQG~QOX;M+FQ?|bIp3qcpUZCDL;6C%#9f>f8lvM|PA1OXI*nLrVUj-bK@ zhOfXyAjHQedLvwi1)vB7te~U8npIjr5lFUEM=+pqaX?X~5!3yc1$A5weO(>zT=bHR z1fwG}4mtHiOx*OOZN06#bwgdgf!sFh3ZS#%jggT7vQHD5#~fTQB(`F_v|lPT&-AnH5|REA z90(v1_Y#9w8-rO!YI@Ahj>$feK*6Tjzq{~xH-|<9i}oD&YGLqHqWpi*SKl|*?t`gw88P1y;Y)hdyf@?No~hGg2D?i8c4YKbgGM}K^!Pp#4l=*b3F8PgMl=uP z)-Bs&KB9Yot`zG2spTCIlmf5Y65@wmUEBQtuWS2(ts1Kx#NVxO_ncW>JMAKXxM2`r}g@>XPTtU1J?|wLwqQk*X&22vkHDGk{zX z{H#RFVWau2)?^1P2xFzp^rZA#k={lGWV^|ER_x#mQG4h^dCWy%Xbrg@r-~qbzxE(8 zzU{6!oHFMF-NkAufRQVygq!!np%hy%o4jD;3hVPW$_id}*9|#LFhyzy*1mX?*38Gl zSLEsgfLt9<{vYH@m<~frq8ot~c*BOXnIZtetxRKBtNyu3v6iIpcn6GJnTJA*P~!~- z%mc_(kLqJEJw9Hlq$VNds180zqFVZnLgvNdF)2*PTJ=}{SUMxF>H;s|UKfN#DVLqc zM%Xdn1V3UE)#W4v+^mYfhMqe4ep=b=`SNy7yG=~=RSkMjU~TkJBYQ9R+rqMdQo2%1 z7M3NCPtGefgD_+M8Gbh$tOkX_%AUe36o^LQE)?|W`5hx+sT2Vk`2avy2#K|ISx9bv z%CFEB_z@d&uU5>F8QQxe^Nfs~CQf{3zK<3FbY%t@u0DXFD^^j1^-oL?g6llP&j-=X zhR9v4Y3opUBV2C=x6KxP{$(u0zs>_qa_h!;(qQ|#_uWH50~_nUVE0$&^GY@HTwGnv zPoM!Is85=6n0UPSZ&VTNG(o)kxyuRUJmrIaoX_r=&&npF6cTOYC^?_;k z`MpVHCa`@y*kTWn<>!y+K2NFID+I<@=xPR*V`4K5W!A)8kC5So^xul`cl)XOFt8$I z*nGS}TW))VH!H#c#a|lzlvkoGR^3y+8n?Fo1sQ*$H=f9K{4Wz7ho|cb`;FNXGl*Xz7&DZoS3BFsEKk)S_0e346 z@ZAb8&1oA3F6~?%8Vf{O1_E9P)ce0v?Qn?ypHw@I{{Ne52erqgdrC_aUig^!bI&!} z5TEPev%%f8efl^J%$AaTWboq;x;yjBo?e3$!Br>?=7kUiWCL)idg$pk0x4u%9Ra7R zUr~kJIg|JS?yQuQu1t8>8RTTZ3xS?%o=cv}{o{xUbl?{P-)s%AZvFj03U`$NuP0@0 zYehj%g+(lvW}1XI1NEKdQ4ptq9YbixT@Tqulea$_VA|)((*UMj{w-zMocovGxTerc z4M*hw(r!vbPp2Mxh>DW6uD{0v zo7s)9R&{mXa;YyZPuR%qyw6Wj)FMCcpc}1ymjyW!&@wEQc^?IKe%Gy>J1)71P^%-K zxSq8;+VQ`}d3~*h757!kuzFJwd^D)}ROrRihJf)%Yv`dKpv6SC#fp$PT@$Jl*P6wTo$z6C`XSE-jv@TYf;*J=qCCM9H?l%GS3{%V4V2aFknp4J`)&nCbtH68 zhX&KqPZ%<&BCSK<@@qf;hiw0Lh-WJ^lF!X=fA>2#ZNp4H9Jly)vO?*dF)$YNDel)m z99g^{1I|@pTM~A`dkNYyPTHf&nU|cq3aq2rL(x>0fGTtaP=yi>Z)q}up2Y~R<)oJo zPK{!dabJ*TZQ#W@{VQS0*z>7BKT50+tIN=gp~{^0VGZucshT}5piU$Y!#puynujA_ z9QCpD3fu5HIYEl1K|tUmqUxqfoF0sN7-71;-HrKvCmKZ&9u52C;?+SpXE2&4q%~yc z0TFS@l3js$xq)61M1E!r>f&!O`9ohfUKSKFQ~7rOZ>Fbs;NA46&^I|pjJ9)<>^@2+ z=QHP0aU02&r>&zn*tVvmgNtxa6{h0AE6jj$oe8Ih>6_F?PT8L|xM5?lIc8-|;c=_h zo$?snxAuE(LujHZxqZ6)4Re>g9+1$1DS~?+tgVMMQtfmeM^E7VK_t*sT&?pWOG)=| zsukfxY9|@m{*7%8Cv2ZFEdsIZE_L!E3=MtHKIS~S3;S5t!QbY7gkM{@f?$@s825<_ zFjb4&TxKm)eEyTp3OvKyy1O)anX`^ifKsQ8YD2ghd+ZDimvZhe)I?1lJIxL&AG_H8|tpbp;PePOOx^NW`+~nRBeqdf7d*> zTD_od=YMWE-+dDb*eYrRXVxS~h5HRWSNx+|BGZ`=e9$@EkGmq%#p>6P`!&%>Ff^V% zfYhQEU}+r=d6#>4+vuPtZv7gp+)+nYM06xZRqB3!z}IeE$=uU6vAh-6U9btNYQv2p zq|WF&`;M&bmkZA{BDpt*wO9JS23GzVZ+x3GzTSgd2IZc1Eg@ZdXavB%TgA3JKb>n1 zg(74~p1*$w)TDqxlu%$W(Yf%FV2Pm!1S!61pC1=y5Eiwvk5ZK(hcigGEim*q1s)Sp zhTNZKSh8v)3LCs74BxL$Xw)bJu2}Ljz!i)3kN-INp+gY2h`)7N>3YO6#rCi5gb}$X z=QPtL#f7=gC4uT8sF*48)Zdm6X1W8z4I0T&BEqesaH?4!v$ZeD9*W8V zM4}z)vJ>rhTbN~3TFEJK7mw9&BsBG6o(YG{wOou9Pw0$p3p(CRKd5*tsS5couDH}m zip}hQsuF(wMfnEU6mqBqCV@BzNR2t@bzpT~JqlZ7qfu6V~{>u@gxeOnd8ZM5^zzTNS3@M6P-;<1u#-|!FnyoI}wXZ=S zHOvOc>awhjv-sa4ZJEyKpE`T$Wa2((o2b#?xuY}xGKAL*ECg{yO3hsAr@_T8mYUQ))7Z0ae4!w};CfnW*!0NnR%KsPKVZI9AU{A(gDk+BB0mZ}gs% 
zWCzwT%6FH}LNkP$k^!8Xqo&uCu-M_oMU zsxcGPG)I-?k=aJDH1GfeyXQP`=;D49ZQ{^?hJr(6T1)b`076#eYsdm|a{gnhmYkM` z2|C#(y-hwsEPc)EuS1KfPhenT{~Z@QEdOaWb~v%$Z`d?KlB{;VO+=n74}D7Z;PX{3 z0q`dCk3`^wn{42P8~(n({E;u`Pw*Yje|RVsxvhMarh}atlnnc9oY`WebMfbMRSkmAJ1W+5Xb*FPDbo7_pzLAnc_D$f^pE zb?YKGCG^V7($~a1k9lo$pvk|Qbkosa7x97~RcTvl9B~gHc<&^- z=M7TC;W5z&LhswBNWHFpyC_#?g`+SwLc~{6b8Z8-;Mf}#4EV=U8`50 zD$nF)Jgk}SoGPKj+vER1s{)cP!CH1OTGazatIhzliVk?;hTBx+Eq{e>;2UURST$_3 zp-m=?mSmNklqy3=>~0JU;ue^{cPKmQ{SsWteD7u^7(N!rD@Cbps znJ;~jZ%`s@CO-p^Ro=68g#8#*646-X;*X_~s+<2sSxG&zy;4>;IHR>SfdFMSu;J1} z)2Fen4Nz8~8D*nWO9fct(gKx%xEY`$90qrU3Awul$LI(J#0)k}gThgXlbdX+F*hdE zo`WHYjJVNADr^H|_}HVaW$X$>shB@yQK&2@PHiZ3!6z+b7*_i4L#}0hd4?G+u=6b_ z5J8)h2{`=c!f~2=+-Ba8G#+EmkVIw}HPvLYpeFkd%6c%p(_EDbaAK~VeeN7)B99xy zxSk~sePq>#cMQAi8O!N10NsAk)j)C;j8Wy;2Hdo8>j-+L@1_IPOYnoA z`C*S|rH)L=){vkXVGye7Lm(?3`1x*xOWxMT)QBRYc%S;qI4{@5#5uMFFS&P|PED~f z&724?p4d=2y*mX;!m^c`b`d@0c7|Q*S%}*kp{3~t)$V2fJi%tAS_E?ERuDv#B3_Uv z>|y}YuSyD=avH6&NDaaeT8kUI_hNm$9CD}zBq&_70<2OOg`~EeGA5j@ zPdex33%O0it6n3nP*uc#&p`EE=9#?{;r+j)zj`{Z3F-5a_2MwpQ9-n`GZTfbsbbl7 zo_Khp)$_uInRHM30PGU^0WPLSl;djmfnV2rCC zjc#Z>gjwC)tZH_m#W-0hKKI^0Z@`JQMp1zheq6jaCjjcT)0IK_9!J!7Spxn=z zlpqZ{w7$U4`vzqnfp#Au1ktPAQE|yktPjwMP3ZEL&tScfLeZ+R13PJtuat(IJ=<}k zmJtz|x_ZJtNRFJOcN}h=KqYb?AA7#~&GUOy+y1;M%SMOykt43{X{nf~oNiJtrV@FH#J5 zV(NGLg7NE6H}!yi$hV-x9&#zor-Q%=j@OmioxF`v=qQO}q=ky>S9 z(ow$Y54lt<#a#y;zC@E}a&#%fX`&aG5QcD~il54YEPpPY1zFc#Wx*)9P0xV8vJ_U+ zHjy}t=4wmaDd@#ZuLU}-BWwBQbCMf|YX-G}et0R|J<%}et&~{JB-Gb=jKt1fj9Qj0 zOvHOS%QROXPdHC?N$CB4aSR~I9F)ypQi22 z1ABhqS;sp|lmq^Q(>11*t5IYb(nKe$aD%_4qYR3ZY9LAEQhKaxEh|}z-j`s#9}k;O zEE`f*7BFRroJFVym|8pBRH`bUw-`Z{>2J)6uPVufL^^264Nwgre}9c&$4~-lML_C) z4b+Nb`iwEW%yCkBQ|~G7?5(2$kr5*eJG;8y=x}=hwIYkArN@KzC!kh@VNTWSY4|Zv ze?95~TIylJKzwu_Xhc{y)>3nXbfBDgy?IqZ2mv>*N$`tobn62O$!pNOZnApx3l19LE!k7U>d>^v@Ml zshm9(?Og+KtqTU6MW9){2ej@8y!ki^2}L8R`LGe}KPXl5Daq_UP&9qD-vkMML&Hf) z?b>9cu9ZZ6;~j?vNgyu0GE={9DP62^z-z>s)p7r#(f0xk0r3L-QB{OaJ6?lC{y(M% z?51%uXy6q>*3y$VoIpmCdCh20oNv=sQlardrO0%@;*|6VCKhYxynd;?h*MMNgRfu# zKgOT`2X;q?NP2xO2mI=wC^Z&2TJZlx*IPwZ9k<=uba!_nAT3?eNJt6@NOyPFqPwNL zq@<)fq`Rc0q`On%`!Ds~&+~p`@1q5$>tKy*+`l=m8R&FxKR$IxZii*5ZIF$fj3!@f zk^u*d8-I3Y=Umf!U(sQ)1@(nJpQ%s$5^r}Wwg3pbyWmL3_4av$P+3F7bE4hXk)ZzXd67!cKskP0_22X2 zmR!O%ZvEiCBaqh$lE~b)X3@fwTLLDV!Ox4wo@&(kDp4@5aIOPQV$SB^$w8Gw7H4RM zird|m_*$xHkORq^?C-$lpkluMd)OU@I3&&2`g63M30b_^6uz3qKwc~GPhLwM$ZNI3 zEp34FS^$!X@{Wg^ZaJT5OnSVAKGJPna~<&s?&Hs*z;FDQ*8-3X$ZJUgc`auKj~VwS z->XVJG;OmGk}ti|x(HPC>i9et8J|JsfSWXcFw$<~Z(eIllap2ue}Ep}?0@oFNI+hz zGW5UmS~>4G(+jN4cgT^!c`eQWa9(Rxp8ii>3v#Tl8OUo58Oyo$Z!1RqBeFW9e{n5* zaSqr&aV=WFn>9Y0WC79+!05tIBB_VD1``=pX*%~29g?Gzj3DiwycTaEIIqQfK&VAS z$24c7vj)yyTl1v#6s{2<)FD*d}N4FNRlMp3?_3kI-Wq#l+v3HI~OwVZ(CtiiaZ)A0jfyqt{vS zG(rfcKtfDn5lvCSHxGO}X%lM18le$QzZvfuZ;V2|>dDb1ZF9_wNp$6cfeS@I$tKAT$H=Uq z*Gb7Xe1(!4+R&hE&sDbf*Y6NaoA@laEC1P z{chEP>iL#}_q%jYxhga#>XI1EYQC{rwTS@u3RVrT(yadUN)V(pYKoKdS@Bke)CdN2 zvrAC@5xe3YWJf%j>T95D2n(-q>*T(0-M+f;?bV8fxyz2u8qs^Wm|3S4g#|+A$LH>D zR=1L6(8afc@Ft%=f3KODpAL_HtMYB7JDgk)w`kq~Gf02`$72Klj{!J_?wNv_t@29t`@SAT(tX)opUu&3Mn@{?q9$Lude#WP1XI7(q*XXdt5fw&maRxQJ zY23UavPuXT{Uc4UrQ~)zd<0Mgl7;ulv`D=$wBSRttw?Wvh70U@bl``e%FX%I&>nJv z4B=ODrH2Nf$deW>&3I!_;;m5ZaSQv-5P?TT9 z;Sm@>9ZW$m>uYyMP;~H>^AZsPA|nc*TL;elo3ng8XY1UI zoA(NIm8DeVMk0?oL)3BN1%haq6{Vto6gI+qNnu5{wAeJZ^9Qk){y>bY;Du|DtTz10 z57Ad;#ZBUKlNXd9=xRk-89YjME~B2ccLpdxerHp&nuaw4X-t>6RdAflbAg> zuE`V!@rL-zy|s&iaQ8~`U~yoE#$U5jV=umRwI%v^G$yd-bMS|ZN~Ohz9EGq%r~e53@z2&^uOcAN>W2aOFmcRWs>6XcQR%*wBWVq7`tHO0 zmAL~c_=AX!8z2Sm(}H9>Z&(%iHw6##LjmX#64)-2BO52yN{A!nfrTeu*|&{ 
z`3~@B0W$YL{wyv-c5rUX#jlU6pB5(|UDe}5zp5o*ao8@*)nV)lEUSE%gjx2LAH@9u zyyu+n0(uMVIcGcW71bNdc>ET*m`O~YHQL_bf92r{ku;lf-&DR4kZR^37V@hqygVoo zga~vBiKeS8wcSdWGnpHHSo?axpXfZ_R;zfv1YPR(%e)@XQy>%TI?%3Fb+P`?pBtb- zy7;E93*(h$91hY!o6aegLY}{vEzJ|CI`3mVr_saQIRRhA0MxbP||{xDCnO zw_U1ve}^8uV)g{b;#p{4i)6iBN)$^>5$<)dmfn)KQe3?11 z9GmmPYp;GZ^$XH5k*5E#IY=?R+BY=sXwB3Tni+(ZHImSb2;Cta2aWv^lpX?tQcQe& z!iZ`}iD_>P)UhuZi@7V~1+)rEtA>OMd3NLO0Wd!=Bn5W%;KJzYj+YwN)Fnm}AUxsx zVdCqECfo=ocAT&_5F;aCip3@Zj3Qv{DfAM6PN8JX#0oqPHkLzjZL_%~lZBPxY`$OW zX8oaBw4V$+)&gjMS!a`ID$gRrW>H$XWfGOQepP2rYCM9o-(k=lX&JbDg~J)WW>fT6 z9aMp=Ww}o}>OF@!|4LP>-$KabC`FIyX1Tbt$2EzTf^uij)<=%?-Qg`Kb+Ok(z{v+f zefVSDW#(I4yhCUB=B5G-*1>N&%}cINx?I5w`OYEvj6$ z4AE~bO-XB+5+hSqoT=aLPBMuz5t|XEvL!!!cX9x3U@^RZmz~YdxQdTV%?)Km@eshv z&Ma)tQ=GQE!y%^6c_JhkaI)0qa50FwD?v~vq|H*Cxy?;+K#D8PdT6})G5q|IdGyr_ zLM+JD1{fq9izqoYG%w1vR5|8thDwrK{A16;fFgj3C`isF%gH18K&3Vl@+1+Msf?wF z_~k(AfD(;i{wIE8jAsbm5MrcD#_6Jn!o;6S;mXg_^ktEnd?@0^8>mw5Qp%Gzg75-t zoG+P$c(~~#QdXn{`!V!-hhdJiIJh9AA6ByJ489Kf$v+4el4SKJMWR--$aJ+ZWh-T@ zzj3zL>(j<$<}2ao(`#FG2{?M# z1v4Yr=L58wOzb?;wxiV$xGytACYk*zf?ul3EA*<$b!2$85opg)3X}3%ah*V)Q`+9A zB_g-amTpO}K>R4N?7rlS>2+YC0f`XKO3Qpg1|Ay=abMuTkB!q_Ot7VI@=7p{R94@! z^+Bb;6IOwm9Lfc% zFguY6O&Fii!P<98XsoqL&>+_)JH6t}H%n58o_oBB?Xdg3Vh-T7F;-y$rP@7}2tPG2 zq%O*-`jUAQ>**@=BYYJJ{%DA|);&2H6`T77KQ<+=iE;~zP~Uq3Hgp@Cg6`{*d>|bE zY_*M>uwJi{|3001f8q7_2Pye)bIY>t$T`;NBf4%GcQ3logynO9Y3r<|sYe{vE6n`A zN~u?qVFaALI@h_@;Dk;wNel`N)XN~#;cO_eis=D0DQFSx0myP4fumfVz}X82h;rd? zZHlW55P)AB!Dp{2h&5UG*SH9kQ3259zqtgj+7;Mb%fp3HD^7mimOmGsYWN?z_SkkF zUZbHsJH)oM(~%3Lhb?~~ac2WOS_8zBzrjw2BDnR{j$O|pOqo;xcxU(&89eAMd3*?v z!#>nmu|(^HXgXIRO!bsVIv0cwiAC9Fyp&C>NRn$^$J^@aYSdwu>;?0h zWj-5k=tH}Jb&~VN?I&1HOS-B+Y|7lg_?78eOU{Y{KV~mNXRU1H(Gp=KrJuiFLW%d> zK6T75n@H3<#VlWVW+~cW3*t6`7Yl_PK0S3E^9ZB6Zf3vBWyPJLEty)bo*H%m@e2u{ zv?lYMqgPa}pjQO;#8{ESo%9~VlU8Aepfc_XFAlp9JHUdJ3b(}lV`a4;tor1vu-vJfCFZ#-8>bBlhB%Ca4Of=#jadm*vcBp2vRl72Un z%&c^Sj}>V`XtmE!NPz0xPdQVO{DCZP(k6IBhZsuLe{+G<2SxXApAU%1gFbQv4%qJz ze~s#QmGuasxU7#0{Tq`<`>04mxsTYwJmtG78U`s&??ns5Ge}E{ELV|HI zK8M_0;D$h0g9u%uC)Wr&X{KfoRb9+EippSBFTe_T1<1hk^J^SKij?=nZPA3kScy^L z00sO>ZsVsp!$$M7o!~FL437QZR9Wfr|-C zOXkpb-+SUkyvKh89sh1@*6-MXc@B0bMDl zRX(6AP2>8bD?PmbFI_2sTd`eKATV#Cf_V!Iux9<`t=TUCZ`B8tjlNUE1$YaLTjX_J z{~Ncw6fbHMllzSS;a2e6$NMIx{lB=SRUOV0mH2}37q?_}-553j+)A|f1%Yu3?wd;Z zKip!;wU+(ITP8LDZ{_|MZyg-w88+gIm}ETSD=_+DOno@!P=~iHQie=fV2$%k=8#K{2^FY`n;Cb z0X;G_+AxzxqmplnZ87jKFE6maO)epdWp##yaYZZeUz>%1t282k_8pMwa;18TllJ3< zv9|@UKR<2S3`SWE&rrm`73o-|!bLy26G;HxtY=5Sn?;RFla7V=&RKtT3iRXA;qLcZ z`_1p?0DHjqo2u(thTGTU|~Xx2vG=Gfe>@S-x-s}LUgQg`DgPxb`)>2;`8@E z9(l{H4Eyp$MqLlL%R&|k89h~o$-5X$9=Oj(- zSkbZ-J^V&Ix%Z>hg>IJ-&#LjEw8==7bt(EI?%bdMvIW zkzfcJ6G3XWui|mXzmuTj)su{1yTC47Y_M@a{Htf)^-^BSQeHX|N3#qSG5W z)I|0{F$jyeTQK7e#3?}}n zV{mse4;G^V`@H(^SEPjs$u>QK%zQ<*fu=i=qRXdc8^6qKJG|^M)0SpU&Uw+AT&Vtv zc9Al9vZq0KYSgm0ot+$C%f=_ zI{TO!-ZtfBCMgA;aaLZduPxg#xX~hWPtTGt+=`=2@LfY&5dCI6rFfb^;VIKr_F@d_ zziZnk0BOTq!fs2&5n;d1odszXye}wClx85elAB}viZ2$8oM$W;;`hd}(a>Pp`**%Z z1Snv~md35n8iK1xYtLHB#~}f!S$I;IOJ#blre&VQqDe1z=ZHc&9b7;BD)RMKS+JPn@kexZ|bTv5h6HJ8$N^w<9WpLRkmn zR^Lwr=`SKiz^2mFHT0rJMn}?1KvW7kkONGm>Kz6DOr<58J?G0D&{8pPHUk`i`XO(q zMHOMnR;X|rzqg5i4#Tk}0-P5Nw*s1C5WFyxqurO{R0QbDNw~RFJ#^%UKWTYgN+2yy zD_h8y&{0kSq~&qs+L@iNC?n;)$xn3eKYJD3Iy{g_OvjQ=tWNWRtZ!USVx7hn6kjo^ zt=^_Bqq>qn=5e+*IQxsOK6THjWpWe zu>2DsfA^vQC0Whd0rq!m!2V8k=bZBck+xcx&hL|&-^uNMg>$Y7V1G9zkv=gyEji{E zfN$Wkj1~IIW!0)~_oKEVX0xtMy=vPP9sQ*lX6d9rkw`7>xQrCAX0;16ESrslf1J&f zgv+&^aM7MQqpit;oZNWxzDT5biliiY43dww%LfV6#H38qB?YUUoO)?sU#cKM1b$dN zc>XJYH}OaQljeG>oatFO!s|D(cdfTky!hC*YtQ2qW0bkN06t_T^<(;u<9d&a6&%(P 
z7k@nwZ=pJNr7=y30Ys%4sq)bcGe`ktns*wfXzJZVA2q;fuK9BW^gaEBl6_6J^f~1&JYClouHvZ! zW&p65o?d+58d~cW8b1(DE%y5bIJth^SsEJ-Tiw+N!Bmh#+$&1MdEED7TKM+3aLZf! zlXXv_8v@auQ;u7!&9rtIbk>%<9kcXK{p-|ac~3!+5vfaN{LCTeO3|(wlkV>+yec(B z#=75{INqas4l&9g7CfWHF}&3iSJJ~tVp7d9hk>`&yvI{{-mP!9tl7}{*0l$+nn9)O z<89kAO*6m0*zbM}Oy}RJ@f}_h9~1ha?Xuxj(j1!3DD95Ub;;312C{@&^kr50!0#*~ z|L84Wvgl&vitN|^Sl(U#Td8)YY;naFme)2o2)AVMP<`WwMXA=Pu>o?L_J{Q?ks3@Z z+-I(q(Y*VSfb^Rb(Xk7W2NgpKwGnQ*A-#Z7jVdb?CuE0&A*Q{g}H=vOQ# zoM*ci3rf)kt%P)zXF)>lYFy<}o9q6=F4o0~4=@(h&4Mibf+E`QXq~SmzEkkiHuBw6 z35M;l1^i@$q;UKsRR9cPq7h1tNa86LMQ497R36(!4DUPT%`4%64RL#3jl=9g^^gQk zTns77k9w@XffHBC=LTPu3smJ~2l^-zdQ1I|JbE}vln~l+3{ZvC)_nvOXT|`9*hOrr z9^5nFDHQ~JN}*pf|MQe`U64^yITBg{7cRF)m(5id9NW}&R3ys3S(7ebxh!k}IFxG$=E1c*v~#iWuJq?6wd zcOnDL#E`2N1zlvTdl@mGSdjp4>B5A6|n5# zvn%V1d0In#9?O>!ZTOf@s-E(|I^KKx9wnAoQw0Q8qg5%r7*#aACcR6a?u` zl+&WMX|PU2@qfMWoEw;!P206QL7y2hVl`?_ma~ee4DnzFu@NL_qDIcWKc5dd6TvyN zB3^{`19zbqsMn|@vR$TVkx{>GuF z1gIxAZh;q~XK~OFjEu0frB{EDfhx(`k5iyi(kS0CuqWVC;Y5`I^O<8k`foku;WmvO zorY1f5rE59G8{bRL@(x0FT=*)exkw|m&wrVWMPyjo*4MDg^%?-#97HJ6KZuZm`e3GlXWR(Uvc-!%NAEdi(gJCTeIg4DcAfE4L?Ds`4U6%K}`9L zR6D3hKj{}fd~M;l{zq9KN@(_nPv5Z)1M!D65xEc)H??XX&^o{TRQpt&P`Mf(L2l?g zY=I#=!>i#APbkRtv*a}5R|NrKsNIBDzv87mckxdI4x{(5bq|VifE7L-i(%WyVhC)7 zF9EFZQFIteQnq+ZfEAuqp^$!WB^r>zgZ^0I6^c1A-hr*~&!u21{A|UJbk7qMml^cm zb5>nC4gOGb+hRc!lGZ}lcbhLvm;!(m{=01gV1;KRz&??qSJFNW4z35R@YWnn4LEOs zW?~oE3QyN^nQ%r-9?`abK@o!U8V}|B&~<<|oj25W>2H+^=;|cU>7Hs#YK*TSYfp$Y``^3 zKd^hdr0`{J4G1bD$(*S$D8C+yzcQf(R1ZZO`!=Nox0=>iF!r~@IX~IkV`p~Rl6{Zt zACkxQYgv&&+DD+HE%VVg+detXEBc}A7!k`(g~-6G;odN~m>6{^bl6_!7{8BZ{7j*` zQw4*R;d%Ouoprm*e|%zDyyE#v!OLg{+8eOK%Of=*N7j(zyHaM*eSGH(r_wn|yB3m- z4pWS0k(y*FGBde@eV!~48Nj@p_rkbrfL`dx{2(vb3Cf5ah{_JmIZ5Ephq>8Bu>XoS z83@^)41P4MjLL)_dOA3D6KrhV)bPrJUb9pb$evw06i{!RwfueTyL@{^V*wvrVGUI=r{hg!i%hQ%wN2xjvI7E5(yol&w|+Q=g8IKhTckqxMf(c5sQZ) z265SI8jSz${=$!G@xv8e)dFthFfh0eYPY_;FSv$z5UMltK*-qYf;gTR3~q{5?B(9KRnkwebb8CL zCrdKhV&2_>9}V-&)U33mAgyj{Od5$RcYoC>d1r=lPwp?G&bvM|=D1J~XIZ%sub>g= z`uq%SK%s7Pt>VtFx0PuA7_+?B%&SuQ30`$K+3lgIm%Z<%yw10Bxz?xC#=|QMPSYp;*aqKYt*|_f!Ob%x1$5-L2did0tNByca#s*&;^6eO zp?uu8s^0GRNk?m7<013}(g$U~2@@2pyL2KrW}u=d`m}}|=*zxPy2(01_so3s^ZU4@ zTSsu#r-f#}h{oz#UUX{%I;E3a5R?G$h?f={ID^eV2##))73JABS2SR4Lm?sqkriNtbu0+=qmM?0#>E)FI^b{a21sd zhAV``R|9<58&kA_AjesI*q>6c1{j<=;U<>Yk5Au5HU837=3qo#aL{QAKv%+?B-$OV zKs*bhq31<@7hDwdZZ(7u3wd(kJ>NaXi#8ZuSM9Eew3>K`iMDq*HcSoA0KQC zrg+@v=;J1!n9BMLPy8jztWqKv!4CLez%>i>R|4-+JXsn1T3ijH@BsDyNZ=n@gviVP zBZ2opsQsLZYhnsW;IT~iS!AnOYqIhekimV#sMp_wG~Y11umI;QtZCLr10IGml8HGS zH}LDB{4@49rGl3*ppV$2k(rlE2CgHfeGO;~q!BF@`o>@L#aIgSRa!9n>r>^C$iy&< z!Oktt7J4#f-~VANZEQ$F8Ob%(I$#NU7MDg2{CId!rTqUN-@$Iw$OU88Z=%X*2>`P^ z|J}6yx!{4=JTDO-)mjr}#7Wg*GCnk46gw8p@0Dbe!REBMwFe##EB-jO#NnM972lw< zp?{05QLGXExo%pAGUVzyKH0T(Sv8TDAR<*4Er3#<)jq9lg#rgYdI7>5Oa z!|W|ddFC3=(GGETc`7Mn{_U)=A|1(cT^x+sHrv`VOly@W-Ubj{(N`VHH$UXt8Ho^w zP0Jw7`hPpk4w|R+@uD$|tn+G36f0N1<25CWL$c^Mn49(e^6A(;V!3=eZ=F_tzyYxh2ir-j!94iJ=G&6( zr7mAEJC$)jzT7tuPeo|PJNzfZ^#j02nh@O!tH=u&Nu|7NY0_hL3-rY{mpQ;jQc814 zvS@7eGb7bAj$Xf0CLL)=;ovHUym@iJNZJ+$!4u}X06YA4e~C-{UWxT>a+raRxF*b4 zwNd6Q4`ryATsbCe;rg!O{>3YN=6gbHVfO5fz(t&#b&&FW(Ea(fyd3?SRNM5b{kgA`c3rOjysa4e#r~h7|M5Cz9@BmHWgCk zZ_}8#2@Ifv==iGA2H6w*z%i^h(8Vev$Vhj4x9%X! 
zO{>vOAHkDnv6Ik>aly%*igZKw)Zdg2wr^A0c<(GM2Uq3*D zK3BxWCr)(gzjpkPfC)E7sg!h@CZuI-&q?4uC0 z`2;F6hH%~%5X7Y}DgZ?AF5bMJUS<9XJrV)~C^UryS^k!%=Jm3w`-j&?P3^h31W%JE;QDf@)|3H4BU>NM)rJ& zh$S&oy(|<@N>&GkoDvWPTseIr&2mAtRj<9Yvw(OOWpG``gtlE=0SZRMOk)t@)&<;T z5$ z&S3-f{a{0ds@QWB>8GHN*2GudBJ1@CeN2+jG(bA5cP-yJ?VF^);LDQhBs@s0Df#%7 zV5gqKE0`QQFlhB7TurcdRd9;{d+^jKB^VS<)3R{VC%b3WyL`{tCL~w=c^e8|+3dUo z8Xb-!kW9NM_*NaBAh^?aFenX^ZWXa6%NJZu~0V2;Uk1m z7L<|Ub)LjttqIo$V)!LS=EqB#f+j= zFNhcz(uofTv|>K=>8T4nlOFx-9yf972+R^#Xby^S169{vz`Iss1F!`p5G)G6z~I|K zb9CiJIs<>D3n~xQF9zQF;7Ou(sHydCyA{@s>+G3Ddf*L;?Ew47G71&`WP^X!Q_QlR z_;NQX$)$<`UDIO@v;gs+eEgD@ZOBC@Eql(#skDyXIOSYNoMua|^5HLHJY0s2^7dNZ zBOsv8##?oD#jJSU^>t~()E@iR7|odD3zb+LSq36cLJiC>p*K^2z4X;0VoT|x-&`OD zSIGLUiax>$r}FFkq9)0rsWXaJx@nmSh7J1N%b=i5w@h13`qFc~ zl0!0XK}%fSQ9uh^!n{W@6u?(yXshSjW+fJetM9Dj&Fj?3Hfp^)e^6~y-u$v#2+SU~ z?5E9#2~w@lST;tqzb3Dx#@A@$ztBhWzRNm0g(RYCTmU@4^Ajq*>qXsKt~J6E zRUg}=4x}0EvM;@VJr9@c%sUmAM!=^${7bi>CF{Co0~-?rW93O$-*T zK~vV{`j&F0Qy;bd!~N;tAZEvLV%R(G-W~F z=uZ?wtZ`m+M$qdg%W7)ihyR4M(=o zK=y(R>k9=|f<_<=bU??}=>31T4Ot-mb`lF6u=Ya%y%v_n(RRdfUNLjA_U;L3t{VZ#>sdN;87@ zXN?DsDG`HdLY z8!c0IP>lHSn}wLUOAAf{>=;eBLa}s*RF}RgXFBl8PEs=@cx7h>SlLlv`9$6hv$O%M z?9}nx`G8k;t_K~Hne4%`_ft5X)+I1np&@HE5P+m1Ja7p&F@a^ciVk$lo%;L86n6otI@5f)IEU--CXOO2m6T9 zVH&iG;_@D0Pz-9CV#yyl(Py%rIOHNvw~ZCErzNBYA>IE!XoV`>&DuM?EzhEL;q=Pa zP|ypymf>wzK@8$$+OQ_=MJvexVM)4@Kr*1$GPx4d-3Kk$akT6(@#M}Z-O*k737fzL2uthn z5$cCHeN#3G7yR>{cR4u_K5nsT9d=2(Nc3sFQf@LI?hx!I~gpIWD$8ddmiFLQ$)v&fEnW2th=g(tdRIP?|VO(rI?bjq1tGp0o7L&{XKP*74We^bopjCdZ zD-Sk_1z^^SXLjfA!4zg_T1Vf@0oM>US3p)VY(LljJQiBU22;$cV<%}2XJNcqOAZw7z1ju78r@SVG_;e39(Hpi1~>;eSB_OTD&usMcM~n zuqXxyDH3({5%C8>+o-vW__i9YW=kiFCttoqjt)>z_76f}y&T)Zd6Wc7}vzFJBHMv!BO^^`v?HwZS*q z&^bTE5R;HRHvD}qw23>GMf;hN{VQf0ZP4+T4HsP+oKoZIQe~wi)Z#eAWVcEBfS+S6 zU#`|0_(2AUlMh`cfmD#zb-)K7Ib6zxM4{3$w9*&iiYjyv6O(mdUDq|>)-u|pqH#Nj zXEA#G?l{(se7@=Z)2RsvzO!ep4uYO5o^WSSe8*|MSw1jV3tc+T`XGEP{29}A^!-~o zs;{I>xrZz30{^5gF~U1JCU5m=9;WK_9oFU5g=Ox^Q2%FW=qwv(uMm>}@N1zuIFB`# zowQHg4r*O^ZGTZw0Yw!3`Qv#Q#$@;4QHf?f_`NW>{pvAyrZLd>@?5yVKYQ^sl`M1h zAQqU6Zg-+^Bwe#?7Y|6L?bkq(X*WsUbbjn;kG5|Ytz15T=x#sOc!P*jw9_KOWeIE& zHS4mn^#CULa5r;aQ(;atG_MASy=A}zfBO@pOVBWyd27zGTW5&4nmd|sOZ&FcZ@-F* z4wWbKYU3Dtf`B}VwC5Z2a-&zr_HEnR)`nw&e2LNy|3Z1DEV8vavm#W+#x7Kb9*Q2Y zvhy(I8_Rlo=QwM3dcIAjSJ|~D)KTvDLZ%0m`J{YC%j@jV+Bb1Qf3|pTqOV0df1M06 zSMIRFzWIud&|&2I;2#F5niKm=Zu!V1v!lJYn!GQ?Pgg*6{fI3;Bwcs%+q!%0W78Vy zQQccqS8lp#J6eS421lpF7uxXqe54*2EO9C5q^Lo)uLf#nbk3isU6+FvlI%216|>^z zJ)tI<7J+2^K+=XFZ}L#gabg65D48{gWjr#G7GoivP=_m?b4X#z#s$q>2w}q@%&|(s zZ_)-khRQNRpb!TYf7+eO?ke+5q~lw~L8j*}Ldm4plix2J(-8=5rI~!ku9kI&JF?oY zKiAm#QO2lLG8-w2KjDU0Vn}pVPhx$a#qb0ghZz;WFc}`;O;Ti>U68oQhNFf+OOR

=EP&stn6AA=Z4&m&KF78I)BD#S=X|Xu{qB2zXTqJ`=k@KC*24(TjfXuX0U3WhX zKTwdWiYB5i!XZg8Ik&(cPl=g^{}3le+T(?^R`#ZEMbhAvAsOs1!qNtBd;gUm>vi-oi zVZbvv5t=s8I<%{#vW{J*U@no}a;<=A)%HV9WxYtSdbf+8Wfro0GXND|`l1U|s8TIt>H21a9Su!q`COM?~hG7ib{W#i@i+5Hq%;~O?)L~K(pmhsW}+Y zNG+QuJf}eh;MD+tS9FAltK#ecUgh(g=VZp9&lUi9<@E=zbQDAXEbS2eS=!;cv;r^f z>~#KL4qLk&;KLR}(JVI~09v2^f>y`D|GXa#;y&x;o_1kDx<&?I$;7u%PKy;)bZH3S z8PYL7*h$3D%!#9MkUS*x{a5#JLR(t7#&|?wdLjHu{MV@c_|Ng}45t{~7st;Sn@F{S zMf+XOL=6_}oQ2An#zR$`Dk)CAl#OMum(jAzhnyR>4?Xt zu&hB5fS#>Wo}IZP#l5Bfm*e3V6KiR(LPoRO-}M}`Hq7b5(y%08wlHR`T%9XOJ_2I^ zSS^hDvs%b4Nh`Xqt+xKd6IA*7f5vl2-W@;D9K|9GFu|%o%JYqCoqnvDiJ=SA!Ak>H z3-#k?QhR~bLJ%-p2)EQ7@Eu5BoefZ#@{*_KZ8A4@c^&;7iar1w5B$2qyN!V3fvq|^SP^|2j@JjTW>1$z7XC`RkUiQ(X~DBpY$kHsUY^LFzB8@^MGI#E1GoZh zl7M)#4PH||VNt#u3OMiGR57GGLhpa8g`o9df+o=)JuFH$z=I&+Fr4Z0HxU#d>MmXq zLb_Sb1Vb|XXSOgCJX|1MTd1J~B$A&?5MtIUforDjaGt9pAH8 zk?^}uzcy&Jt2FT<;KBP2ZI@vu*)Gz;VG&t3)U1Bqja!>d*9DFk@hf#HcMv)qSnLq} zl*F#|;(sFbqygFj=5rh#Z}P1k+roB%(L!FHqSQc*7mw&S3e?>R0ittr?& z5EA(GrS?PZ-q9I?&M$&RsY z!o3#qeTC%R2}&~KsN`BQ@B-W z+~)Uw>vL3BH8`kooF%}xPGo{wKi)OPGx4D)DZ+*pSH{`1C_8{tWS|x~0H>6|ICTIa zbhJrqARG+Q{aKj}pefjQHl4GCd~*g~BpR#b$Di7J{-Y{I2ch}uLD{@a@kH|v-fW}& zOv_#(9XS2G-Fs-LpaPw1JK^Rtg8;1SK;{MxfY)`3oPcp1w149|NWi#GiT?p~2(M&l{+VZ&<**7H zQh~{DPKxjBJ`E&qFXVo6YlwXa66yM*b;z{9hj&pDrq5L(bl;9dfFM9Q4oUw|VMz*U z%_q~ieND6yGs7sn5*jm6zpM-D!FQDdW!eZ0J6!^vPp40_80{5CiSRofzCE?AvltDn zHc=OaB*nAlw{t~MH8nrk)ad;q-^NEDRL&kOiLPrt8~h%;0Ix)o)N3D>Y?8IKD~#B| zipsCllUJOyr7PyGuuk6@`Nwj-eX7u|<#KJICU~N+_UxV-n$~JjjhRZM`Y@kw zL+o*F!{~fRN|v)nbQRhjWJGzY12=<+<%EB9c?VJ=!(b{g)j{xZHD$80-x=4xP>R0f z9#NsODpEFzm$IjEQ1m$DUN!JI?y0&p13Tu;xw0EJQG_5Jc=nPf`9=7Z6)3`1zA&E9 znu-P~=?NJ(&{vTAG>0OThlHREjoCIeDF?t%0eEq=G(;nl(P7Rnq#R?v^XssbsXn|3 zSO`Va6n_*qv6V+G9NcO`!C>5h4P_8wMu=6cL_qk6`q6K(gaU;ZTrFIJ=@d*(EAx;G zJ`d@Om&Vl!zw$lCn0yicZjf@ya@J=O00iBGpw%pYf+`91@sZhp;FY?)kszO)Aji*! zPP{OLr(45M2b$p66#_6OUw5-e=nL#aldf`cUTFGDa_Nlib523lWQ}dyTYI ztYVNpF~$Sjr-oP((}dqlD7{tdAdEKiolKm6dIChH?xSBaR5NMsj>R)`zDVq*?2owM z>@I>J3w8B&sI#_V-zxfuhg?;Q$1^fK(?MkSaTV&l>OM(WO>6keFHZhAt};;H znwW9m0gEg%Hp!cSh}Wuh`qfk)N-wMeBS&C$uX9Q^{si3{dB>r*avL=wVay;K1MOjE zi5f}lz<+l}!7$)~=#fLUsE{Ba+!!Drs30I9+^tzn+?`DvZ4Ip7Ti9AT{jdMn!4ew7Q22Qizy;W_8ji?6v!zgCj5$lh$7?8@!T?8xmXiG3EC z=H%!}q9yl__PvE%eSG#|CvR81_H58P0mUZ8Z7SEdeOj4FUb`9KnSa+Ad~V*J#&TMG zIa{+t{-Rbf&`JLIWM@})no*{!uuE6FokPu3qkS4n&XZBxK4k23tHE4k_06*^`EH`8 zO!qy{odzhQJmTvea*7 zOG&lZ@7p~S{x$N6`~Wt@($hdldt#b-NFXs(TDag0-^ThesZGMeIdX3Rm6d?8}A zb(&Srd;w{)j~*Y#cE{FU%-^<=XjDm7ms#hWnO;E2b62Bz6Lfcn=kj$eBTRmmoo7~0 zCb=tt4b{hH`+eYsX&oJBDm{KnQ;a!xXKbMmwMp89(J@&%SUyUQKvE~u4Sc%kpJw7N zIe)s!B%l+hp6(Z*#BE*QM+422(*~T@bp=dNEx^&%$(47vKV|-4%v7-He!UUkEwv{R zR%6`PWNl**T&FNyz{~#jxJ!X3wTE)Cf5375Yl$v&nh$bPg3djEKs$Z3OjkLxZT>+a zZa=NL>-I82NV+(E>le+l-96#AVJe~Hfg0=`M_rpB6dm8gnp~ zwUG4CY&PR2rI3v@T05gLHYM|BGz?Hd5_=ShAHs)G%|_jhm&n5DqnXj3vV+v5;VBo1 zT5lXA@Wu8eqE%}4(3y$qW4!JM5Y8Hu0u*iNF!Kv%waiBzzFWXpXliVT6;t8)NSOz< z$8|)a;`yvHtYvWWE|7wB;`qPNLw!vgnM)DOlyS8)oBu6g&iJKTx54*hf%jGLC*>wK zg2AAh?-{n+HOBKDl;SW9QST4n4%ZtCjUxMW^10t8%U~e};$+F*!CZa0CSn+#-0mjH zdgp}wVw0!-O+@6&E0p%*YDsE09PbKSO0nc2&$hdFi2RrxbjuyopgQI6{Bs4X*f8~@ zGIQ<&Frs3zPXQ@s;-Sesd$E`mYW}Z8QGZNhi+Np_ zx)N8EOP^7suU^%NdCYclhIr<5=#GcV)H;|l$4Aq0>g>25J5gSCu+VOle|`MdpE+hlYApjCdf5&aTS`&$*_7VWDSo;- zdeXxkd{PRl3J*1%1~PR&PMK-=)?3AjMvpw}08^jKvTIPzG`bW1JZZW-Bu^bHbpW9? 
[... base85-encoded binary patch data omitted ...]
delta 143915
[... base85-encoded binary delta payload omitted ...]
zN;_IzAAeX_*al^RtBrxC+Qvml?bIV3#(2*Q_Qrd?PkRl2z1KK`;~V|oe~o`&mcq}O z!p}uw^xdv~vg|sf8`3UEf_CXtRtCRjG&Z`f*WWnMT^R^xODz2r8CVWjO#7!zZ}c_| z?7_g^0|Pso26~obgT3C`JC^$c>Mhrv7RlrMI)7}_q_IsfzIN_97!d*{7}+g7qej!J z_usIOPTy`RDY$d>_xH>3yRDu(XnZ#DyV@wvZ}l8|n-SFxd7YMLH{Y>wR@K%xy_RG5 z;dE1doO_#c$1YHriuV%N=-pUr=sd4)hhfL^_qzvH#~y^%CPh>7YcZjmdre8|tqOwe;m8LIg_A!YatrwVoO-w4m%88QsGCRGS>sguQZ zX#S5&sO_Ze^fq@27}N}>r+*NwxnSDhtxX4i+2Ajj{dMI9{q2$z+Xa8uXQ{0U z7Ck51{b%Kho_UqWBXUKu`YIon>0kvqYG?svK(ez+%g7}5KQS+5U*0Wci|CY&A+J$p zqanAUOS7gFYG86)C4aT>LKKvp6jYGoD+M`$>3`Thc5T&dR%w?V2;*S>9A;&rhA7cgIBg)olNSv$v3>zQG< z-O)g1;J+6CBEf%E1hodV$%NsbkJvuDfNuwlzQXfHiGG%#Q6nDn05nov6D+PVF5wW= zq{^Ezu2SQg&n+hf?OG}xRDYE{s*F%w)$y4U(|n^9t-CYATA1*dgEy@uOUm1OlAoru ztRE{6*fGho2~<{J=Hs%83*w&TDH+PY_GK-k{4)SV7gx}JRpYu%<>qw0p=$H%J9KaJ zkB6nw`$wGlGEP!)4t4Sn23BXAg{ovbWDF{Z)v+x@a}1%9IZ2_Q9x#e@TRkXdlYh8r z^`I)W{FVRxP#t=>yz@*5%1A2urltiiD>7Y#{bg5P>$Yf%;_kuSCAhm2+#$HTyIaRy zg1ZHW;KAKJxO;#g!Gb&Y%@}j7JNLf#l=G?of?i53wYJtC_?{~l@13gm2ac!GWC$39 zVKe1w8W2fw{)ePUG9h4rb(i&aEdLAn_n-r_Upa~q7<4MjQEtOu&teZ(>S;XY6t_t+ zlC(n70}GgWUALa7UIPOpgi8$PpaYBPv|Y?-taBs+3ST>&1i6 zO39*ertCQ?Uy1|U2Rnz2yf?@Wy;NIf%Co)GJ;4O?Nnk7~)9OKHVPSCmQ&7gdwMG?w zhzvOT0^UlE^QRSH^!rq)of&1`Zy0=C$7y#8r7NC-p!7@JbroOXs|a6tW#W*O^g)P5 zrUxwhp+AwA69P>vo0u^<;S896+3JDH$~Ok;g=K5z-h0%Vi_L$e%QWa&3udLuqG$Ev z9afC{hB37R-XfyQX_}sW<5NvmKbf(e?2Fu3_S(c^l_&7(vh$IBpO#AUN-{K9#od*n zR^SF_FWrnsA$XnL;ZkB$fu&(V7|EMDUWWPyT*L-%=>mjv^aKb(D3aM=wBg8*8;JCz zA7JNOe^C-;u0H2D+2B*&%_7Ca8whzpOvmd{;5EqSOq5hbs5ci|>b)Tzog!N#`}bDW z#68L{egnkZr=%>9J}29Z49J!1<>Y|wnzV+Z8Np#nF^YBxQQ?^vsw)sY%Y0fG;`V+u`Z7lv_q$%r9CeE5?V#?4KFfl^) zCHaF51I(_0>kILE$-`&<`n1lijA%v^zu^iJSwvpFCm6f{vsRUD!XXn@u6P5(o491ls$2P1<>$FTr#}X66Sz=2NQ8&J-!-G+)#t9nz5$G@Mu!izSW};;E!6 zVUz*W%CH|63pgX0UtWUnD-)BNu~%~;WS(DA)+ym(eP_W6 zyO-4_zvH0-u@!Y2CLX{;wUHcI(7KI^SQO%Mw)5-Ki4Me8%J~O)`*|?!*U^`{%*JC$ zhatGCo;kRY}?0{+4kzz6-{<$3e85LjI_$x}v%KgaUJXqR9Z z`tewanVK0!b!ia{=@t$tmoNtb^@kQFlbjL@7K}w<^*7;8{PczQ=V^e^Uj(C5^<#>x zDJ7APjFxaYyoQdh&5^i8_JfdLx68--(~G_Dys;(h{8?S4uDIt_{d73b_=HM;cB$Sa zdkg+4EsVVNJ<&02m=DVhvfctSz^J|#B{=-fb@r#G1trD(GKfg1nK4RbLpBew*^aRu~>fTZ&Kf{|BuKtgWsiy%fHh_obc9 zyfvNG!Kk*Sby08S@6Xc#MTff)&&Ba{LASgxzbcE|0BOhK=M;oceu~3D*B)=!O~sCu zL#^jc@o5*!&3Mig5+04(MHfNO9gnBvu?tVDj_ad6Ng``cOxSY;CD)N-q}9#A-!?x? 
zAiX?itli}Y(l0$=)Fakuk-^;ecncV_Aw6YNo-sqUMP)?d@cX|3frkTdYD6&{7QSHR z9IuPFef=f`DbP&ZFbb)0*HLQ~Qhc#pIPrphK~>MrJ+s;u-0@Cd!3(r%lw#EXt zTajtuwsO-CBRwa4dlhumeGQ+tbu@8n6E{*QTHb!pX(I^<7;yjNaNFW6(S}~_FmT)v zvbNpD*lLQbUcCQ(kTvVx7FoArvqf;aiSKz6a-s<~kCPX$YWWsoVB5oa)WmN;U4_?t zUWA;ji%8}c;Z4{*eXIg50nlqY@)B8^wAGa^^{zVVRx4f zQR;q2?e>x--oy8!WqqdVuMcKTzpjqbFVWEPQ}Y*r4^jOOp&OW&-FM_)JK;2#!(2uc zury74qID_Zs!@_$*j}%5u0m~ultw?i^*)Lc{o(FxYzdEjtsX-Sm||E{9PiF$xJgd3 z0)vZ4WI2bBqqxTUWDjf1!XOjh%5E~6jw!lCK8cnHP`;4gFe@q77fxkVLMb0};&x5t zLWG-7gc{cYAxunP+h%{qECf{!xOJp{-_ zDdlP!y2xR_iAX^+DnC>3Pw2%?>b>V)iQOH{t42pEtP&^TeL}#8N7hf^G^h0&v&#Il z*dL>8<&wmXPHf0ZtBI`HYp;$5!#W3oDXuEfuXKUQ3^QSQ>VqltTiy>dx_4|y| zUzqA?d#WLF5D!qo>9p5fcCQaa%wV3=!DxfSXB`F0FYp(#`iZqFwE zaErc9qj`2#Fze~HMLRQ%!BIJ6N*pLzX)89wSprQcMpo%Z`|~4K&Eo>Ts@1aUPyz7q zP^Bs4q$YYAGvgO5qv>WY`ci-B2w|r-mA2~ER@kcbWc1hipMgZ7+I*tCx8}{7DGp@k z+Br!o*GZ2f5~~{#<*gRP#}H# z!G-v_+arULmOx{qIc1VgjOv<25=)Tzqbq@+dfk}eDu)j(G~?R@iC;O2pq@~Z0+cx= z)~oVt|HzekdQYvN9`Qq^A%9$21|)fUw$+J`J4 zymCla1|YE^SXt6ZGa;}MD3jVj(=M6KY?!Z_; z*$I+batl4>yf7I7wlm3zf3b;K@>fJ-#Ugi{{1+ZlhN$Dw&eF`sy@v$A>lpQwYWF7x zl@KS(ztn{JFEtha4>f%NQIl@B6^NQlE+aW}HLBLtLo$ZI_Q4?m3CdVGpKe5dw)_Yo zLHQSfuk|yHJ1OYkC}tBZQ{%IK9TT~_6q1G0$qbn>;rS8@f?92~NEV)1ia^B=0yawX=@f6rvAvqo-uy79Pm;SJ|HVw9lbfbLq=S?Hy5c|KIP6eF|60> z5S$$3Ebjq+IF$PGtFVW# zhjKt0LoeScghtFEII-l7V?UFJI+KTt(8V%a6>*T?Z%P|*;bo&U{St;YN4nk@77B31 z$tlhyX8MoAmz9c|kWB4l>>R|=I5|#mCXr1HR_L|2%b3gg^dBgR9A2R2zD~pwUo2kxb4mCSho6lu`Ua;owXOUfdKl*-lqfF$FzI+l#^*b0J* zhy_zUS3^Bu^#m_X$}vf##hsrK=`T24{BqqqO`B3c(vA;~)AbEf&t5@Wrpp~biJk0` ztRD&`j!$Yrk#&p6uXcaP#E4D;Nh#>RQYwSW9i~I|1Q9R#4-$bXYd_AyxG9Mr-2O`_ zIV@2P=e(n{fviyz9>I3Oed*6w+n&})m4{PS4=Ubfop2&Dg1 zC@SFNeA$04YpjV`a0?fkTdWuc8^M48w3Jw8Ab_S+(w0aJVkdUr?`whDPEi!ELO0sf zC277U7bE{73#|(Kim#rT4nLnSpU3Mr8v5?l4q4Jr&Td~A^?SVxG2`1-R(8CG*$%?< zTUU*0O+~O;9OVpY7m$rKV7WWD*sJdCJeS8cOFu-uaHucUN+5N#|LKG5dNAz*JRKYF zJ@O*&w^pyfXvMD-lOXQ<3kZk8v`5p%4p{Y#%JYx1Y{jRgg7;#+gS^zg@Fe&3GYFoP z-{a0$U{%{RHmUQ3M=jYUl?J9;BKS(7ejVbrR>d#8l657$seMGNiOqqWdp#v`=XY>4 z(T&)wuxvU6u@evBipDhHyk_7#bAE%063`~Pk?Tvsdt%j5#N%Y^lg000IqE=dpy`TD z(AyajKhKJ6D7L}-g@~z$18gG>C$Pm1{FE;7TwK`#nkIGonx-x45fJP zW_cMFF~(jQyLAJX<1^O}8k@57V^(VB)2+*){V-Et)zOXXIc^!?8LWSQ8+qVYFnP(L z#mtqG+&*sJ`o@QP9Ql1daYnyAmGh@2JYsCk-dzg@pKU0QvFdSse~|frMRUN~ouns$ zF=}WRKiv8{oc@GEpUK53NeoeN@I=#yx0}*(d>mi7Bc^0IVJC{L7Z}W*hWK>F()W0< z=F{R$2c)$SX8TOwj7z8MuHbG}Qsq2#-HGp&uW!jSB^wH%B?eON*15T>XoIL@*1;6V zQAoBx(`f>^r+}O4nW}Y`pka2c&7tSO+rG+mMX+J5I>?fUur1zw!4)M}bo#6{PqWcu z(^t2{gRyyr=$G>YU0E%n*OO}-?r)P}we()(%^i;hDFDVr8l(6D>E$l9VM^7^Lo0$Q0b4t{50Ihi8tYRog4Vwopwav}sM|v)Fi+ zR;4s%E9l=*sPB&~$qHVt+Efnk5iWs%ThUyf@5sELXJEh)neg+Hfc?5kq)1*z6w zb!0ch6LTU?s zPr80x8Y?gqALu(+NA&X;GWOjFA1tUzTWaps;TN#Q9JPeTUNm9#>?L1x7a5`|fyNYjtn36WnPZiD+a1v9 zJ;>%3R)VWyMVwSdPhoof;%5xl%ts}*ANU8Kgu+)lGO$uDhIaYj8gIZptsJfv1LgN( zlZ`xNMarkj8Vch*D5MRo;Wra5EBZD9mNAUCxjj9Q_e}2zS`~9WBi_sh#te!ostIUy ze9H`*mKO6m?`{x^xuL;fB4T*#phV3N=+6@)|bFI=G?2-75$`j=)*cT2ex#m+lk6ei- zvP3XvIKrHX2-kRncdrkud(l@6@;?)gIqc>O>hqzD1xPY4m{F>`t<8c5!Hs3)rG7C^ zO_GcNF9Wu~5i=alA>E$}aB^p^8>zo(&uzud;w@N`(w34gZkAilO9IN_u@le91^Fks zZL?j+y1K+!~~9sY8Z?aUoEJgDszkz9Y7w=;W+_Z@+_ zuRyBvoeF6Dd`ge~yLCW*gRK&0e}%Da&EocN+v{IRc>uM&riiNlqwT>#7oGf+1Ol#S z3}X$GcRAo4rd#_yka72os=l-Q3sMw>E{7Q)kfQ6hU}z)GOko`rgR}l}wjv@Cof)zt}%F zA}o3ev6+@0(`ksZmu164#8$J(Eql*dNKz!z7QhZOd!rA23{dPDfM5B za{GV6C`=GW0sV*n%Tf68&aF%Gpx=NlXwl-Ef5=vLH2c2}ih@_q2Nq9=ZKM1m;b#Lb zfs`Zj)9`;esH1yBjlT}6fsV@D{Lfzp)#*%@@|_0lGsr>NDQK{;DHbaq`Ty&n{#!v& z1AiS<+FCA(%!fCulmB&4!hao5!}!9V?uJaW12*talL+ zwkg_i=j_VUhPh=ZS3Z4(HX=UI=#+p>`@enPJNOjybsj(XaoYU7P>%vPixB4 
zjaq7QSU3j)@Z<6EwE=oJR4j7TP>`*0Em^LR_kv$8%QSs!b?oU*ZJ>sIpoSIW{{^Z4 z=BPNitM}*Sp7bqJcmp~j*%Hnq#cIlLjtfZiO^ly!$mSLNNCFIb(|oxetWOAs=L(6Y zw&r+a??3Nm#Ka-Tmocb0JcMNs@T>UUca)5BfUVdxNErd0WZ^`5G&u{Oo5ii`jq5_#t{i)Bba9 z<~nHSf&8pN71;`ZO@MqW$x6GF71SCRJot`LW^&1{~ffcu%o4eb_FF0q ztOEG|C!uJ3V#lh|`2E5z-u42D8n+w?chN@Po=NsDIHLZzmMZ^W5~}bY3AIi6-x7-D zeyU`Tg8XSK6ic0Cv|O09GF_JST5MzB`~}(X_lgj7n;F2hDhco ztrI6QKm?m7t?R1qh4L>HbEcpjqH%_ucNBB{ZN%|VZrJg``*exSKpPBh&HT6(phkca z%=xb4jD}osq@rg}We`60#EqML6@T2WTHpo?QVx{%%-wHZbiNX~i*wN&Z}*EZM0?2@ z_DKNc$tV>LO*uacBphVrw;IrkPei}3J+XAo&3p2-!`4=gBskw>mlqFP$Yv^idCZz9 zGVT|ba8gInRH8)K`{1HR`nnbZz(G=Ebzz>6aev;3ItHaZiSK;U8)Gjsa^qB>v`3Nt zKy--aF57$gV~c=|tCczku5Cek|D(bCddaGX>fNnbWqzu;c;EpG#<2d5wOMyz`8z36 zIMo<>x@7EQ5;>f%1R?p=bf{tI+W~wf-*-Hjv9aTheL*(Pq*D6Eg!Vt=+g1i@s~E$wPd zn3v0Kfa|bMglH!Fru&Bx<00K##TG1klKAb->a0=j$)oOw;2)BL|4UM+Ad+g5`YWR_ z#Zv!6QjZ%DY9JZKTln!-B@77rAC#(g-poZ^5(_OOZ2y<0LjTIB;D(=0AQ{yjq4Rsf zPBq1y?3Z@V7ZucI<`{|tVJI|hJ<%ug#ot^dIUpIu_QUa_w`*#fG%fm1zSGt}GD`gi z1=P16oQR*-Kr*VFPDARijOwQZ$*3s7N)G}-di9nuU>78#J}?Fxkcg>A5Hnq@QA4mL zfoRHV*WA0(c2CoQ3DtM8r*Ie~qc~fL`XGKl!*e0Td}IR2CyamOQ#$DN4J@E#zEDE- z>l^i4yn=S-Bwy6uns@O}%@YO}9r>Yn@kHq#C?r8rqBMsJCZpDnGWx?}S*92<3Mx{3 zXXnL_=kVxQX_=M1hIYD$120=~f(tnd${LqyqWp13klitk#P;ek9ZoJ2ULqLTg{ZB(YUxf{R8;bltFIC*n_5H$A!H zbv+)>rY^SI#ROQ#Hu3jO7m$@I;~yVF23e{2v+>2|-}jYtlW?Z47<#f>9Ks{>`Z`IyI0)mKFOuk^04188;!&#reO<+zcsH&4k*`4%B!OP|K>bFAsSxu#D8+0 zr1$t&Cv4VIhhk#YeCH{D=d*H4E%Y8~@y(@cyD#(Cj6XGF!W_=kr|iFcD>vHXG63Z~ z) z&dF95XN6hL+VyN@DyiLytX{b7UeH5NW>*Z6#vh$s;~zcxFc9}^ixwp&ik1I&>u^Gq zRomB%NEcH@Xt(ESf)r~E*|rqbhI*&21`n#WRy5f;wMaS&Zd(s(=FE&@Rni+`nj{K$ z$a2<2F&sqv$ay{KHxU4FQCdbgetbi?wL}%2^JRdpXvi%_I}Yp2RF#BgWc+m8 z$(9yPp%~?1#%)dZi5hnb{+^1Iwzf>g+I6PpbsYnRDdcb&1HX79P;g5Kcm&1+EB=%7*nSx8=z?GHAV{bY(6hZ(B;aMG}0ChcW4H@c+(AnGVeJPGHd!x2+mY zf9M5Le)J{=D3=q6FradGuChDcoq8;eD_KaPJ!%(!GYrGNbKf0+^|_qB-FQ9ATlc5T z^L3nn{DL&bFDEyN7~+zYVnJqDa-npLQ5;Z8P=hS!2p`Wh$3(vvSXwwm^OGuS88j*- z{@T(!Kq!5nNsdOFjXDkC+s7=m8Gg+T}v079sM!F!yc1P9w4&cd~_ zKYMitNKd|`zz=+36kYL-W!#zd+jx9ozUp%{yl{Qo&*H-vGJyWW?4!VoRz6Wrn$xFw z14fyDs%IV4%N9JCj;XT1R)>}h6u*!oFc}t5f<~lLPc-2Sz*4dGnDr9y;`jdHclAxcsIZ+kTU3SPC)wdtztZ3 zoS_zii1WLMbHeGjBt|ywSI=>|)pSo=UQ#YgsZos-84EK8x4y-k!Un3c&$60`%)$A` zF~LzsH_@VP_{M(b3Ma>~ljWoo(jalqJE(o(jyRbaIM5%F5Rp`!<>qAU{5I+q zH99Lc}_^P(j|~pr~84(_yHdqUa?IFdH~9KN2EKpv9lPP z&wi<0iZx1H(v|Np?hCcTr*9jWXs$07O;uzpbYg@6zu-E14H{lX^D1U8R0-l@klOFf zwiC1O(iDWyDJ4#XP4~%l!sLXFn9|T32X!`6wPrN&G?Dt)na~qcM#+sCgD)Bm>xA79 zYkf%&!m%w6;^^!-sFO2)&b4*~)4)B-R_&W_+WIYsYV)skgBQaIJE%O?zjf26vwRAj zRQ93+ywrT#d~O}|Ef+!%60a&1W3E z;2yjnv4UpMF?D*Cc9(Nzeg{RtgKI~M;!6lS1vMm;I!`Hcv0S4?iTv?5p*ZP1?dFMSg3wztu+V1bw``^g?Ksa<}+-4Vmn`Y zG`*kKuoEsKBq|gAa2UyT=B($N%CI%%zN+-t z+VJ9!tP-mM$n+#dQF-`w{d)E`3lrHeK%|^q5n8Ger9jdQHE-H^G{KeW0aAz75@Ai& zJ(a8zVD}btyjLzkYFJVO={h$M+>{0X{BbmMD61H_( zq`py^GMDK$A;sK1b)LqeM}kf;V8Fj5w?Sy-$V<}YI5@v;l~jtZ7qjIzobaeQBxxEd zvIa@!qEvH-%`r_QI;i%gfodOONBia02f>F!pYtCU)0;ljdA^ntkfeB&7qK>zh%g5% ziy*R^`)%3@rrNW^xeP;bfv%??s-IQ~o(1C%{?k#p;p56zk_Et2I0F(W)T?s%utkE? 
zh}U=cX9P4V#R_rlij+|mKQ|!MjsB)aiza{v4aswF>$GyW`D3)6`pnwM`?}fjmRfM+ zWA-yw8E?i1e&>;;KV_L5dW~4*U}U_DTMW&F3e*0P z0EgULHQ@%g2(o2e7?;SaTIm@XXx!!rMkIWk?uqL$&Far*k1B{6JZfIM8>_rViKN=d9TJ=X&$fPVIEy)Njsl1>R=kbAw%C8Q<>k^k z&3!ErB2#v%zyB^xe7v?>Fnk*Vfsh)a2_6L&*f--)&bipWgx;H$e6wLN;QFeRo4Iby zonz0OdbroBSVVyZeNOb#@^|c&(ar;Yu7M}P?iFsQA|#>t#kr+D%+Vl`sd^CQl}1B;U@FkgLqH^2g>W` z#?fXynhYHSTY-AXJnkmG`2znV;&`cKqo?dn6^HNO`06#La%U1YE2{Sb@VYPnTJwvE zlPa|4Qth=bPg_KCm4go|&ApQg^x64t@$jQBkF_7O69&-9+dH;1(LYwRZRNhMo4EG6 z9(dRr8{aYzi(uii8Ghku<8-O^Kyvi~xomOMSTc&ozhl>P!?*RITH5!F0ayxzr5S(<*iGQXfVs!-N?H_!(WU&{Dp5(~c8_POdq3KTe4JN~yTr ze(t1-*fSvI3Qe@emW(MhC2BHZa;-9q(%C22f#bBc3$D*F(zq7NYeoUfp4=vizZjX3 zMAe;~{wAj(!bMPNlsI5STRatxCAiTuJQB`9njm0mf6ZXrK+)kzrj7G5WImL@bGJ0; z5np@=mfT3o3VhZolEKgfuIuCyG<`0!`iamWPXr)}U^8tH60z9qJ+dd+XQ?38Rw7Gr z6;171&%l3ca5iFj&i;tp-f=+h5d-P?Lrd45h|Nc_`5AO{JkPstgknzeD;hn?@j@LG z5!Uy>Xi`=!3o8k+XOax-Rh;V7LG;W-^kN=23;a34N?BXU0z&E$b7C!c=0Uk*8Bs4k zzfXY3-_o}s32jX3(T+Z`fc-FHnwhE7!hu_>O&HZskQ#cP{^RCFn1is@4+vqdIk8Px zC;eSEzT;7S*RB@G{$=If8PM10!b z(HE!4$I?=d84>`C*`L9^wvFY)3|>>81!h=N5$63IwCSvtWzh>6E)x$zuPUOatOTxe zHJZ;aU_{h63Ar7atWt-MeU@uFjE{rij zuN#hy+fNz86V~rjzG&}OP^$2?J{*H!xSQKy~9zglJf)h*KSlQGDKV~wI8AloW7M%5*`rx?2-*r5V4t$7Y^1=G~M!`LV#31Wz zg;^P8K20K?rmg%cMadYBeiadIgb@{AA^2wspr2JPNL@q?rE2L#*5jvF@QQh&cHIUN z7jK-*q~k&J)UIs-5d$q~^eI9Zi*?|!7m3Jm4C105sg8fjVSe!tA=|w(;D;l;27!y6 z=4rEbmnM^OFkI%>OV!KLJEz+tm;WUpzg<=ut$;T4djE7BB2KW^Z!60ng0WYrvEP#T9tQi89%26&5ZZXro0KAV)z+1(2+zhbr4BKUVB=U|2FK?l7rLA!QVzoc^eQKdwf(%3J)bcQC*n zKA?rW^0p>RoZ+xAt#PrCk<+BNHyz{E-8&iSY8uVE65ey#EY;<;B36Na878MtNcszE zvu342k3>v1OoR?k<}Vl-K_?1=_VdVuf3$f)Th#+ObXN3H+rRT!-kFu(n`!0rzbxP( zihi_b{>amX76&QMGO{*jb(oGmE`>yK0Ox2;CP96wgpLSRFtd$nV&7O`_&CCQqfFPctN(!`lz-sp{q6Q5 zl^gVim}&%@abu`B1AJAuo^eTYN%WTGoeD05#TUin_}73OzL5&2aH+-?FO=|iS$=o_ zy4gP|`*UH!u8pGcs(}D+bq&P}Z8~zGl@N!PznKon>iJ1Y{*)x?RkD^&4~G3I0=SQo z1eoE;BY(DF&$GP1`;RnO5iaR1yu}PWq;ccQDY5Q?!Zb5G*0UqRt+=(6+U z3e%IYM=1@lwu)B`}cSHfpzjHe2+DlPb=M4}9&f{-zKhE@Sq6Q^WJOgr`0EhGPAtmh-eruf>|M zy|1b5`XGw@=?44kI#<2e-hBi>QCr2bn+3QSjV&iZBz zl_+EWhzG-zfvoRJePW=VpxS`ooM1xuzR)y?u6w!a-MvMD4t3ChXOpwN08Or@Trx_L zW1V~LyGE5QY>?$6?3wsPqi+zWkX_r7@ra<4Z*JN)LU5LXEg-yB>fLlC-eFk$rwoF` z8tut-jB>(*X#EOh-L%S+N2rdF8*Z3lB=>>h8ONJw(+Z5^s~&9HTF63R$++g-uf#`J6UjkefK#xN?&g5IEN$9x)UDT#nS9bE82A+s72#fB=kp!=^zDSvcOH^wJ&8-^3 zgXW*WKZ%>v$@fR*+zPYIH!$naw~?_?QH+`#?X#P)sgo_Ihz!ZcPpH>o7(_$SO)kBon@$PfrcGCa5o)>T-f8XoE=DL2YOirZ& zd^nC*(8Zf>GF3V)&h0iMMF8#RK|uz;jpcT`Q|XJ~%$B%Of##zys3zi1H#ii@1*IuP z2q9n(4+nUqn1Sx7O!rTpu>Y7D`Eo8dTVY(uq)fuxt;ZzJH&x3gnTg>l9rbntC7G|+ zm-juscy|N5=vM+%WKANJV^6!id2*R|zi%0G<}=JV^X8!Z)tF4ko#`!Zqez!QLC*g&BD zR1PmMNsV-W5LcFO?CsZ2Q(Ar2L2CIcd!%f>!rCEi z%Ke}9yd{Hz1?u{Jd)cx&NXX}D=btRrqbdzcqEW|ijZN_^^I`yKKd%S0pZ6;rtTE-^ z^*n#x(6BZ?=6*_V)Ha3ko6UO(vABkNGSs%Ls)b?^$Yl%2^V8s@;h|wWzwFgA==A+d z`56{fn8uzfl)3jor>b_%tUVViqOR^E9Uw=lkrzuNMceda;cq~vNIp3#NsWrrHkO6H zD*bCbVg(hDFK&m|reDvm-M0`*56f@SZu;)|p zvk}h1*swtIp2_se8KF-OnK(>$8#=HoYX>3gQP*lV9n^$^K@+%}C&}jpe^++Vb zki|Owc(PmBAW#}na+rC;w3U6B_x?0!--~DH9Ev6mfb!n+?7qdPnzjrP?@HpOBQS;| z0=~}_r33kjL7%pWx`djHo@g6?R0M`Ed}GIb$#NGBcp8X##px~!)lcYM3GBVBn5{tC z$uZyUpF~o#HpU-534g6JmU-)We_0+U!0e;17O!ZK9PdjFor=rb6|-Q9ki< zYIYHQE=ZKm<@a=vuHcM8oF73S2@=VirwlWZbx7$F;+k4iZ0z*{MyEDbE1i|rFGsmP z#Um@eyWCIAmz!_jw=cP$<0uG^R)9DA*VHR0VM~0K!}>-0@0-b=4+{wso%s-k>027O z9q3HZVPTrj30B+$a6re^-?dKB zF!ibh$M&S!J?tH;Jxo?MV(> z4x}jJ6)485uSA;Hs{(|YqztNOZS6R}ZVLz5Qt|1c@iWpKI%KOhH|BRf<5z2 zIsr6J?L~J%^Fc-}DS`w|MtW0079otZy$5{53^_8;rr3;1{TY3Zp{zZ)-#A3mZ%yA& zG|r6cXU0xGlR=Wb@O4@>Px_@tXW*ZV`;_H2X!|Mu(`D 
zhdnVHqncZ9u!=A5kZY>VtGev>V<~7dDm=*BQyDJO72p5rCcW;CUdI{^yNLBK2pml=>uI6-le-WW*Bt3fN2i^4lTPdB zS_KW)+cAz68q)S)b{`WTZfDmlHSf=iK-+msv|pwX7%B%%hyy>h+vbnk$|BN+27h)} z8&t>6_~i=NDirF|r-rqkNG;=@RDH55?PV9XYWsceSBFk6@M~rYJE>&CovmNn==AQ7 z2|O1imEtSLx~$}_T|89N!26h4oER_To$|Ik$YAG*o)?XPYsZ_f1N1#y+tzq-lk z9ts9H)m;bGJp67$qb*T-s0?i+v)@KzX)3-uqt2j%QcwzO`lm#Uv&~#7<*Mo^M3b9B zV?wVfTX3c(W4X0Tmy2tAQbsM;>GOYRVRo^6SgoEfvV`70bC!8aC z{!s+*6k_#hPjgL?jGVzly&=f?-Gc-?OaN7NBur>~mUsyDFcNc}Bq%q~huS@xKL(%` zmAAg)DHb|PY6tUdGLCpAzv{Hj%LZ zFrhz;_(-}x;E^07Z)e7CZ9=B-T4g}CC|j+Rb$p1-${SnJ zB<-3agB;SZypc&VH@qTmy`#Za6@GS>V9?8hvn6A9;~ zo4jk__ZR$P_RYA|rW6-9V3M$~cv@3&;Ob>K1Cn>~j)H>TT$Aua>0NNE-HeL)`u$2M zCaKia2`CH!HHXvTh?zS z)ruusAA|(Vd1AwSPMNzPZ%J@#IF%&NnMrJt)kc;AbOx`XEp&2h)h0z7-kZhn$`W^E z`zYxDKV+R{TUKqmwdrnTCOO);u>F(~3i|!Wblm_W;P>?R^2I-UzsWmUS@AbUT zyFSkO3-)as9yp;}cS|GFCJP-+=sD$J(4mH_Qm zd9?S@4`nbupyBsy<^{sm!z-CF+^GD~53DK$e9GN>%}oTx0esAt?<`x`-`<89<`2)I zGQkW5Jde=yXBJ{Fnq6`8ejpIx-Q3!XbF z=Ry}XS!+}MNU5hp2X9$|dj{Li|J|AVl0lEncKn>we}nR(1_w2c(*hX-6qsQ86ZeSx z(Rxvg1Y!Lnh&%n{-j^*C%Lf+(&=dldl8B^VQyRX|;fA%LNH9eA7Y$Yj=T2DotOFOx zB(^JB|A|!3iuKq}L%R!ZuXk@q8XG&G?wDrW$c}6{9UBd$~gv8p7X^=GQieSPRNfqUMu*qb8< z*H1a%dmdZ`8eEzc$@4w04nI$5_RpW_E5RMZ*7b@;qR_RDc#)^^M}JEsks>S+yzrS? zp>vOU4+Qc2s2GT&@87t?0xXrr#8o6Q`37pvAt7eN zgK2XA=5i-@qxMtLsi<#sI%rOQv8UN|pP4P82B;##x9Ix+dSasETjX=0C>Rs1x z!1)zPuTyFR9afleeb0VcB`V7q!rndblQHM1&yRDVR-T%+Qxl()Ru_)h(hw}`inpV` z8aT<#=g+A734AE~Z|9W?BbvVby%9)k7Ec|T@qX@}Bf|VqGDCcFr&GPtQv%U`a28GN z-g%$uJ8S2u+X|P;ZsJO0*m6J(fl&KV*Be>qWA3nC<0LzY=;gFz3(vudax<*%EW8d< zYUKW;CxWA!PUG}trah+P$1(AYdd0u>L1Jlyb}==beVF=*{wY~Kv4;AlRP57M9}3`u zD;TQm(lOfiKPK(bv3M2~nKy$<;q|6b7y}h~6Xt=64shRNw_)~?08%aB-Bm>zkZSeq zwzxnNqQCXQ|Gp&4M(&~upC6X?8`tyw4|S~5ufHP69TjC*eETfle_A=AgmZ* z^c}cgj2Y_4+PwVy%O1lsm0G7YS_<`IHg3}_%+Dwn7UmnpjgLPCzcwg@66=CxaCGD3 zH$IY3BH?FP7Z`y7ea(pRMdpHP-C~bVDvQMd zPAt;yppxNV$dw45@64ptthVyly&1(MQJHc~q+B$liQ~^7Uz^eViEJ%c!b(lRG!Yxz z{#-ENPeI$LV0NbCZ8a!NlhHV_~BK++m?^dq5}YM7Vfg)Hi6Os~hZ z&(zEoLD^82-)v1R)LK7)+vgWtGMlw9k%ZML-Uji93&6WGqzx=2vD?z7=h~E~-4=by zNddpO7Bn>j+Lk&=Iz2eRx^Fp0&no{RJ)#w`y3%(mEi))n`wN_2ErwDaM*67Dc#93! zUc}`p*)W#U_GZHY@Av{P%ADqGtptnCQ7E zt#j^NoQUXxp`={C5Y_=fh1@IBK~HU6I&W;yu%&CRZ{7W?C(L*ftEMJr7leLX7J~@WsZk$Jj1`vxgMRZsNKrFd`qAQ-4=xW(+qv=%#%ecEzFvQ~5 zIHrNW)zQ^>4aDdH^0eL5986x4vUTMlSX?+?vb7F%v-8o%LwEBoq`jV~Q_R|A9gxqBsk-YM zRd8eKV%!O6ObLF0EL=TqIC9cAtWKI7yJhOO5=*j-1Xs}qfpu954jerY(n}Zp01rs6 zC?urWu+! 
z*iO2ef)N*tpD$JijYoI)&3YEa6k6y>jXcB^wSa;Xx{tTa4hro$=>ygN=Zx&gf}wP| zIumob`*$9Xq=1qPK132=ERN5~BawjZA{PLu;zGo`7m$hugA_UdQon^et2a6Fr4%gk z7X)WvoUA(DJMm|iZT|yOb4~Sl2=xF+rLXhN0U$L?&4r-3$@=l2OUgCq_M>EqY;G|= zmA-0?%vmI4ZO~{}h1%bMzdjys#Znw2njK9eQyuv*B)aO2K-WT9$n#~5hE%ro3|xvg zCsFIHxZBl0CN%{zX_{Fg7|<7n?Bj+ z?S?@Os_%On_hu*@Nr9zj1O|K9(eky->=(@E)89|K=hNR91~LpWUFB;>OUE2t&qmm$ zMwI)Ggz-2Wu$)tE=ao?-B#le&(X*VhicFG+!AV{VW^nSNNCv)3ucObI%}Dru9Vf1@ zFVDS*5T9SJ&Usi={Nn;a%5{>PKgjoH66i%a2UiVYvT}7W=>$828i*=$=1fP>|L_yL6l^lq*(`M#Uh|Fe`l7R zhL8_zfH-xh(|TnyfwtdSO)~7SRH5F8I2Ic@AGLOVa}GS;!}q`CgX@{@*)=3vMR=me zL8zz;P=idl-a3gVzyvf7Z@wZ~`YF8H{GX5FeN{LE2&4)$IV z0!fRn-LdUU`J}l-TZLxbT$ymz+|W|cn1NnY4$zC5OMp#o!iOdXIehwLU}#8`4_mAGL$2=0?XylNA>;=4bT0jA5iI*k99}UnYPfM@OMR6$LG1Lj6cwV>rckb z;yG(ZN0y*?!HN1G64B{IusBxmmESQTAp&#fRh+Fj%c=aB1om3>icBwC+V?kv8a{Pw zv@)S9ypa!cXdIe0n@?J_buDKm7Mn6Z2(wDInXJ~fCS($T8e(dribwgeiCO z_dOx#&53fj45%W-L~i7G%7H(|+U3(K7h3u0dQVmqv9rwsrD8G>(_2Kg*?6-(6+)?6 z8&BL{z|AlJcDkzo3D71UU9UNzGizpEk?I_GZy{YR2M%Ff{^>5bYWI^p*@j>atVCjF zT4V#YCoP_n{YXrjKZOk8*9#8(Cb?dKZcx23&xND|?eFea7m}=7JL_~`p2*;_r@fqH zJk7x-PoSqA9@4JiVfl%m?b}#pCC$_Xr#W)s15?aR4N8Lx%?FgbHyd#!8b97BO79lF z-EKg{e6tD#ss4JsKC5D@l+;6=!vkffBYxNdWK)zFih!ku?8uzY7;+LVw_FDZG z1Ip({T^NIU;paK&{JZa_aNPSz2$?qz-Z|~k6ku%mWW~R|k5LVMLzV+;PR5CezZ53~ z)2ueI!T;fDack~M<;n|dYD}iupDcq`r!|rv^`bGZA3!KW0HF#fx_rVZptOIIfUqtU z2bq7LoWVr!B@%po55;~9Ct>dmMkokxB(@CXQk0dSD5tZi@PYHIYIftxOt#h9ydKWK z7mDKsWNVf?HLrGqH9rYbcvF{FpDCwOB<19V;40x#^OwDg?zet|u2s3}U6y$a8}3J7 zCov5mtRX2?0}4{Kw0-LQKtT%BBk@l|N)+5aZ+nkwNNxcXq&R7B4uFDG8c>k32MSUP zIbYh2XBBSlaS73MIy$>FHUHN4*~Zcfe}V&5f5k zpgK0VA~m)CdWz5~`ug!^b1C{y8{LogmL8G<4wW*=9898lY92sv{ zGiqQQn@l6kdd5*P6s2lS7kyQ+*tj4sP@oVaCY=Nq%>^^ZHZT+|K2Y;+S>d8!_xMBK zvX7o6eB8bH28cPaLZh}_Mu{lo(;@)D6#2Hth?Tmy0gzF-dATh%NthTcn5lANB{Hi) zE0ka3mBvqiD5D*4l#;%?O=T&5e++XNRv~{ip7`S%S7$;yZx*?mx(TivPvHAeBdl|Q zW*g@lLJN3VBtXo$9{YaSL6}v%sNG2H4iF;%Evwps>sPo3b$5K7R3Di7tF?{lNA}yD zkW}CGqXcUPJc|)DAB@&$frN_w--L<*1dgaeE0sYKd9BW7tb@I@_&`KuoOF(zc<$UI zdQ7Zp0Vxt#$(PFmjg1XegAvFX=C%f9nk}~xl51nIO@`c4WtqbnWVVKHz9rwazJh7} z?o+}!a*_9oGd&KC@uoFr z8FeQ$9nbO+ij%4dhcvDnLGk z#0QH=vuiifvzZ2>e^ryyzyn|vp&#KtSTz7(72Pvd@fZpf$5bup=k6a;NLj@OlYcS8 zewCpYk&Z~4Vr)xYw>pThMHLc%X6RmrL5Y239tI1Ayn7)1E!mkCCu2}`(3wJaJ}L*m$*zkt(I5407ATa1!KohTOwEEjQ_(>z42WQGl6k)783Ewr&hjuI zGc@5FH4*>*F+Up|0P~BgYSw)>h;3YeD+$3VSPK*4fv+lB{(=Uk2CavlN-71e< z`!@77L>Il!r1Q{fga-V*Csmgm+k~~`+bXuHxgdZSW=ZCRxuOlu5H1d>CYU*(B^Suz zBt{4)B)>ai2d!MGEtZ(MD@31cAB>uVo= z`+n1d=E<_>lD)^`!%w!1<$lnU@jb~Tj_)Xq2QyaQ;@Q!p>kb8Ryc0iEHF9GO+YKsB z{5sGlAN4Pf_IIdy>63%Nh4Q+C=;I215`3UfZs0vDI_qRpHRpXN+5Q4lFFfTRsBep- z1PG)e-orqkgt&>100OD!$9k*#+&+B=<-)PH**oq;Ip+>_<7D=;VNR>WeYSOrqojZ6 zc!sM5P#YURJ2VUF6|LVK5iYN^M{BX6;1q3wIz@S`BpcEMW{(!c#v9tgK_>M_dIr(4 zOG&O%mhvClV-A|dlsMcUKac3P;;Nq90U*VAQvI_?Bw43^$&$5xLGp7FSC$KHlEBry zH2O9dsl0D|=x}+1Sm22Ka7m^ts`Gx5x#8%V=6Aq)U{>lew2k-#!GX@|y!?sGU>g!- zR*|~~Jv?;-{EYqsYZkx*pMtomk7d@Pmv=PBP(0&{W9HTtas(;GBEmjck0et*`idcC zkB7JqbjV6RUTiNC!_Fam~uUaLe|v!9Bn|FBmq2LEipx2YwDLxY{a(`?5Wl1Dx{QBE=)b~aW(jq zJnW`rvRAc}}~+}~o2>TTi0^vCfWb%Onq;cerxG9-h51;O=(qzFa* zO&7xGDiI}Z6OAJvI8XSgp7GcyTS=0Ccpxn&!q5trn`rqxTD=RXQh~aaj{TpzRDy&? 
zLz?&jmHgw+T`Gc5%_5~9aF+@#&Xb4Lt5tvxrL=EG6=6X~#q^|sHnh`*8h`k)fj#ip zmmulJubmPAs1l98- zK!XrTrHoyd^y}eQa~7aM@E^Jcp6`9}0iN$|KB%EC0Y&m;SqI2SQw@jWuPqjBtV?dZ zxM6DdjlZ>FPFRL=8z8vjx$Yl;s~;%&jKpAq$75!^-yHQH55hdx&wDsy)ZiR`^`+eu znP5xd#wwgT>+&e2q?`VLx1Q>~W}I2K3f9`_JAvgG>-3>^D$+@Djp&ZPNm#m4h3K=& z8Ro{t)g6P9Eg8s0S$~62Tn(_5ap)g0AmV#b^Yt}ak5#|(X&=(9-e2jWE#2K87?vYE zA?~zC_0o6xMR+1W4mppgB-P2&2Y)p3G#MI)5>GL~EK>T2KHUZFh-fLX=pKrYo9?l1(;>bp6%j#wN6uSLW?Ii-5arV84?Le)R#gr9O_F6n1JE0c8UbPkjLU$$7wYp=`F+ zAlx`{+9CoQKV21<3QfMDI22_W;xuJSchPQ|22&MVRFNDbkB)x3R(E^yRuJhV1wtIGuI z4Vvj~oN(jARtId;12sh#^;G~Z0xqbZDx|i9fOo_jm-E`xZ@I#Kzd~4Z?%U*{>3xhr zJw=~euie3x`xX{$SR4UT1&sY0mgGv%N^E5K!7xT`K7Kensbj#_kMf(5uLgV`!CoAM zE4r+K%FJlp{l5&bs-v>MM(LBo0g@>oi{iUa!}glwVWS{N#*_-&g=bc}n&7mHiM*b0^Tog^_61L3n!~Dam^98qYG)8D3kiOY*OIH zB6iU6SzxLJBcsWbBvsi%(3$bkC=IE+8CFHx73FwJ`>0Zf;eT#^T_*%-OMr?e)YVt8cK;;^j(SY=*s&O{60`N3K9`{by$2&O0=Av5O*h_p z@U2o-sj51NA41}sDJzoTy!(0wLS!YojgO~m!O%#K4H~tC0MB5<`X|lb$4be(JOx>V z)(lAJPcRJ%!wSvlk15ub1sW+%&`G%{w&Sg&IGayTf1bYih+(uP@h2i#Q~P`r`@pq{ zBwNp#CRMPL1oSIXT0S6kRh8lW5uOMKRh-+|yR95^IIak2lXd|s zGI(k~^jPWo!nu*>6IL>Q!^ZtZtZe4S_l#Y;qCBioLmlVbJ;wwE{#NhCeaLwvy6YxJ z*!l=^O+R_0%N{|vWbGR%<||sWL*~;!(}J$ zQ^sfSso}ol`mN!B`@yVn&s8E%{|{ti<8^c%QUicn@IzqSB5l_9P#=*tg4jIrsxT+? z7j1Hea67>-qR3e`f!qaX%L^&z4ku0v^i{fx@Kv0a(O92YVH*K}TL>y8@CSDQZuJ4U z<;Xg&zR(0Q@Ofd<7$uhfXl3H0_`qku-Xs0tC*sn2|I?%OJ@h2ZqmC3tW}if#?Y#NF z`IQqmzdEy)d7HPz&9D8s#Tt7(AUwD)q9qBMd-Bn85szq*w!zQ{bQd2!Vv9`#49WDa z%!}+}L&4&CE1r2M&Z5C{m6GBp(UG)8NZ&x9z`4nnm;9;+e4QEj8#9M)=cEJ1MF)>u%0#mH@lPF_E|;F9;@c!iufMBiAoREsmP>X(?mLcAP199H2nR*uR!r>9 zb*$F4IUdYeE$FU-2uXN!4w{Z)mhD>CxqLc})<1e{H<{67jPGwrUr_YDCQNu#v02?$ z_-R}=?ihI2@6=aVc+#MwS!N9juU@}ODe=yN;dod@H1lOMDS69u zkbDPSBS=1^9{Wi&3Erd`$_Kq)Mf?%EhO^_93E~QIT)|TTD869Q>W?4ex>NH}IZ{ns z`W>ZgrE=!P{8cpYHLK^kX1{MO z93W_!-t7RLQ3iQaShZvQ5^FT|2`{EOj$;6DYAjq3ycaWg3EqogDnc^dT|Hr0A)th7 zqEH6oRIjNHuoqJV;MDV8jC^3uAocdoPktr#9Y2+}!8Da7eotUD0MJx9{~$&lNZE}5 zR?}Rm4NOx=ah}}&G}C{Aeqo2rfpLnPHn7uX6WmNc3mCN{h*+lVOn08rkd+L{`h4_Q zi}il2ZO}OOU3_uJUeVLrkx7XULARN7-EC%{zC-E=$<>;Ew#&^;Rw zY;GeKpe17>4H9TAejNiy#SS2qu8eZC3+vOB^fu%b#g+1v^c597eT{LE0{?Ph`NNMS zsnak)!Q|K-;?Db9T9cozwk$Cup(jh`3&p5JvS25S3_99VKIl$L!G@a!^z*RGuy>ZLWGS)~3BK)Fe`N_1>Tq9kzFigEroe34iu@b(*%|{~ ztKD$%sD;vK8>Sx{A&5FG;?yVk^wfv+#)bG!ON#0r*OXMUSOV^pPtgd`HC>4R*al}- zWExqhtNgbQIAMI{Ud^QxDV$WG0DhrGt_lRAlm@Nj=%{zd%c4{VN6mJdj<_Sc!;zjr zN5z6iFlwnlM<2uu{%*_Nmv1q%k`}TjDZ+xRpXvn`O1Ekd-1zJ{$z1_f++4^0}zc zM+1}y#05Q4dSRlhrObAlJm~jZCp3GC8}mm5JRXdjwD^|46``^sfR;SRcM9MgV7+L@ zZ|l+D2maCofs8QVSw7~T`#@lpF9Buv;*$j6n!>sTyQWy|1fN|~7g}$gT~n0W34m+L z%;gcTR$2tKrVPlY3avE!|Bk|$!GL8`fxvVOPWFGhrfBF*{hwV^e=Uat=BW+jmj5#y zGgieY7;fHDzFZ@wFvf$dxZ}LM#F%zKuBFud6L2=vt;%1eIqO)m?YnRI4%1E+uNK;2o@7pEp+rzt0&b`D~TD3IH=jiHttgi`{$J9mKj zm=a(PmaPUq%@+)B@hiGF5sKGGHrCEegE^1 zBH}eQZGUB-n4Q~rh3*oRFth*pEY^N4WAM^s>Tw(8{Y<1*+wVK_5S%yL*MajR5juS! 
zAGoa&esy3!Ce40VCng0QBnW%|G9Od_;+s<87zINW?CohN9z2Ve{6Ff>20+~z1P@|# zsst412!H}TWaf`PD)QOW!|Wdh`#6}&L^PUTgwoj>7&AOpAq5M#Z~n$@(RQFWR^AT^ zVc+w*qEYUR#cYEr_n;c?-dqpe{u~9|o$7iwsbF^}WC>?$j0;%ZX>ppo9@-XPdD5Z6}A8`{oF7piQB`^(hllN=uTC9v^&+Tw*L^;2Q(k zS}MByXOd`7qyTDtu%8!WxpKv@b^0)h*m^0;BT2x%@!Lw%&WB8iAD2s6i~FR+_ipe_ z+OfipYTa(Kj%zCyobDMK*~{{6rQ3|`5T4%Uzz~B^xWf*FTNBD>f^oY#h!#_1QAH4< zOEwg{EGObOWyn;TD<6E5($9c!3Z48*OJ|4hhYvP^HC??wb4uzQg_Q*^rDEBpFxk287$E_(V(c28S3(GVNYt4d3kVQO_MF1;7Fpj z)z!e*mK-fb&Ffo^IbT)k70cqv+!BcUrIFsU2*=F?7EaBJY(ER9Vrx#~Uwl)IfNzT5 zgXHrGIKq-2EBW=}KtGsSBK7#-OGviZVb(a~nASdMz8Ge-%3rTgrt2Bpeo4|B;%e0{ z;i}RQ!0R5isZbn|>0~8@C*XDMcziTL@+U{US3@cXl*eP9w`I-U3RZYpFA3M_wC-rZ zhmM4}&RIqgwJX$4U$jMRS&J;3HGg3hZ_R$+eHTT#rF1!K#Fli#Z#;A)_j9JdSIluL zv+n2TnV$5hpPePnI}#l68>7I$!CflS{jhcxj7Yl(g0C-kPl-6|LFl85YbA!gBOJGp{~Gr58fNTFxGqz5F~vgKU?%5c)mKgo#(?{QWgezUbpLinT4gSH;eMl~e;G%j>ZU1_I;LLoVS;yZ3D({XbNwqz60t`nbh1j zkSef%1U+hG6EIGde*z@v+;4#ynHDX2uyG1@&$8$2ZSB2-5c0202(Gzgpb7_Q&^bgX z@)T?sBWbtD;GZA=z2`b)rpcP@O*(rvcMf$@X+&I*Ij+chV1zrXO+A}CWyRIcxWkR7 z2+OFn-{RsJCcMYUy&#=#E3RZiz1_+92)~tgTX~-y_H3M*d@)YZ06leCmy!DguyN}C z^VG?|CqJ@A9$9EW--+=+{8|biEM0JFg_lgD42=i+9b4QPUPm>lL0ju~7@k=-DG(Mw zJwx6dmt$;h{@?CS*&36f|Ksl5iUqqnEs`V!37!vrX+b`OH0S=e$3`};z{oLmD%MYI zepax%llOjuOfTfYL5$roDJirtmHF%1TR#_!_%#C2oyp}FMRFKgq4M8X}3OvQn?V2k;e4E_g`5mI%PTuFXWtD#EJ&p4-s2UNGg~mRK^hMqY z6sG=>cSey{ovE=fHdmR{Cp>5l|5EAtp|7&VAf4}H(_bug-XgHVyzCsuWz%}a{d>2#r+YSq>FuS>h(IabW@!OAe}OurNqP^(&A{N1 zOM#J=lTT)bpKgN0uww>uTZZn<&*Is{Zz}hLU5q6hb6;firSwt8Kx63T*l7NRh_7b6 z;VDU-_}=Hz8PHX5t^Q%vM>1?DBhvrcSY#H1pJmD!m7nfjP~(Hb@+rE;JM?r zux;ct0%be7V!wzd(`a`2LKVS+s&D1`mEW|ad6|Q-JkTsnH_dvNHfdO}nYZ5ByHfJ0 z@c4meWw`TlLC<@~iOSK5b@gk<(^4N#E0v;wpz3+<7+VIZo8@I!=9fXDFr%qX6uV5F zNb7nqPHL41g%-J^nCB<_FNMIY%*e?4jtT?>Q!WLAQ@>E={wPDjGEBzlt0(ZROm!8R ze5K}Qnz<_7SdH;CKg5sQq|wc=`IAspkZ9a>An|4TOQMchNc@KeIBqs5;OzB5OMcef|#m8pSw%Yl?*0G+z zKQ>+vNq`W&t*4`kPEy(u?+o_ILv9DS4CW z3P4q_$@g8)i#-rCn5sZA{_RNCq&J*9LeqAmiA0bqMvSFkcA@o~`r`oB|%&( zFxDcvkEkt~aBDxF@jqS=&EP1x&{7_AGT=JOZfFM_3Sg%4hHGd*yCop%J&fhOWv$)U|r^=Ltr1FS+$?ZDV7P`zk z+HQ97N~d0ye7b7UxAM`2bb+*S+Uh3DT>x-cy{l)5S+u^$;QBrut-ZaQj#Qj@ziP1} zSZNI}Hgtx*D5)`Y3_H^uzw=R&1av)qSqLCiak&t*~FJkrzDT* zVCPP#VrgSg9n{|U#apKt%KN&h2Ik&d2ND0SeNAR>+@E1<46pYK38-`3%Wr1)hjJZk0Lt9c z58SrELLXCF#Crngy*F$4SFYO>)8$F3q>25mu#>t({-uCtAxjvo^3nwuK3-EF5;Gxw zlz3AM1X-k5CSBX0Gl%|K1ZCncL(|ydd7D8x$!nlJv7P|TUwVkwaR>ZYJr_9cQnVk1 zu{_wNFEX4Wz#Dz^zGKbrzTiD8r=rm9`YGQsyK* z7#HRyZVw`kUVZL2D7Bp>k2$T{S4NG+C)+R$)69+o&fsDd)?R9|zaV8e^-wKPtfJQd z7psb~6Ef91B(@u$#DHSe*0Ky?xcnRfVl2CIbbe^XavCenz-HYblJ)xgsl;p?xp#e< zAe6!SAO`U8aUjCJ1Y<2{w?5gB+y!{|_{)1$EylWpm4{!RFLv}+upPbostJ`(RF}BX z8g=f)j*je~Q2HXCGWM4n0>o4Ap`J>)t)LA)&;M8@EC1xCyGn}tDbPF`>-JmM*Mbkj zLRC_(Buc08zp#*UL2BwNiELj80r%7w`a0`^LZvTv#rtnv)%(n30QVGb%kkK=dkSs& zTM{F^{hz#hPD4{=EzhPb2~RJ-fP*0)wInydj-C~#jO|8;OuZ!n`^;GCdj~+5_VaC$ z$_rzyQfRz612a}F9p(;Qwj{!bOl#cIHe2CgKBUO$cqSU>2%w@aqhuaCxGVwx%I<8H zHyTam{SctZ!I;hs9TFJ(kJVGk)&GUD9yI~K0&ipfM{s;~Gq1ZM4ZxMdGhBh#|E;G3 zLQmic;HnrEPKp*Ia1z`UnIh$NQ-S$h2NrsIOAghQqh6{;$b3fIw#zmlH6Sx0GtD*; zNl2g+aNEeA{O1XvZ58$Dl>EA8bbJrS3QzdXEB>vn@e=SG})#x*k z;dn;cLp9@K+q(R+JOlHxJoATLICboYpF#q8tOer*Bd_T@Kj?XOx!Qz}Yjw>4ZsEWI z%g4db&OMZo z`aif;4dB+sC5PjWV!b%yRwr=5$_hy`sG1%T#7tvvg{~R9ay8_CFdUcnzY~7-_uGb5 z#KNy1EZJMZuJjA>377f6-E)2w^qgN&1Nl|k4mC;2 zeCzY{G1@qI`gkP@F#R=y-3q+PvYI?4Hqv8F_o)-N*2A=*J3mPwqVv>Jt>AqDvJ^Ho z`BD;emRu)Cr-}->jTg5JZF8MM&L&8zf%u+#ei87QoX;Qej z(1GgY)!}(zB|JM)UIXA3%|E!+1I8`lXWWWl`d{1EafYV=eG6m_N92yfH>%VcU{sp&;0NipU4Em*T1+4h}y#|_A+WGiQAwN%t zA}dZO*>xD{dA<*)s6f7Y8X`0$9Lx8RG1SuT)YGa>bkISdIYG?-5}yuw<^1_oTNE}u 
zo>!UiKzu3WREkyZIUj0|G%YnKC(0^6zZ}-RQsP$xDA=klkbF-lDL^evDIs$6?TQ>- zUH~p!-iaZ-Y)_c#*N1ue1lf4RzpCH+u<&qZKUc8|SWAA)ck$|XIQuwKS1-B5jwBI8J2(skYQz1?mcH%KClR9S2tS>(73Ve8yQaEB359g zJwD!MA6XWOpz~o&q8k@XU>^*eBAQ|^zJeA`g{&gmE4c04a@+Av!la9*yUJBfJqlod zhDN4dZ&RKPmgphbJH)kXj?x z{QI>X&zvJl)31I0Ut~ytN`7l20>W|5y6A8Z>96V_@Cq_83jQv1&m(Emzx^sp)RF^yu%hr>HfAb1KQ(_eIz?X!q;X%WE^TX$>iMg|F+tqz} zub62^UHe18>+5NI4<1@g?7$66K~M?9b>n+Gba@pzlT0EyhFW@h8=HvtV&^EZCK` zy5_WUW9-Pi`J^#dOZ-6|beC*vZ(~a|*WEQL3Foy4*8o`4M^@&=alK()C0oay@8+(M zXv4nNUwkuOoY97+5ao{s^svTCfgwMrhQoqc%jRgevEwh<$DfFH`Ht*<{PDPsrE{;? zp%mQkciighv>)<{Sv_eGQnQOaXe4TwEZLq={a^vxvQ&dys(Pjm`c2eMMTDwXHeKIm zJa+c_*<)pVuvWG8F_gsp_Z2@=Cl~1~ir!r}+*nyK{=$PG+UnLtlV5RX5^>E(y`cs{ zd-fWM*3(ddAG(r!B{N0F64NQi2BXh4&_s4K^7S4iUB4&?-# zz46)2Ib4L{70(xH7k62ULtMI*jLC)tSY}U|hCA7m>S~AHu?3kj|4lq zxUF~?Jx5xp;7DugCDKy*PoxEUS)+O0^&|6tUwYsdah-^(gfLxpRv1>Ima?sitO&Rl zOhZbcq+Su1mc8LtT}%B_cjXEao?+i&+GipHFmOO`+GoZ#bgByXst*S;~a$n_IkE` z;{7XA2lNa51mLHE2I0Wdz#M9suL@yxRrCS~GcnO~(vd6C{qa3s{e~gpeWOPIPts*? zCY6>@@Xn39Re@4hY7=E=hxqZwHQB{|UwVqxteOd?;?IUfi+NmmC{l?5Bb&}#1T_BSURs!Wd!EYY*<5{hudLC897wt*3}BQ)CjDmr`#Y$wys<;O|wgV3~f57Mftmuu2X_|(AXGg+-4Weig@Nz2geu-n);aYfium03+uwiO??i+x|lOsP++h5ik z4LFZqB@^Enwio^ZORa3+?XK;RThN#NG&=b#8X}#0jh2t;J~u|C!8VsSe6+3gs?7S% zW@sZ3zyPZMXZ3Ho>vBXXkCUhbnLt|NvN!rKu5wj|;OjgBbo;SjnkKbS15--};&8dF zIJqor$PQ;)8rU-wednL6v9^Qw9u!bK#cqdCcw{yQ?c{rNqbpnQlolza{mc$iaJHgZ z9RHMZC14J127henkXD!VVxCfz+NlJ_lc7PRM-9Mwa+Z32=XrIe=v3ld_%0G<@(J=M zQ*BKHC=~y-U7FqJ^<-cL*5+r zIlr+H<{8O+Auphgg`7A?ozy13N9<)_^Tee`twquXZ&?EZ7w{--%tny5(Cza20Cr`Y1qqCP>$dU`Pn5OmIiF;!zd! zcD`j>@Q)uRV32sDPB%c)sm7x^SezgHozCXNV3Qz5BEGlVkaI74w}nJdrGuEGP&CqX zpqueFZk01Vgx~tO-xk45R!HG_T9|e=M~Bfklgvh=_Bp z%reaL0d#Xea(n#_{@R>Vx?1L9zaY+U&bzlEuF^o#Rr|xdVo`mmBGjNtG3R9JMC~u@ zU=6FTSwu-4D|()7;KXHq{_|YT)~krMdfcySB~+$!GT*l)6sr>!RU||TJAk>GE9(s> zMRXCltUS;=g8JlhO&3U5UtRp2-zGmk6AabS`~LO{Y?p)3UP%MD0FMXF+;!}Gj%oGqoRdCuwTo9C&1>Ws924)NB|8#AozWtT zg23pd_J7dph?HRJ3bXhZ%FHZ0YNYWvVUr**q0G-r{x_f0f*s9(y={WylHx5f7 zCfVUR>3RSsT|0!bJJnnk*B^_@w#8E^joonkG(&(Vg~+3C%Vv*J8qjPiQG>Fjzl=nL zpy5tfJ%C=`6yD*_J@t1TH~5qE*@~I}v7w0M2nZ{yf9-j%bP$uLBJp@!ESkrDM$5Yn zO(ORZRjB0S5I@44Q{r*XdISHDg&ozM9j|EL&c>mzvo-!`H+xe*y-|{$rj7Ws9@eja zX7t(SxXaFpmqK=252$8VAP1Tk`JGk2r~GySalh$QFdsjAsMwJAmQ-luy2$*Ir?AMk z-&k~H*ty~A(^+`$g|6Yd{IMyQXYli`u7Wx}-*r+n_$gP@HTU!K0~eO|>NF7<;p&Y&qK@OC5zKT3|2lB+yM z`$m4VFlg%t_q)a?JiX73F|%H973N+ir64aUf{Hc&QD@-sAS}#nRj|{m<}i%OiBw&V zcwQNW0eQ$PYjjHXD2QvBlh7r#csjGfCUL8Y)3{Mp`n;kM!6cOj<&ql{^c{#)6d z)oOJP;tH5PmQF+JFi(v`ZAZP6wY9zGl^CMqqOYyqhJ_7cI!73p;af$K7!K06uZp}f69~dWf zzIN?@J?(K=Twzj@27s%9ZcSf92GCvg(>y*J9MwkNUls2Xxo{z^)doaZfpSw~b10Wx zrMA3fkg0=O0N+)R+8e3!W||_5iZ4*&x_5c9sF&)m(&*O!1H)yDJ32RIQ0UFlx8IDt z_V#&)jE3)nwbr#mm6J%n{{8z#fNsAHhy*~f%A_P>VwP@$Jz~zDl=Y~0z z+1sPr#nyWsqXNMv$YZ@XUlrcznFL-5}>(?ERhdonJCs4*!Ub={@IjU1#+|Z)Pv-*tNS1ukp)< zUsB?*{~2nLH+y-3Ys9rgBoc6qC=b4Lk%2zMFSTI$DQlke)EYEq$g5&8swQeAH(8d( z+1c*a2$|6*@r8b&u@no0>%BkMT5*Xdp$HjLb%}R{rjfQOth4M`AzhjZl7f=Lw!WGO zIr)8^#OQj@wBa5MtG<$ z2E_ygMgjxlV8n-k%LNvsy;OJr9$IDvhKB;P_k14aGFW$Qe(>y_!gIi?9`mfFEDV5$ znl>#}YP|v50M*)84+!|afKbPi7rx8L#J4);hFzbi@gZf?Q=zgb#Y3DPu&i63d#M5U zt;4crALkJ%qERqNafQ1a-@KW`0GzYZ`@V4S()PZ&cqO%i>dE1&xuJnZu@4-xW`B_S zMKI_@8Yc86hTP)!VHj-C5$F;rQpF8p#}6Y@iJjPEH;QHV1AXFBo*W^xZm1YhEon>7 zk9X58xHP2c3n7^ZwnQ|DxrF?5(q5YiEzQnR<~>|qVZeclv8fOr3M3%_hFppC6=dO! 
zS(RIYaLZ0VThry*mbz25!6hl&6}53i*XtAF~$_{e$$%?8*vu zmc%Ecm4ySs0Gd!d?qSgsMC3QW-&n_!Tpp!l^Iu1{cg7gz7T1tSB<;{b~c-0l89-4L;cg zLoh*%m?9g28{Q zh1@`cfE5sBS6`qXv*8!|E`<0Bc}^Cw@i_xGTNwo4SH?HqB_6;9q<}SOMhj@EsGs%j z$=9(tJUz4I$usL&3{tX9nNY!nV(*%z#?-%sB8_K3B7^j&FaLeu;=Ftx2mwBDp&y-1 zU%i!|EF+!xxkmy*xqH8!8IgsmT+E_g>ACJ6L`n~#y(moloTtGD(P+?ud7W+3Y1KZe z^_NQ+A}vR1-5Nou%Ae;Q(qB@PP|>X}E*!B?C;~2i(E;En{?$3i!Y!1WHvdi8OqjzURRlO(4F; zEVwH)QU)dr1Hx7ao!bb|jN8&mfJ0O}_Uge{TlFmYt;6W2%;?p9k3TEx>!K;8OTHI% z1<@@ZegwczPyeY~hSXOAUK+A+0)66#!8cJ5CMPJ9qy_^v2(f%=(BCaS_2h)Q=YWEE zm}L=-3%>F&Lp<dLN!@M~O|z06Phev6-4 zfI^Y;-$F6!0>+OGC;6vY!LF-*NyQ2Dhd6gZgtcFFm8gFGy4J2)?oNkdRt^-I+>8&E zrA`aRZ}df-e9U~7#Lz)--Z3ag_Qdl|M|kr`S;FRZ?{YUpKm$@is z%5=y4&3j;g_!g+0ECl7tm=>KYVKr zd3z2-*dc?#^kC&Olkxm*U7hKd#@ZeHpcH|<~0BYw zF_4%g>bUxCSSoHF(ALc+g;;VkA)Yv^dufO0G=n;ySqO@fXk~z65r#z%3Q*!MY@4l* zP{(X#?roV4EKRgps%jE>tiHao*S_tkIKTv%@$ZwC@(j4k>xxk7?L+=(2GA9kv`Fqux=`we5ay*MiUCV48K`qXC6WUg|8=7d zZVrMLN(*2ZuRf^h5487hl1*TD5A%CSUQB-&Va<*8hW6A<8KOHvR8j)3NH z@~fpwDd}F;HkgpAmGct8g(6ihm!mN_Z4qdom(ye1wQMY|IVd877BDz~#nunF&Sf>+ z_F#WLtBNW|OfqjdFteSY^DI4v-aN7vk6Dmmst?IkV-hMnu(m<*r!m*#sRmAcj0f{u zYF-QY*ky+Lso3?&U_Es>To5NAt9p-~Cnd><@+Z221Y)=m&3@c9AZ|fBPethz^S(BU z*WgBx8j$1!X}Po-2@GHATl0UsUkBn=UyZP1(3hO*FH4PUhqjQfzGWLlHa22*l;-0Y z*wi4v6Nn7;QjY;Niv1d3QiDi)=)a_f0D#m0Qf0-vL|{?-5Tdc7rYA;Os_-L&s#whg zn8q`Lr|}dTjl$)LDjziK(x3oRgJ#wLk{T*#khhP06SL?mODO=ls7Yc`h{^}wEK(dd z1B^=+r6Cmv-T_C$8q>D%T%v)78Oxg=9+il8!gv+D6 z>m=o&uZ<#-NxWC0n5uB%KCcjiRe$#ik5DNxtKYd1MY$yUV0#lHBw=B`6O$egxMroP z?brwVfxuPz>%RlnbSOA*r8}no5C8(#D_s;H^e?ns*9AClfpeEXV7N|@5BNS(rKltq zyX6vPU%|4VyOM64AT^`&$9gEf2Gk0MIL9+ZhuDQ|w39vJ{zJe#P5S1|_V%oY?S$ zgL}nHDWF&UMj&kl^ok+}?h)2<*lMoznZ$mv&%wag~VKHXI$xA)hy$tozMQnWA z9#pNL#f*1&S0M3+qa1&0zS_W{*$YTrh$i;(`3kKdzN(OncTSJnz;!FO0Ec)O850?Z zT|lwu{4G<6w!nrLTr5s}5rQcgDo9Ah-$=v(ip4XaSY&)H78f4tfZWCYeGP6Q`oqv2 z{IutUoCt4(2krU{k1SChp&*^7FE-L2L6<1=??}#UDb2({TjJj3lZT z%tR$h5^+^~7#Rp25Ngq}*QYri!h&~P$-@=*27EqlpYUnynD_xEmTa9Is&m5h+pC`W zC-rzeMQfMDN;j2!!FMH*9)>Pzv9XEDxX_wuIP{kG4a3xAg2n-O;_zbpC zh(4_kuVW9A$~uBKTr`uPTs+w%=1o;~S%iNW02?l?`d@ptYSS;LqA!iF8!kjuw%NFE zR&T%?uK6Jc)&lbBMVfHqi=_9h1%9XW6V6Ny3zzpGg*N|{u5x=9xB7-@M&?9Vzb)?| zVdVg|jsWWMTopbs9Y{ydcB{pc1;?_av@!IaOZqxB&>Nlk*+6u1>mJd$kGFipj<<0H zCs~^499;aTEEN>&JR{C}`@yDZouWm+Wh?EU%hs7ep{W-5vZXKkT_L`btH~aG*aC%! z#QuBPdYi@NO$KUFI7kF8Tk2xfys|MeS9(M$jAAWZ@@PD_3Z8$EJN^ckkzOlFjON@z z*?ID~R(2gepadgNR@evjeA98Dxtf%AXz{sxZf0 z2)7o+3&L2(Hi&u)(m?dAv{xNN0U zIk?j}rhqS74UyxK*@vi2Xs&<~sI#LYKqn-x|Zsg}f;IPGcAwY)-MXGodovm46LkesEcGM5U^$8-PT=b*_ zt#)n@p}YII?xBT5bPpxVu z!!k*`UeJOlz944)15*85a=nnjY&+81WonQ4TPSuEeWn@@r=;CxQ{+z7w1HFkhYqN+ z?wMWVx7jhx@X&JtC^6Bjr-i2^i;kyzDqSCZ-V$F)2J_DaSXmhB^)%ReVVt3WSl8}< z)E1+4RJK3(RJIPLSMYw0d2_4t2^Q(J&T<>(sE#i8*a-w{Cm)1X8J5Ey{^s#nNf}P~ zV1me?{gCC#p?h=GUi?{AY1F~>z+DPj5V~X&JtXIQx?lGuQ+IVfkWzmckDCkzH# z`|{}k&&?|rS_)U?aSE#z@n~K?31N>ip>eD9a1!m?OH~7IW|!|^F*nu?oVJ$a3`H59 zzjBPqQAI8p&&+?8QN@xVz){emGK`uBy2Ov5CL{@(ty9A8(1KBXhO0?iMO=*1?<9#U zH%bBG$MZhnu|v|uxGMuk=P6M*lE+R=(ga7zzvfxhI^LE!Tr-(pq`KbD0sHui@K_&U zA77;9tYQ&^yJwvXEC1EpDyu*4?P<5F;dxhwz8q^Eqo5d8p}ZkAX8vePG0HU;Y!x@? 
zN4gg#!?n{|k>tn}5xZ7rr${Vq)@wIzy{S{5Lj8I*!og-v0k~{&6}*Dn z=N^ttetGQKX@K|fji#JHmk269j0MU>Efs`qpiB%e;{izS%*vD>nBBgX!^mm^Wg;D~ z`g+=`_4(n7C{QMzY`QjlZJ_DyjEc}^tUwxP+V#f6zo!hZWse9d|AWjqF(N6GH6O>4 zo(Lj(eS|o-UE7}Msf4dTBG@e&;*$cBsnpnC?Oz@B$_)RMI3o`XyA3Vu)WO7(#Tu;K8~Dl{G0yJtx~P{u(xBamc{hJ1=PeDTEQJPxbcQ-$eHvn$OQbtrT6_|0dsea*qcs@0R4>A_#^2?6*x23?^4KgkT zvGXg#AG?#WSW8`K#ZM9VHqb;M3_9>SDRaA^ZeZFi74J9FVs-)>d2tFV6N9LUCc>OC zOA|5l_}ozIu~`**882$FccInz@8wU;13>(Yf|EQwc|DlR*MS?~EtY}tnujy4f3f=J44a;=> zmx%Y17d-qZ09O>CgT|r!%N6aiWN9TkNaP%nK(N6$UXzITooYOcER+;}(TR;bAPR%# z2{IF^aQqw%pxfh6bbe0!0MPABdQ%OGPm23TqySeGjBdY78`2HFXeT%TR$HYX@C-o` z+Z$U3hWm@OUmzrN`CSk%&Jl+hk%Mj1=aspJuFD?*G>#MMy-RmUS<#xKA@3vu`}B>c!>Q0+HQ?TIk={8tR9W5M{6=f*i zQe1}Q@qlo3=6?s>=IjIE%GqO1(D5;t|D@_Y5UyxWSr{qVSw{HTjwQNet7G;_3jOk# zyHJJ65tP{aI2306i}S|mpIF0^VfDXBzRWfClL07)n+#9C?o>qrVRX@DdN)N7;?V>r zg*J8}JhV48NvR5jfsHgA&uxR~jX_Df6(Rgeo|CI(w%rx_L-Mc24*}q;(fOd!Gq{?wX-@K6k`wO< z=P%eKkEft--BUCJ6KP*2r3 zLv4dTn`nX|alVze*_R2)gm_kck@Fm0#bifRQ|H=7lt&^S<0S z0u+rEO3kXTnq&z>?}GtmhDmv~MOw4~O3l?3i}v-J<~Rmdw4V=GOHa!%rdYDpzO%tg#dj7>bttkr_$=e)Q(Jo@>zm05h$9E?>aAF7-MF=vH zLy{`SwCE~C=ueId34Nj-X6su6UBJ+ijzkWtL8b))uMqzXbCwcX#9+d=$|m;eXVCwx z=QK%uc9VwhJa)lf`tfD1qz1L-ixB&;`->K1A->_``4^n{(&3TY5urL5(l_74oMZPY z>)%kdKYOGn)w7=@$IIIH-Nc?SOwY)Al?GNJN?Ir?RbBJzdeHMyCuhxILGx^L5FlvQEszpXs4c~e@Pfb_ zuJ#?NVPwq%VIXP#cu%Na#yP(GJ5{1RnF-0FE~b(-QQOVF_&fKiO&ZvkgGVK_JvnX* zrae%ODVu2}Cnkj)+N72|GBkIM-H!CJeHR+V=3?qYlkzIO^MRm>V3zG>`n+76`iW~* zW$6Bxw59nT{kxj*U+kYgHh2S1BtI@+*;x{JlFg)ah$G?K|5?dLs#?UfEo1GWx@$<}dTi-oJw8*Q6C%0!~^~QMt6@ zK+(w@f+M*mVhTq2WRit6`^?wi^$UjR`EGLA`|sxazDQ=Ki_{XyX&Zp` zFdeGA=_a3}v0A#$zRYOil&xVhanaWcGEjmGJ{pctbUA zQsY1uVI&j=Z7YI*lo_~bVZPq95VJ4S?4^Zl>`Z6#{Zp~O9co7T%c)b9O*i{)hJ$MJ zY%(0|&1td@;nMY-Cfo|vqy>tmVLZr18?Pl)CD|EtkoGL-?~VCeVXXp zc>Dlt<<0u$zeuGFWeoIR7l20nNLqfP0H_B9c5@a&O?kP7^fleO%1UyU}uB8>kzT+-x;N!@6f)+Yh;>Zb02WeJzMdYOgmgg;o zftAnOy9XmYY~l#U2n^ABf!?1bTt-X*)Wb~zip$T9eSS;DIgT+_lh>mo;*>oMpqW8p4y%i^gaM zZ3$IQna*PKB+y0*gk|0<QRYs8AXwR-4 zVw6r?(4Xltfbp=w3nOjl>;>%OHCTwgPXlu29aAKL@Lr?`-=o!)$4vs_B!2#gRJyrb zF;@ad@OC8f6BB@VA0SM9`4Ayaha8>4K zeQwG3J49z~Jop41Zo~MGv)-exnSs57eIIHW*r%r0wvhdN6@mVc{A|71g;p-KeTR&i1RsS{`&9z$x!!D1ynRBTDXuP zWlwg`r0LJv;G^AU2t$%Z4`H-kN>6n@%w0=%pTc+c_ZuaP33{H%6(H6_{|FES`a|bH zaDP~~!vZFiLi2G%A;cY{Kme+o1soMe0eFqk9f-G>Ya(z4zZtN4WS|W|zf&*$K9(s- z5j8GDYcBqJcmV9qybQ<&4k$dcylhmqC1iVCXgwC5K~{Iu&n>$Mygt#|AHB_9?#J&0 zw;2Jl!c${bSrsy7QCy+-gXkVbm|>|{}nZ2&a3vv zC9z5H&s^EfTUcLB)0v!#S)F&{#`=C7HNben{bJid10pFG*Cs1F+aR=)QsZ4F7UVEc^z=2qnsEaGXNr5D`1Q+QM!;?F3y?#EiI z&cxI>@VvbXMD90w;r8B0xJiA48j7**Eg5NRDCIywd^`BA zl^vULK{DxyLG=UU%n%VSE<=Fp2k<;`h)oaD{E8#^DltCNRwPE2%WrU4I_nKbUqdsT z3_9NyLAN=bBX}i`^r#xfAKDU$ZNt*9CV5F4+=tQE1H4P5n!^b0DgV3JEI3mIGKVj#9nLOG38vNMf*N93R1P= zI!fC7h*mn3a!VII&~{|wnZy|QVdB7`HcDbe0CSJS3gk^rvM1HhHyh8#N+n>Zb@JvMOJf+5di(D8sTt2GS{PLo4ej{`N&eWhk7_La}IoCr2*`X|Ykw zximaL<2bLa+>DzRfR&CK-|hxQVXRYzIBF^2jV!eTw0B)y7l8I&;+#z?@%F~TPX*5p z5a$R{O8%I~Ojh17f(OJoU#}|q=0AiB(_gX%XRodV_@unNtDHF$UF%2*<4@XER51Yf zTEI-cbfcMbSH-*8b+)* z)5AaFoDU+vOkM~O=Ohq4gJGpS&4*1ueTWoybaRUaM6DQ}6jcy3UdloTG$E+;P)dMW zU2EXe(LA=`{x-Rk1eRT_vDjov=G&JC8ee}ADe@xasSo~z47%>-^fvzlt2!7wpZnT& z@B7R5V?eN~52Xi!)qJsdf`5o=Jwdt?d$HC850_kgEJyZao|gjml*a z3rW}k=ELqKLoPPi7U%oI?BLu;*)_4I$f?RgJbu#kl{|uX0c(UgqcT04#dKP1NTF|! 
zEgQ=*IT&f;1$>vd0fbh1)9XMh7(CFrF?bzlf&OoyMZhg54ORF#TNeBV(UU%UP|9GZ z40igxNnAsy^o17d5+Ffd{2LGb%FrT1Oh+B>IoD!2BO8TiUpjiTYzjznqWOllSPQ(x zqmMUiGiCwgL-Ch7LPWN>eToDjvmBsN+AmYRZ@> zOpz=>@xOA_&VO$Rd<1E!p$3+$>oq)}b*r9~v(C$FpUVC~e=C5^6FSL*_IpJfj-1eO#zXpCD>|}Z z4qtdPX4JZ_urHL$OorC_I)|5Gnno&2>V96Q9(Z_&W7`rdKkycTfx6iakB)gh9Vr@L&2Mg%%>Qu*}Cd0>z&nxz43&Ys+Pk@9-; zp?wZTk)w6FQth?+ud^&dOc9BI4R0*m*IcFVZ>P`;onTZf>xudRO^7@Y;hQ)mLp46p&eJ1i0H z;P+hj^aZ~=gnNXsCqYv#Lif|bTaj1hOoAJeB%U|^_=ai!O<&j%K(Lrp$0|E+|P z78U@{3Qt-|7I=vLK_?DzVtAh*SbXdJKmxpMh~5F%S@t)Y>oLG8E5?dNedRW73DrFj zLc->n#5F)|8KTV=)E`xPJ9Z#^>tnM`HO)En#G|%T1B2A=efk$W>$XJT_{6>BSOmMI zvtb|dGfKS20CEd5-Zls0kwnwh+@LFDpvFIEgW6!C|MOe87ESoQUjLO`5VSOGATu?-|CZl1y5O;v z$B|CL&za^9;F-fUz3<>bTlKDwZ_TnG|D6LYpa2%JnGtu_|L5Uh=8b<99q{llnE=A= z(V^La2$E{)ErdWzDIC`BFeDor8PL3SU|L4E8xM^VI^u3LWJN@Hy9$5ZmAU zB&(mNR&h4)i64x)I%&}at!>%q!}2iE*KA^nSSp>_@*C4=2Gy) zPi|JS*+>)}NXnd8ySNkYajevTofukW5;prJo~-bdVr`OT(KbM@D*G7V=4ZR2u}+*k z{_ZsyZA5E7`F`x7f1G?e$D*?P3vZ}`^inWLC~Rf-s>Z!D5r(5m^tpr9w4WlRCCGsUBCF=OsF?6YsQd1h z_x0!3?*VFujIBLkR;8@Hk^E26G(K2J>5DQiKkRwzPx>j$)UF zOj;^#FKD76N3J|)VZqJz&mh9+gW(PO*i}ZnrAU?_{eiA#{WDN=?!nDd%`9Us7~WvS z3=g^-L#8K-3+fFlwl|>B15#2htj2F+tF)7ztFzTI26OV$-}M~!0|T+_3>O`-+>^6P z+;A4OgschWz|<;qSlNZHV3YZusTD6UwfZ$)1)f?>0Lymol@a3(EY;BMH+BoZ*t2-= zLExzs;1z5=CXn1eD$5b!l7%#!I33RVY+A6ap)XXfY zE}vHnu=1M}`1O2m%q-BHDeAuCFB^Hf(Dgpca055Ti|>biooVd z)OcUpg<*F{S>F$Y9jA}M55=wph08S zMEz$Tn>Oi!T751CovsvOVwvQ+T>&zz>D^0`3_cTUgwYdoFo4H8ueOj7gDcz12*(37 z&ThW<1Ec4)+OW?t)>r@Zf6Zb8(Ce^>CK#)u96(BX_2J-y-u}aHSaMlvA?!L;Tn*SZ zw``uK{81ofLj?#dlg=wieN5?(uh6I{!(J~^lI}Ov|5pV<{!<0gL}kh1Vto`N&`A{ZYhr`FZcrrfgmCT` zg&xJggbR)dD&26nIL{1z=5CNuNhlt_bgt_Wumwd7vsZ`sNP zy{R9^xKf!42}3lsujHHaJ;Udv(}-*Tknp~JFWcv|)dci_se%S7xgj@bAQtSe5qV?_ zE3AkegIGNWZh_J2S!Flr9Z{=N44v;rYnopp4)B;|0xtABTza zAA>XfzXoS4R+*Y##`2s0F*r+;y~olzqt-YB8=UR`*WfG(HaN#Z00!q=K#${k9G>^N z6rWt-du??dOQ2;gfz|cMGP^nd~cd?6R z*b`fbH(Wm5_h?#t3Kv4*1no9=NJR)qv5#ALTbt?{#_hKX*;t@)g-bYXT%$ps&%=1c8}A>3^DvKJ!X{vF&ik9f!;PRl zZSrj@AnqVEJSn;~RuQC%Ez?y@l~jliZHkx{0F10)QSbIh*|^3~SFuJnTyTWY=E8q_ zAg~eCybWr^uAsI-=zCX!@-A#LDyYaKw->s<@up68<08)e?nY?R&yWV}Z&tGg{LPgw z9bDW$O?%@5#kFoK-G7(060xUcqC-=aWe@sKLXr)P?hsZo9ckn`v3XUAc1dT7|kDBQ6a>VmM#j<&mdC8C{+KC_3& zmSBBOsMuiGuea#EXqO{;Ww<*AK^6?K#HeiVuCHF6ce|5UDCjy8xe}2bXzX=`>)PU& z?Y}qS{d(l#4X5I`oyz{B+8KkrOM~rx^zjC7LL6jeq;=CGFn?z)k7VPU)4E6zLP&wb zR~wADbpbb-gw9vF%iP4pQ_@dIv6D(~LCe~%rew{AF`EYY{%&~E8{F0%-CjF-ma zTr|*^kE|K*J^gAjl@LV~M3%Gu5jaCMFG2_tt^G&f%q8a19Sr|r^*GAGtXmKf)#>+j z)L*|@x@XAN51#sCa>bA{$B)r9c5z}& z++css4rlHkd$iKzF-WjJ2ZZx+As6uHuzp-H{`uHjbQOwY-40*onm_v-@HN;hmZ(L9 zz}nW;;LDbr@RVt!5$x|Z+|NGfOQ@jEWqz2|sHff4#Z`{+wOx(CU9MRufT_z)jBXN| z_zN@iLuyh|uMTBRc^CQuH8JI z^-E9lAJiZZ3hISR@ah%!PzY`D7)eG%BTADtBFFt8+p}whZaGeAVb5JSr|IA z?1TP$JTp^xGtWnH>#P2ty1Lzdss|Gg)YN(V74uwq5Ry@K-`Gg@H)G4$3z}3yKg!hm z+1Mhax=Mj-LGxD0@Cu{H~ni|9IWr+tr#(G)Ow4az1HfT z-x|p$7F3PxFud4dT}tCxA<}3>!Z9txR%rEClhC++*mU{m`uWVwepFNGOkUae5$tP@ zCJq|qOCF9K2XW(K21?Btw0udV#b}LV)NhRC8IJzt7JQ?RE^&vSKd(SZ=DPenBUEn# z@2zBih)~uLvT(^vp1}mRT8QY!5tSmSZ)aDXbf$rJSRt!iolTqdDG8~5YVN2ike)=ub48<`4IGZcJ|KhH>@s!$-)iA-`WO@tm z-Jw}?Nn~Arza@&B`YULO3wAcoV5Y@$(!4GI!$TvW5%xP#j8!q{RB9hOFw%Au~%=|g)C8= zme{T}v0e)p`6H&XU&PxbOmknRtv6-qQ>dMTmsKf}Rgm^nc6Y~sEGOmS*!bUN6(V?9 zb=yO=&=_)$JAjK5c%T7tR{xHtg}@Ho_W* z>B6P9*q}B}9~Cma=kOQM5ZW7qKdk~V=HFQkiqinw;7&Q6-}ZLjg5BRIo`?U%G&EPv zLT~3bkH^DUK{nrcz|0b*&>+BKq%6n75T@uOz#^qQc*CGIPr1X;L8fe@gFnfF!EM$} zgn_|pruT!yglz8q22%wIO9N#%7SkL)3;a6Js8T-%MhEpddfaJQ79NI)GkJ$O-PTH%}1tB86FD+i~mh|ljSvjCt%^O5c-BY~ERD|S!=@p%3z1om%H@}hk zFC&DSUhv3G0pEX)_6N11sXZci{DKd)rnK1Fik~s*UBL+2658i0)l84=!@NH*tBN%S 
z8Jy={h3G-%AUXHhiCN$)xS9tQ{VK4~87$}KNSB0t!>`zpiu3d?0^{(?N^f)YjR0sx@9^*D%#H()tUFRZHdP{X~MH z&GQ8s(c=X|=p-i2!#p;1ZmOv9^qn;ph|?$38-<3{LF~cHchxXpCQ05?0dzSV*gAiO z)@?@3RBvm80e`v(!^NE{=EtY@ z2SX6Cf>CKm-d_)z_J$7r58-Jz?=J4_2c7Ly+*3+1v_#O54=~-{9mG~|k>OT{AR;ur z6nyg;+2%{B`wEHMEI$u(1f5c~4#U-avkXK3`ekQ|$~w%46yVPeC53SvhCao117t(Px#;CiaZU3*k&*jE*iiZ*C@opQGQ{(veP;?OF)8Tb~Jkf0A=kvIKm~spJ z?CEoJM)%7_&CA7Qdvnf8=ijrN%@q9f_0eW+KJvd$m$x9${l!u{`l)F5^G$P2clS#^ z{mb)p%!Jd+OgHH1a=Ur;#q;H+*@=9m`{nYoB<1<|SeaeaE-=H$&+m34Mw`#+VG#6p z=f}G$iSBzjkfWhtHpxf=EHB)A#@}_vw7e*{d4d(<_V%2=L`%1arzg<%M_+eWTf8Ss z%u4XPPInH3L!v&=XF9EZn!ig>-p!S~H76i|KAL=EP(y%PD|vg?A&@2KGhq?}tFC?e zfkV$Y1l9-mqE_2@_vG*XJq`KqRbh~a=!f4I6zP>$dGyN{O~S2{8U);HsmN&ksXio^ ziozcVs3$$j(Uhh3&diseul;uXQwY!V>HYDAAv^XE*QAvp*u9}Ve_{7TWJ4HU!JGjf zr7*5pNPs`wo>{Kx?wow&?D@I8-1e~D`L?GRCFnUzi_KO8gi_m2#a-P3+N0uTYKHxV zPqVeT9f}koxc6suF&dT{>BMQh@aJ>A(q>2(HQR~c#>W5u;jiE$onaQVAq+K=OvhL# zzAz=5Ooug!Gv|i1S!e_%I}x;J8Wiy9$f=X|Sj}gA<9xC?WM5_PywkXRw)FWTmsM)% zYgtuckeWse&TN9=p3hc!_LW4>D2~aO3iX%eBk$rfcPO+3OSJH`D}r*7na>QoXOZ`z zCYqmnJiAI)Pza|~DHv1&d1#!HgtK`mB#&g(!e@jj&FD;X!hRP8H78O^XUuV9auq^F zA(6T|b~H>Lq@{h8wWWwB3N3_FlY}0Q{PFFL1&A;9JAte*N@>6!97^*s1~CLeZs0dD zANvxS7R!mvWhpVBIFy>nMJF*(HoQZ3u810?JW*f%;EX0uh|lvmt=BLOX;f5! zf5Q!n#n=M_a0ITS!idEMk0#lbbnEVbZ=%&I$TU26=lQ(uTMBXi{Z8cH2d4t|WOzSq z94B#%y`6sS)r)jm)q3d!lDdh6WcB%!m^i0~E8rP-f353IZZxoS7lwRzyOD6N`$Lws zx#3Ph6L0AJLJ8S~l-1U?t+d=7j$UvZlnTQ7J6OJ!Yy^5n@zP7VeDF$#YKd%J?xI5f z$Q_{=g6pFDKEPnd2C{cld6<21jC#qOTRGUEmU~_h_TouN^5=F5(pr7GJKCr%k-1SH z=3L$wCk)dKElJ&!GvZ|BF6qj&PQfi_56%b=vpA))$9}uuCup1_3f)!K%+uZ~9~98= zvYG4&nWW1tL45mQ>>uH;TPK`XTkYC9AwkYGzXTUMp*#sZ3zqFr;8v_}iGutPjtzU6 zK2=Lo%b-85Eyh4L#h_Uuqr7?hyAKLkvD>6u@az4~#uK|4tQjA|Gllz?d2A|eS!S_} zz(0Ob;u(RJH)TiPOd4A6-n$SKoMym-ew#<_+B{+0lji?T1!e>x}QJNxDh*#vRm`y~ape1gmmyQrjlk2^F0w`BddeIzm5_>*sGAcT%hcQv88Wg(*H| z9UiUahHpj5{If*Vum9sc>lC*f?_k^X!HT&0=C{zyJ4m)X8;%BJcDh?v=h_nj*i`Yl~ z_}XVyx*A7LpKZ@H!M=#>+&M+yFQ1Z$2~Xl#{x<<{r!V+Xy`dcI(n`uh<{(T%B|9(8 zBqaqxEk+|GGDKS=jjmE3)$9Bn=^qi^DKn#ljcm*x_^wGrvGwVbgXj+E51~j!Wr-4& zLgF}0eOO$2iSSvrIW|Ai>Cq#KCBrxiNHcIqe1+sknI|-<$%OX=J!~i#^6VQ zQ$mPHRBK@lk^h;HagMp86bB+H!ewT4!Cqm86(47`wa^?dlI8I5-(r9*DgDv^`%VS9 zqA>>{FEci=&X+O5f=dgMyulsk59d4q%e18bdsfG~>bd9q&Y)t`zcmPGbQxp(3i!xy zV+Hg*BHj`*p6{qsdi9Jcg(1nNuub!$XTA*hpQ+Hga#SpOXCOu|qG*H46QPTtCM}p! 
zjr{cK_`@p6Q!l=J(o|dWI)zq*lL~EaQ_Z{Y@EgM9fAdsT8T#(XU|VP#1D(4?qe{9< zKW&#g*>v6Q7wxU;R>m-##YA9-^+DYbA33pkL7)hmq?;-~p^B6&NVEOm@r zvP81ZA(K-IIhh)9#3?mU{I9ARtSlXOH-tWWl-e@bPZ{4_x!lv*&_B~JU`$1IaYmt% zY>UJFAd<|S(v?1i#a%R)qi(+pCEox@Lk8*UunFhrnCi!+bt zwKTemgk*|T&a0#?;xPM&WTw6)G)9o-@|Q#EG>F8t$*oUCtfQ4&E%w!b%MYLUOBZDL8p zY2=?;k2NJ~rJq`*5KLWc87t`i3SA#ds8RLBmhgl|MCJZa@GOjz-tsb%!>~n5H$ZCd za1&}{z%t@7IuB{oFudyMW2Zl4HElzyqxSS3lAH}2n?t?$v!3Ywb6&|CC0dOTr5YvXKg6&cdJ9Xw>=H_LJ8K{{7F9XwR7HH^eTp z8;2iilPTjjY*axBtnGIbI6F+m=^629!GSioy3`z?cIbw`j*Qo~p&q|u%XTi9QiY4^ zJKpb_vuCaEVj?I@9meNw6%YrsDgPev)CxGEz_i9t-%=Y&MG7n{3Pjq6Gg_AWjHIes zbBrKAB%O(UutnS0zN?ueVGbJ>;9_CvWm;$ln{jBU;AI7x&aB#MP)$#{AO460`BkI(34~)0&OXXQw=0bvYG&%r1QVcbuB1rLXnrbN;-@T8L zSk*&~TrTi_sn^3gZNtUd@y}Oia>=-FL3AFAx2(kTI)dLBk1)QV@jL#w37kTxu$hfL zRHpw!yFmOck6PI>NYEwEoI?}kD}wyTdQ_UqZy#S8#4pVWqCC+;cfP z3us%a(5A|VW967TTFO?<*ycX3kg?bac#!?d=C&6u2OX1|S2~~84s`Yt-A3E~`9b_Y zWnFnZRofT8!?}iQp1FpiWJ*!UoS`B^h9b$Br-U+3oN8)#I?wp?rJh?X$l;_j2lLnZ7)k{FpnoonBjdkFQxzzL>Pa1OJZO4L|6&Hy+qq zx1t{|o-zEqGRj}5Z`&PC!FOIl^(6N+9XD;i1=Y~=G(-hF-wc^bP151HJ9un9^oVH7 z-a%Er;JJr>`8%fa+2Q@3HBQWZnxi{(pD>~n;fsX$&Lh8jO=lV?jq6}50A)e6RsXA`>sUS;echJ1gcj1=3UsV0>mjqPrwv8=m2zlDcku=hK`RQSqn<8oz z&M7ZcgKscC_q`(%Ye$ud&4^d_<}nU5b>zWTbq={(5G zU7;2zr~I?%5f7Fxy&eU^k2=T8(#<2L9x6|VeJM=wF!Li+Q#&~xo-9x1DsdjaVis&K z%T=uB5SQp=M$uMC8`0&g6Zzg}^8I1sr$hNZmJW0^wfTHJd16^d=Mg2M)DsVrGqzo* zavhFZirar)f0scG1sG-byRT{K_Zxy}nARt4V28^#E_dE&NHgP~Wba zt4>TKCe!bt&%rt57zk@N9>K51hgZHobuILw&*l2cO95xSD=)OiBURVBCfCe?t1d$h z1I?_)k3Ibvb%~{FkFHFW54*(XPu@5b(I)4frB0ZAJ4VfE2z<*=>)Nkq=k^Srx{sOn zINwL1R35`~E(a+?ml?Ysd0SXYTt86qWv}*wZBz6Tvq41~#cyli-8qsvUU5DcwOsvh zE|fJj-Mb4hH#k#nTK9THtHqhogYK01CCN7wzSp(C^WN7Wo_195rSQrYR>~gINqW!w z@zRITcUli-Z&2wT2;SRZzDSK|dZs-!aO25)Ce5ftlAgIMieKs+goVhyles7%lOk8qvb0yk*DF@EoJ+G$>r^zL}>@JvC7#p&wy=LfN&s!$Yyn zYjC_y&)YI~Fe9U0Ern^OP;6`4JVLxVYDIF~&VMripKznmE+wdIjW6LwM%Klcx{UlN z(_ArUr6L(*QuFYRD5b1y?)Y|e)J~7w%A9bcWGI(v)THas);hB>q<3^{w4Lo5e)Tg; zHxKo>)C($+W|oHW2$PLXu}lg{0?Cb2?TBPAN;)J;G3i!}x9)UgtBJ-3Jl%EwQK-ze z>bna5r<(KOdjzxWL|#u}|D(L{6oXmv+`Q${W^u&abq+=Oge z-DsB2M!Aw933paCv&Daj)Z_GFI=pFSL<;=U;zhmmU>ES%)Bss%`fZzJW&t8i`exzwn9C~1{ z%(g98E&LGeh@oMnt?YoJ-Cmf93ICi@*e)L{4 zllRIHNSY+mMcBx z;9X3YnJyF)mlK~ZlpXDMaCC^YYhHS;yNyy)#qs`WHdP|TKhi@iC0deb-Da@eX$QL; z2Ym|t7kS}i;p-{7_*;CEA2rtv-Y`bjxotJ-YRkUWEu28n+&eGdG zge)IDHKC)eBL|{;=6haOSzMNJp9-SQGjk8IcYEk5BVHUM#hd<|Q-&r_p1-kN)ljTF zHqX(Cf2$+4@HTS-oynudEZa!i(~Mntrh1ad!SN2>V)*Nbv{DXX0C>$?H@iM-f&&u$-7UM zg`B%eJ~);od2D&|DennSWvc9FV@dtr2X@PgO6{!JHp6gx)i}A>#r4sYe?i{)%1}zN zT#{{D&pyE_8$RrQXQFv<-*jxNkBlae1-cWB?f9{w_8ad=kqMLds8Y#w`l_Di?=@qKHT zwCxvaIk$r4Ny$2Oxm#`Bfl?#M{^H3WuEd0xHudE{ev2>t&r&)dB9DAI%Dg*&(4=oM7vDJcHEX*e2;M( zuebxFK|yI7|0}w$Rpb_xM7@s}pJ~QK?XrHgb=c}-)Gony{VFbuqG7?wMM}3f#)6XN ze>X9mPLeMdP!Hn>eQP1`ZKANCA2Ai_t+=|F(KSYMFSeOkfJ6PqvEV)DE^oYalwZ`# zr#JCGPEVoo$27bT?Po|E54fGb+xYy_>ti!Rd9Op7^!6pc(Ajsb)HYxJlTx}#m8sP( zoLs@+ny|dftfi@Rn$8oa$v)8r=3O3oHn(-lwi>36oGc>WO6z!4LcIRUrcqgqhgv+S zZ3`EgO}D-ay<*u-h@bYs0WYJIp0O@HlkRo@ir7+vH(`H}=#&quHCS8ueVNsh`E;JciDj zOGLReGlculOY$_$EO&T^tV@E{0!dfHBhF?1+U}xPvUfKv7N{#xhozyw++mLj_P?*TKaY-muf1! 
zs&Xvjxv9dYyeDj}vg*TkcCzOMv)y4iY2MAEJvpz^e%F#bp-AWA@!^p0lNlR#dF|x; zTFytR>x$@XQ(3qoEZid(!Y$56z7Mx>jjRZdw>tiL>rAW^RXJ>Bs>p1|w5st4RNgHWyDNu(Po5kHlZ>7gu6H{eBm~uzzk9&f!hc zdtIsBap||-87A5sIy)IB!8=Aw6m22mq+FG@J7ZbrC$o%KI2EQ6yBNGT_4(eU6suT40&B->4mJDKd1GCfbc)AF+M^3WVGO-^y8pUcA#hFeX2xSV2 zb(iSdm+e+<8}P7w!+tEws=H1zV)&_xgF@;=S2d^FLjT!69|nA^S_n~D*md5><5N@MFBJ7G5LdU#c@O zA^SRPCjGu8<(Xq<+{X8vqe^9gh;5t9po((de%p2#VO`!6O@HhxGg=DPS<1qXZ~FO%4Oxg}vD9(k z^)>SJf zhW8;a_*Vo&S$geg5!XNsN15ua3EnRIjBYHN8HHMp(ms(I2=DLR&%9RJY8vJjI>VOY zY4+%)uGO}J+Paz|yJul;T8MmVWb32qE zrTA;ly=3lI&#vaKKIP!r$oa)*HPkP0>RM2(p!TKh-Y*Y#$7B;4`c|0cPFGBIR1Omt zrZ^7LP}pk_(FCfKczjH#h%~LCupFCYt*V8``TnJQJmIGwoKJsK#WIjudiq<|8~Hk3 zb#2z*o5x;ONo8vNVY*U3WtcGa;+u*SxqkZvI(i8X$$~D)lO`J#;?}tteuI{@#Y)#G zwC=(~*VESmb!;a&XX%eED<$IIRn!{oAFkhv;PVqN>|`kkaL^O<{cyF^$gjw##64f^ zP(jMwgkCo)K1VC%uG$|l^p3|QBH)Gim3yo9+Uoa}>icM!gnJ`hc^4ygl52{6Tj-1@ zg3Nb3vp(!sZJql#K;<_lu9rV5ZEN9Kl)ecTxBUUj!#{6f721U?@Zt&5I8Gc5jt-vb z5yt(oZ6$t?h{F}C0EaJ#37JBJ!`XT|x_a#f<#Z%2{-dg0#X$%T7teshu|UN?uXAAf z3nIe5{=j|N5|6`qlW{o8Kcny+Kad(iqy@@b-DOq?IGh9thvWShp@J32jv(CN+7QAi zP;f8##66fO3dZ?2ZI}b4ae>w$WH;d@F9x`VA+Qp{z)2L)ZO1_NFv7*ZT2Q&=5=z?QqVq(93)un#@Nzdp_D-$170X7RK~z#6dck(0e2$IFJQnVC4#HP;j99QIC!?~ z|6UyjQI?dTvcxHk6m=YKrv&~#40;5AD_esL#K1s;Z-)*necyt^$wuS;L;I+O(cYsp zb_QMCTTUEK*ZIFRf&)eq7=>-2jv^ESjSom2MMMP*wI}Z!f!i%7gu^lYUxjc2si1ch z*&@)a-EKWei^I+HOgu2UgXaRHDB9J72kxIClAJ&! z31b7md;-xV@C}0G2}FY+_8s(~_VWS|oP>V%MPN4xF$W&xKnpLKAOKb-0ykKlgt0bg z2xvikzps}f|m@=O+jh36evZlw+#3`1w-DE1#;66d+rC`&=RE&5P+LL z4B0vjLs}msfJ_4d7w}$01VH8#!c3$Z6F}W@=t}$sv$)T|9xIyx>NiA_=xqL10i58u zHGu?rzrihfZvp6LV0hIFKo7NxJ|JoarWNx8tuu(E03BO2Q4y|X)R6yg)BS=0*LSEG zx(<#*OU#TTfB^stEBlVH3O4>`Tr384@|$$6Hwb_w8P-P{f%O?L!Lzz(-30MeAT$f( zeaQrNvxu00vHOV;bQR@!7=i1bKEZb!*&ufo*64i?d`0b^Qeg4};Q*?0aNnbUzjgq9 z_Pg08nL`-?WL3ffU(I2ODTJoSpll9C2(QH=(9FY6ivsz&deauob=~kXiwL0rvY$Hv!ZRz>FssV5_(FVeFIzm}JcKwD*3P z#CcPaWBs5RiV2M)fcXcs!IP*=T)_Wg`5UyOuA3AOra%J9WLgB>sC~1FC6XZ$;XXYK zXCwbn@)aehb=ss|St9_uMcDS!G$3aYibWW}@FGknm!#2R3MB@cBuC+C_$A2f<^ry$ zji-RpC8$spB#mo8#eq$Vm;7K3GKq?!L=dnOUN>a84Er%J0eqLCPAmQ6%k5C7x=AN0 z1!|UIhAVQ3E6^w^0qqqiAf#-hqf@-O6K9n{1j=mF22H4q(goRTaL?Gf3S0j7Gix{H zsW}WxR?!4UK|E^bk7KfNC=>kqX>6Nhq9=$T&<<`6{NDI$Po@yEPXU`X7{U4s8bMs( z@5hO362~}!VU+o&2M}L}w%QxZ8L$pZ{rkSyn`D!hKq*Q;69i^ayDc1(>2APM|GvB9 zCfTtFA}EW2rN(W*&ckzbPdA|0KLHc7{|WcsziwCr+*jcKTapO0|AgaGSs)y>BeyYL zFXRdQeYyRnB)oSLm`Jecr3uE+=sx^QlyHZ{CGgL6WB>gu`&J};faE8&fP#nC`vsQC z9S>E1ucO|S>23qaM(N~tU?C5}`+*P2Cj-W*-0!|WIgeo{rf~-ZgpR6(xWI1SnL3C8{6xdD!?fhwQhK8g?5Oo4K=t%sa zk%lBk&^(KQDR+bnq8Q-PrIVh>|NlC4J7WYVTFBn-ih&6)$d+V;Y>A8iV&6H35y*5T z3W#SRQ3%X#n5P*60M}0af9#9y=$H|q*8}6M)4?3tUKsf1hcI9{&eFp;MHeu47ClLx zuznE(1^$q;PEV4=9|3W6BmuAz03AkT*ttv}jG`a-D@-{VhMDmNgPd^V(}I3&0f7O! 
zSN#BEpA1J0q~U?2vjB7Vi~ zj)Wi0bIc?%!7K?+OQRx~6{Ls09 zE7~Xoz3W)l;G#APx{{4TD33xp288-(jO&u{E2w$mub`bV*w~dsIQEPc)-x1~d7=UI zFtI_;LLBDFNP?b|sE6hz=E-D3Yrci`{CUc+@P(b zB*1_u2L!H381O=Ye=-K@P!N-X0TxaOGE*^N4FSFkxU-Y^Kp{F%myWjEh)|n>&Ac8L zbhc(gx@Q{Y#`>`xGn zf}j$tws91&m101HpTvcQ%q_))1A{Qs3_lDdU50X$;nq@nC8{efTs6`qyV-ev z%w@6k3|1o5T;6=WA-VM|e!}tcx<@G=8)^)Lbb~NHoD`nE|75+#zoUy{>N&STD*icl z<`2Kwlidgk%E@KfHleobi58i*sSP8?VrxI-4QSTGv-h7s*T(j`@BLQe!^Jis8hda~ zc)XQ`&2SH5o=U1rf64M+)w+N`T@P2+GrVaAy@kPawEZEETm4uUnOxTan#r;IE#p5z z`{SD#$B2Y-ntDE8xNzze6dc$37oRX2KL}%yR(qkZpx#hcRC?O0e6ILcIYC)-dTT3b zgdl=n(1JKqE{^MWRojI9Hm|TY_o-ZR|H4)i3IBj=!NJEI6*a6SZRh+^%+*vC;Hi2% z^f#yco?u#QvZ19)qvE>#THc%p`{h^(sy`6Ay5<+WhGCR~sBJ!JAsNHido$-yPl|4$ z6Lud2+hsGW|Eb0SP*b&as7M@Lts#Q1ZvionTscmrP2&MeJA{Q<3SDnqb{o0 zTc=m-66ljVaECW@Fjx{^z3-e5XX{m8Clo%L;kHu3_LtWp(3vgOzWY!EiKUkZ@&Kgw zr6Ye8A}}hV=>HcCm1$Fs%9z{|gU8QLQ$99>#NC4&!#hLG#rL{B^F4oc&rb^S-HGf> z3?FO14~A@y?UAXzSle8NP{Lt!l=zk*249=q;T`L*luAg=mDD~$a56XEeOyY3^8`Y* z2QMU6+%wm*S_Jb#!UL1oo)x+h3?nGK)WNM^N&)+{cAnfS&sD7k>_p<*^joBfVMh?2 zaq?@-jQ2M&QhVkp$aj@n4HcCoivSr}K;$A!RIE6EY5FPJkd^|n3z5WZ<>2IRATZt} zpm~k@AnlhN*B_xG2Bz8>@TVu_70EEvzRgetwVz@y18iYB2KB z+*bIz&N6bidPYbjAVY7mgZ$((M%B?e&9<=VjUrguZs?wkyAM)`E)g{?(RyTY zE(=w|eIdvZq`YqKr9H?UTpS(BS<;=bbx`-cn><4%Rb%M5)cT0R!a+1MXj4NQWL)Sq z3%6XwIiUfLg2iHf#bQKJ3krwQ^Q5SZH&kUsTOCO~Z=+GG>|kDSL8mO|%S~tx=1Wtk zAkg~y;!tyjHmdB4AAm4#&wS9ZXaGQEb0LEMfGBdOWiN-Jk?6sBfn-~+Z8YiKZrhsX zLbfeEdJ6;N9u!en-Un@JH5jxcB!dWU?|y_arhpHg>0gew66s)J`{iLcyqYqXbgvlM zUvFr3iGH$JgrA26MoHb;aD3hiV*Qjfb!Z+gE#blPO)1NIZy*&%JE)gZ-xez019hNA z+9lJ~;3|$|U0C0OpBfA&p9z-^k~skuesU&`=fj7DMqBiU*ERqAc=PYc;WIRh%prv3 zTt8P|Ijdtj)5PtT@JJC#?>N6S9sGo zDyZF%>L^AA9FUP=X{LAi%o2Nls_}HdHnInC|H%ETMl4mkFTtwDtV$5tMCc21zvDx@ zg`oy(?O5LZPeJwv>~leHPp-U~;fgCs(q zO+Tw%4dMRe&KB%$bo}-wQdP*N-B492=Zs6Jw1JthInW}ML6{()57&nhY-Bb)b9I>a z_oN+|TUxz9PSG{dh)Ig!*2o?>CwnB$25E;kS5`gt+mCOfsH{z-gf4MLEjqJA2@2|W z9;Wh*Se`{U!x5g*@K|~;Mc?^DgD`>7;F>r=F^ulpZjF)zVF;=XDCgJb$2_zZ9XdO9 zOr*WyaZnGE<@1|EeutILQn(&6c#(FeQqZ`+6r`l96K?&bX%TqQcr%igNB(@^e09WC z_pmKc)Jgti$1v}&hLV=RbST!lqyx_6#hi1U6&SNj$O5+mLN;mhG|<*t9*f6Bs_bV+ zEt-=}ntqU1cB(>R5kf<0z&h+4Bd9`O_C8Jx<-l+qraDB?``nhSWh!w? zHG<^JZSd^kv0+Z<7MrUj_{YCqfO|Qbe+t(IY_izBVZ~3=FJ0qtW)6hj`+c57 zIM8I`7&$#c;~ST8FlO6w{2&tTQl}LClu&Q(!SAE!ejg~QENE?( zrX>bV1b?S7%Hm@)FmNEm{@z#Nn!QF*TL+0Ll`^FIViZ5g;Pw;v3vvfHXVZ|Sn@+*< zs^F_2z~6YOZs^h(Z~Q#n*!SsaL8ftE=`Ylf3Rhr`Ki8SD92jJqy5JDxaIe{(8j$_; z*6;dU$243;AZN%X)BqyFN?R-K84uf7M zN=#`55%Gm6_R#N+5HAN0z?cbVVAOyJ#iA+4!jDR{E&8^NM-l=KqW z178SZ#AapYVhF<~uEStG!5?mLu%L<)pmXtQD>NCBGC7t$%`m3DV5#%5RfZ)N(^WAV z@{XCOZBNmuy6d^i@>k#{J0e@^GfjcSpnPE!<-v1f8+dAf0h<;wWdbL?svN?nzdW+Bsi;yxQOU>cnWLnEa8n&? 
zEPb-IiC|66ZUgX+PPn}DuMMuqs=0F!HO+x=o}o)+fm~NQDLi9A$5AE`%ND=p4?oW3 znp*I#Th0!Tv5vOy@~!z{Z2IL%t%gtfY0pWSUAHVl&@?^nbdfx4sc^aSn(JY1I1T-^ zH6ngn=WM&N4&tM`65Z$sPKqiA(=eiv5=Rl*@X5K@Zg{ifJkGlwAJtt9mD}ixIs4 zqcY{RR~@?4w&WsUV>VjbdU{Y8v#~FUxx_`tin1Y5D8_KdllpO07S%v)8k>Ut~y;*?jbJ8#=AhBeL+>9u;Z&Xe0nT4Dtep12t~}*=x^V$r*6Om%BrtUL>?(H`?T9URy7Wh zJFgx5^GU?U>>C#p%WK^_?B7~Y^^oTeZ?&BkaBW+ZrBF_8dmD&1I9+!5&IV)rGxd$2 zwB;Z#P3GMlD&baUudqi_o)O$X4(YKEv37edKQt${NX3Tfb{`ab=9bnVH8;X-0`766 ztGw&eNH_$B?5Z|(`mU~lfIpwHK$da0=Rcsp=^u~JK1uBhX9pMAr6=~GO>M%L+lEc$ zN^@t~Fs(<=J$`GPj^x{iZv^7+^@f{gIky0hVgY;dP_b+C!&9wJh>sG^JJ%3SrimwR zZ@I2Jl!nKbVju6T3fjO$oV3S4ce{7qgiqqF%d~6HOe99e3K$>Ydkq!Q?G#j&Enz9o z5lrigX3iMFe&3v}Z-`Cc)e~T&Gqx=axn-l>%e42xu|>B%rQw#rZ5Zbs0)@9dc#Oc+ zy**D#Hl*(?4aRet6v=lGnYC94Ngt*N*7N6!1dO^2P|GP=WYupg=X2HpbC4_Wok5awDFbCrUB|EM zK^qXp0g&`0QybHK=g_}Gevm^W6Q+__kvsy52?Q0!2`U=eicLGB4H7kEcV}foQM8A; zMTVhWk}}RvWr&ctbuMwluTgsaeQ*m8(KUGe(zAU1uI`$JlRoMvS;R~5*N2}VK0dFV z0r)WNs;Vu(_eq6=NI)GbluA$x#JjSfxWTJLk}+6laI28~RmI$o^!vB&?P=P78s;1* z*Peb{$66F!Ljv%aJiSw3A-$oMB{(>}uvLg{6YXPYa3Fueguw-obzFq7INzQ9@HTpI zCKPYB-;@c%VEPXSldFn7p57>7dhw;AtlM*W{|@?fI92HUun>4{;A)j z456e$2RU~hA4ux?5BU8m-E@B7m%>3S_8_xc=xNp6!9--qXmbu@=P1?XfF0EogV-EG9xl^X9>^ zPC@{85YE(Y!~(x%1Duq!90Z|7VG=NZ0AwmKE~-auTM>KmjgolnB~}q1pd7*5=7TOl zB2KmM=7}Flgklz3=WPfrSwhIDQG_s&>X%G}sq`Ryph1Q;8QhP|c^+~{XBl0Pp_z*} ztJi-103tuKYF7)8B!SV!SD3eO611DSXVbGg}(h?2G6>RXPLil~_7m?31 zRf*uYTiFyrM0ifPV$$e#^Uu(Cm>&Q%Jv%?$m#*wnU}V2*vnK2KQ$WUhU!x5z_IUkg zqus>-xod-C+TyUy6?vsCIOIi;$8L>z-tH`bW9^stmwB;GZiroo1Nfhs+BITS!`V1N zLV7A0lQUeggp1=kw|k+5Rok_K{t zvGK8Mg%=R0F34u`+N}F@O*bupO^9TFoY$ZYJ%7t#Glx?8JAmu{#{6x`-$OPHxgZ|W zX4LBJs`+MEWyl8TbSnJmp-=3gzwNWk@%@Z()BhFrMR|qzZn23+yeYj&B=O&iM$SaS z6AbFrkI+TQO-hOXD$l@6e$f7((wi;735gB)jpH7et6^aAG0XD{{QKmmHXnIBOJjOb zP?vslK+RYbzL9q zt7+Q|T!SlUv?wxmNGcc6goE|DFnm?IQAqA0P-az>-cMal(M=)d*W)Mt{xstZf=k+> z@>X$uX%2AuW&Z3Q=Wbz%tv5N{ELf6D#x(ne$P7AH~EA7gZGWfC(I}F6Z4Jsb?Dsc9#~U7XIF2>gz6f^ zT+?NGyTX|U_sA&&{-#|N;S*6?Z^!CZS3b`6h6HrNdOp4s$sFgqLOTNYyG!u9WA66A zwC6_#=!0c`!P|e@b-!@%iLm-9Lj&$Wj>NiGKma@-M-P9|2+Q6V!k!-iAOH>MLI6yGPmcNB zd4&NE1fu%VAOT|_W_;V{P?-gOXjwSIeomnF{4jtoBzs>-dtdOXcM$tee0#luhXy!% zQ?N;;ZcTW=8fY2kozw?{OsX!9OiJ;02y?W%Fi0arL`t|vvQ5}Nyst+_I*zP&F_xnKMLE>rfb0U4}Q{wS(X<#&aP$vW5P6x1xO`E zcY=KhMq>ek|G4Kgo<}c6*AZp=b~qjLC`CaQ`@Q zfd7Z1hC1J$bi=yrTi%P28rvj73mT2qs;Z8Rn5DOH_RUK+6iADhJS^q>PqiY3qE(LA z6Ef+L>!U6(53pGGyH-LYJb?~)mu#X`b1mIuK*%H%O@$E#lla=lEHCI(b{BUSH|6Jz z|NF;2H`J(IjQ{DAL2e|I0!bl8_pi8szI6J)^hW}W>7v#i*_4_qa}z!Q9);g!Ely<;2w0#iOdW_ z6oaDPqWh$!Fgk%!kr4`E4nGvhM@5|}Mn&ts9T(ltPs~e9jliUqrlydd_R^&Cr$SE@ z>QS^H7A00Y)wA{qid3^VPg31pC=)_zOm?+Se8f~yt{~L@h1?u`9pD?M%8R5c%qP%V zLwql?u{f=qFE9g0)6o2eZng;Wge8p8B5d?V7?VVS6sJu*BJ#QKL8FW&`a)5IqeWN! 
zv4lw?gYGCrqCW?Z92Bl*jv`Bh5~_tJD3T;?!Buz5YY&^0S?)>o78vjX#=UvU4X#P%e4_@U@u`WK-vFHh7&hv)lXAgdJf ztswibC<4SEiu^c58l2bB3jppM>Pel55Uok^4vFG4T+*;{RHB+-wh(6T-@xf#*_b8m z6202nEL}7@eNNgyBX(X?E-70&+OrGY_Fe~4eZQ$pCU}%|Cfuj?0CotBf-}hcC75E^ zQdLwTOW4AAhQ2adAq$xofONEE$>{LRALdo@Gc+Q1c(*!8 z2VN+QWHpQg1G;3KWUb*vstyKQSaWgwcd5#>UnWK6=3*2qAFicUy(%d*=7pzNzg&b| zDOZO@D72*o8BE3cR4uyAcw#NUg{69H3v}3;8*9WmVsIF8s&9w z&Qr56un`P$!tw(nXWd3N7`Fz zF87vHDE(dw*fCHO6{f^RhURs?M>C=%JwMww_n%D|H`4re*rK~&xGa|fpM?v1Q&a6{ zCh_$R6sZIT5VEA82*<@4PcD?CGE->`M=TOZN)q}P5IXNj5~^~lfpW6nAl9?v6i_JG z%S07kqYQ(I0KHTmA_vc}+4t~bY+wKj_g8~}6n4T7woFc%_VS$96)_)LpE9q1`>aIH zh??UoJ-311_CxL#eXh6-7E?3GeYSr$Y;idMMr4#Kd!9zBa#l&py)owRPbD-Q{Z=es z9hcOxWwaV?wN9(^$!?M~7K2W+vuLGXFc=LAPI2XP=@}&}ZdsO{!ugn;o z&s#N=M(LakyBuOdw2&Dg9ct9Hy*NTyK zT4sB0rvoLK1bd+&_QaRPkyJM@@BH=i{E>o(c~EcBYfvv)M6;f81#vLl=XYr;j$+Ag zS_|Rf$G~uve{gvyCbFRlR1`EMTYO3$o}QaeuT@VzRYzW1XBjQ)PLp4mZ**T@U+}Y*grEWMm~4Anyu=58o{3VV)5pf@$VJA|(L{F&MHr ztY7#jtQf2qd?QluCPiky=k3d(z5I&&`!bNgy3>=E-r5!q*!`v4zY+SYs$h{g{GmW; z_d#`hVH7N>0F8<4Lk7XQBsvDr{Un638@bvQ@d_xXZuD;Gs>0AqPRfwxH3JV;lZ|?N zG#cEN9XwlW`?ZTT5BOF|ExjVBk6 zQ!m)mH-6yMLyh(DQVlzM1xBP>(x5ry+l#bu16jb6q_HzBXs6YZ!8NW7NZ-6{%`7$< z@5~o4#j6r=y3y4SDD?X}WzQ-hkQ5|bfht^%MUL^33@s5VS@Z|+7DTj)i3)uTj4AI} zXZJoA0ssajD&$9u-hmF+z$l6XCHlJs)cu#|pFja<+1-$mN*~!py>D_xoBZ6)-KAV# z&~tf6QYLPuDmcu6TjS@}={4@VdZ^-T;PgxiIpxMNo$S!sDWaJowl@fz5mIh9blo-+ z6~Z6`vOn>7&QUTjdY{A8tD<9>sTi|u6P!vS+nR)PfZwZo=I_2 zPl&t6n-?nhdu;^>tExnwoUxq!6a2N?P(G+{Szvb7UCNMRjW&pbC3mK-y>TlKsPWE4 z%fHeU6>a!G;5@LoEot@|zc9KDRqdrTOn8{W!3&ljsD8ZzrB|D>k-id~{lG9bOQ;mS z?BNn2!A4IE(W%gt;;wfP$Oq({uHR+*N)2SEe7ph*Gc!`lWu+%svVTD|c;9w%99pOG zUXm}z+FrQJaK!e=EOxUw?%)orwS8%7&KPf3MYcw&C9fV z&YNZ+`CwQB9p>+a={{vkp$cBYTC%4G7rRO~XNuol#WYD5g!v9R>(fVvQ4%Ikq*p+%WGoCuuehB_u?MzxJmH#MgUJmeYTR~d=Ve-#{zSk!U z4Vw{sB&_I;(dK$FA#vB0b7$$Z3e+(QeMz&Q7@kfFsI_4^JQqVUVRPp1Rvhlm@9yHG z(8*c8S^5(AeT=yJ4wVN12`Nq*-5jOlhkzf!e4!t9z>i$> z&kt)IW=0Y89@+6^ki~g~02nlo3V^hLG^g#-t3O!NwvRxT5bXU!k<`KBlvCiaKRvh) z_PxQhM^@=1EjZN}h*X`y&+bd@sXT>1W&;Rppmzj#4It`)rnk4(R~3@vnJ8Hn0=v}4 zG(<@K2NTHc+;fm}hr^fqUz}R1zc;gpLArx=u1gJ2Pvp;fO(PLVS%&MLZ!Mmy;`?(W zD%#O2fBQFrpd}udWqlZ8L{vp%Y`apzMFmRbT1uO#{-lR~?mMyV9xi`Y8qHn}F-mW0 z1Npcd@A}7S{2y8zH{IPVW9Yj}^i`cPC71fR{#5#&n|FBY-SqblKt8DPgxw!#C~9)x zfvuqhj+iOS$P3zO>{^lbHr$E`JH|6}ScH>FHF;SCo(bpH>xK}A@yzT_zkS56N)z=G z{QM@yo*uKIOJ``7{j=+(xyi+2|D6|C0F0}9MOdn-d{YhM3CpSw)F;2hw9uz3b3ZN= zz>$z(jkVi0@#V8&8MY#thgMMVC%(P-2=}~q7jr6L?+^Y$aNREe!lrM7Y}2M&f5h1m zqJWr!*_>VMTm)01GHK`H8q+$X6u?_h5LI>*Ilpq_KBc1{UI-jFoh=$KB%ZY#4~T*tK)R{VhEH;>rUcHr zT9o&>gqRQgE`)3N>u$S`{d$J?;y8=o;<^zf4qe!&DuSoSSa{*(-Y}HHW20@^)_geL zj}?TFVGAn$fviqp(q?S8X6eiA0o0`lUZ!!edAqD_YJ0e{_tN-xy#&Li(Xt8wyltlr zR>XE3pOSW?*%Y!o>-03l*V}n~rV+o(I)rfWTr*EkQ$1WF=MW|P_pasi7WD2PCj!+` zldZ8~yO<%%7_mwII!sKUlU^mgnuE6Ew#hy?G+kBMC4hCZE{OW@2z;s@8L05|J8To2Mxs)IM z3ATSXFJ_Fg@9dWe7MsvX3lzshiXHKLZ7MTRU^vfFGqEh`a!pl`Tbmb}%-Z%x-VjfG zAbkbp>D)pBN1p~ng7m`(eIT38cQ{O<%r%x`xio1*VApGG9Y{WRE~#RYT|d(8(7jncB3>{)8{&A22m(05A2x^wO%6#JqdxTz8qrow7_>pr+)uX|1Q#c0K3L%kg&4 z6i&lsvQ<^@AzFN6ivVk+;H6ISSIJv9sfc3B8jr+S-_$AZpMac3xB$60@oKWnyJv$6DOEimg(UhPpd+saD+W~J9) zqVsvh`27PX0cs3wX1le~F!^=1EAmUEWO z+mJ{_U(i63rS@3N3l?m9D8Y1+o9w8KWU?EQ&=u7LaRW@(IyHH6H{;%HHGVY)wm%?bO63>D2i>Z0wysbT&wHp++YR4QIs9oWW)j(+ zJkIrLdOpYCs*0VwbN(ni9Bo3ap5TPIX`zRd`mj@=&K&ELp{ii$bpQ!# zAa8uag#7NCD=Od|+4!}X5+wjEDAB$XDQDlGk=>#x!HHv`N`-O%Z=sg%?3i=w9k;g| zTjgL-3eeiH&*nhwQY(&C5ZQi}|0DgbhEFD+wuCPuZZ->yLGZ3g_-WP1MLWvjC-Z3P ztq}((I)mG}yIz)95}9_fTeZwVPn9fb7UXRCOh}08(7AJz3BH}=V7t~Ll^s85L5o7_ zrx?%j5GY@@CUvWc1yCbLk1J}jNOCW*|JP%49ZN0k=|PY$7#2*Cc75W(-80E6I=Ffy0J1I!&T~Bx0{*Gc 
zACHWDP6eKhf89H`cYp7<_45NCvU^q_eOGpKo;fkg{ZtFJ&S<~Xx{a0!zFat7i-Bls zXBwzq1uaD(SK|ll$I*;bhpUYToe3+FOx*k{vClQTVppQny#tSm#g>~|{}O%{NMx-u zGZ(Y`LtDI&FSPo(!*n_RGcSU2f}4v+q?L%g%mu4aBVK3|Z{lso^IgCW`KRQr{(4T? z?ubqtdA@4$tg8$yN&${qK3@M+!!)oBCT}Ru+H*7WS+YersOk0tA+C2HX6{dS-|vt0 z8@H+$blX=F}cIGZD(%+HC(kcf~o;qWbSkaUYjTCuA_}0yKL$6||@5 zR^%*vrzk;QKxwa>a3O}@& z`2=vqvQN$qxEPwN^!>2KNCMs(7+J-r#e{3Gm{J&W8t?S%*>)z4Hh`0r3TBokAs;G% zt1F%hKOIF&(L4WzUMAY9is$-jb+C1KjC-Da$8Z2id zZ!MsrJwAVz6BS7eqXan9g_Hh;ql5v+q#9x)wH%zLX)4fh&9%xikzziwnk9~$*U!+w ze{4c^vF>h%4|MxNL`0^3Ys$n#g{jD0H0^ZucjbOWgTC>z;mYm1ojf=;l(re%5i2!@VWo zxjFyTPJhPSlV}RL8Ed;KNW^U&%4kjIsS1QG>uQ9hFO^(wF$>Y89OFt1 zHoFfH=X+0-n?EP(!}z)xU%U-yk&yH}UjsE(aXe7e8c5lBH>sX%D3u*OS#RYPIMz#m z@A#7^jcBJ`RRy%0ad6({h89hSxq1m!LCX~r`yzq-qG0aw zu6=E+E<~(M#1Rd#IVl>b4Y^~mhJ7QtT+VZGo%Z)yFa*woX)_DYuf_t}5*k|?7+R@4 zl`oOGZ<%c!QhmMl<4S<#GVG z>#;4G>E9o&TDqPxdV_Y`q?^T>vwH6SQ1uZxG-6!NUo|s@i-;XRAg*fWju@~$NrZCS z=60zRy$U=!7B2Rd3qE>t;S*@0s;QVw;n+$P2huP!9fECP^iQUi!OdzKV#ow7H@}S! zy|hwsYBMKGXsTv>a0!Z*KqR|YX+Mu;5dMB-J~NHSC2G|5W%Hssytum%bnc7pM=;cS z|CL6~oqM?9G<;2-{FBpVO+3K%c}yPCbATu$pa6K?32GR$51n!%rD8Y5#&=wxgKUeI zS{)vgFDad|M?tSzy^74ns+kqWi&~fEN-Au6`W)I$x3(DM$t4Y&7c)UI2(Dc=3ZIkR z>gi!7Ckt(FIx;Su!Rt5F6b9%sYyyWjL*3`t!lG%cY-TAba9w39;cP3Pu^jgr zJ_O1gp(u3S+8a%wDrXgpl;4@_VYHk^cSx*g7SSmW_-k^*eqLHBhMTWygGC!GV$bT$Vs+n7bP9qX2-1Nkza0Cn%-{7KkBnw>M=r z43!#^2<7VoSc$nDvYr7GgDG}sH=EUttdl-#;v=#GE0s+%ftW7$OJ|^kpaykRQOn z^<2!iI;$wNEiQA2((vmb2g2jT>`YcbpRn|u(eUT5d9}Wh+NpYON|15-jwwj%ebvX& z3t<_o)XQKp>DAj_ylW{a?Wm&02pOrLZ3{BCz}+mL#9Pv69(#kbR7PERX@6FK{CL}m za^)|*h)?REjpjNLe_Rl6yp-fr-zH$^b;tr)eDMb!|GW7OfAO=t?*_d*Ma{ZPy;G;U z8syu1p-^iF=kkd-}}0aq(iiZCHU4ybRumuc}_*a+mgl)hrQ zo^y(BJszl_P&%ZMLeQwoz&_q>VQy4i|9~~7 zn&N+Guj+ zjy%$*=$Sqb$H>ExmFMo|kj?SHGj-@9p+opJ&^)0pSqihj)jgA~9ErJ<-otFd^YGqI zeaOcvDkYaH1l9NJN2uq44vDJ-e+4Fg?vT#Cp!rfGV?z$D!qUWU$OblKT?Tb%_jbKq zo3e8T#vsnGjSpmz8F`2l2M$oIruHQ;d?+L)E_!B86=p7Ob`EYfrj)}CFuMP6lOS3yp&(Z-ArMI5MEQ^aQuGx0NCNp-B!p>uz+5d*^$!?s6eI#dfIyh5&ZSB!opD+``C88kQ9j%xK*sj`I;!SMAe(GGyc5Dk6c&llE0)>^r}A_j!K5XMXd}&c5+XwA>ib>;2_RwEBV}^!J4nzJU4s4RC!@z+9dL z%J?92`ZQ2B8OvM@2Ifs5b57O`%pXi*u4V$Bx$zKk4DdWCfUsmhk;{diRREnsEa-I` zP&}9jeZ$fe9O_P%IN*QCXo}h`KCAdC^D*B5B9_#cHy?63kApnHXj*p=|Pe=ec>K#iAmB;@IO&h z@9PTzA3gjyfvUD8swFUd!9Y_V`z!o)5lvk-1)ub#n($|ES}WBwQ+E65=hZo9jl=7c4o%JD7n0O=SMyI-gJ+br<|fA!hR z;;l;ULy%^$uBW%QTHaIFp0i6-TpMwxOq|%Gr{e2v+PfN$eyg8J`EWz|b{jmY@yyo^ z;afxCH#pm-@4y22&2nsuK%Qzpz^^gb1BM&!qn`LXmtd8hQ@JU_8Y!iiM|jw=N`JGq z-yZGoNrD&cL)9tm#xgvjs-_(+lDcJ}Dx6eRS7eXPgXeK-Rg2jgpX{J9oLfvMuGD8? zsSb}`E5{Moi!$)YDR=-+m>QXodajNnVYHRHRm5B}c_njIFS84#=A-*No)ZHJ&fmCc zXNbQGtY|p9gO#??cla!0&&=Ix1Y4cI+;gmF`5l#LmiVFURyHY8$mwe0K)W96E@qR9 zg`Qolub?n4JZ5xnEASO5??Q2Xy3dSmIl=^yA{a_Mu2UMl2MYXWl#|f*1$fHnxm*xR zZv>zunywmLPez&arf@MRP+pd7RF1O15QCG}Gv_dtTCq7d)p+qNJ98V0#hf1f4Sao%p+!Tud}gwyO%hZx8mtLEVM#aD4%k7P zen`4*5go@j3TowcpOBGhY==ORiaE(=AoF91t|ga$hf{h&e695H}Ekf_)g@z115jihu)WyXm4vVu)r&5xj+kKymK zJ&Oy1yIP_l!DD^Bab1uwT-1~X#dGxnBPFMxLL@(Ojnk*SXY@!YAiKDZJg#+eT~Xkz zKJx}*(8hia`YfaC=t{kK2hEwnlvJxby%iNlDYf$M%@g@%9-VXRNfC;I9v#SU)%E)% zZs~>tGSF9L$yL|ROG08sNSXkD%ii3YATcS%kg3`51oJe zEoyoN#c<)*M)x3R(l^h#g!s%z%n?S46i1+h+R8rw}ywk`bs7GG-K~p`-=< zr=UC8>O*gK3ZaB8cz|+j59nt=q;$Ejgm`bnAlZ%QPN<88QJFW>;2w)8dUb3dD<(P zGTnqrge&Q#NScFMYMH}BR~ltVu)}jzd)uBrnDAgn$sC%vJ=L-dfBN@7oxgJ^7W`5s z>^Ft0+p!O#_x$>2z4#;&?Y z*WP^$zjirMVwWNf3QJjyy2;`?9muP#z*D50g*bh>KBHTLkSuEb0~Edh_ZUe7Lf(vG z$Z1E~DbRgcm_%;?#2`(p1-<0f0D5D%n61<`f^S{^9P4%I%~o`)scBrYIeqj1=v##3 zIZNWYgmBMr(p3__$gSTk@#q=Zq@lY?qVBoGxgOziqN6S+8aM9!!qlm6`a%~D%(cG? 
zLr-fo%?~rkWBG@XuPuvzG*sC7s;6%!V&6SN$KHfGcSyOavU?CV^+zueYsh|F_7SOMJ#}jusx@@-{3ZdvD=K`^t|vFPW~lAan!19w3`a K*|se?BjsNc*(_=R diff --git a/docs/sphinx_setup/_static/benchmarks_files/OV-2024.4-system-info-detailed.xlsx b/docs/sphinx_setup/_static/benchmarks_files/OV-2024.4-system-info-detailed.xlsx index 7c4d9cd0e40919b9c3370f81334d46bd21539d0a..4e243b8190c8765629ce6174e2b1ddeca6178605 100644 GIT binary patch delta 53476 zcmZsCWmH?w7cCCOi$kGMD5ZqrP^@@?;*tap?i6Wp4N#;w1%i7C65JtJik@wn9+c(^t7l-;dKyLe1cmoicE08{%1 z?A2_G6Y7u*V_B9~*qk$Jqp?-<3wDt{?$De@QVH@QpznF%{yqvY-wJlH^b?g5bh8nC zsC9B!ODF92mf%087a*D2XfxYi>$ydy8Rgr*QkXkRXw&F)X>uT8Kr}oZ2Y8UCKB4P@$?x*vDs0r zm6*CuhqGPTIZMdYTg(818VyoA^*};n`JM@6-Nz+&j2a7JVC&OKp^P_3@AoY%Wef{T z7Rxh)m$*mqkBNs}DH#Kr5#}ww6H|AH^Y$gT$?h$!EN^*m=HvOHq9pxwxB)ie5lXtO zm(R<7f2M8=F+ZKKqbo!?vWNT*p!g;I(QRnkr{BT1Gqq9{d5|F_4s5WWe2XYIi=v@) z!%^+XKYZPTRGlgh2ij60f7TZ(i7gX+&0{y1cEFT-`ujV%7|#y3EZv8S61&I(!6m@x z%J-^V)WRuawJkM1H0(*!>7g(s6(P|nZb#R=AWb{!+=K1R>w%(a%5^ zc=JbJesOP^BZ;qel(lLT90>%H{;}M@iwI$98IA}b?!CXdfhNemeJk|&Uhxrj4)3_? zyIUq7Jo=!SDEZXppf?eNsWa);CPDs#${RC(Yl)5WQ-{h$Nq{awaeY7IJA-LiKY!Gi zLWsXpwbW1}b3a|#ih3VG8B-w4WpW6WjgVFyStjY871=L!F1Uzs{!>P9Ha;U&-S|5* z(!M2bEl9f692+Wu`AX#O7@G@(`nbWU4NRU+0ls4N{s44PF;Auo;<_gu; z5M)qZ_I+60OvCNtkJuuzUs=M)x3)T;p+w%ZOphqe)21&dPo*$Qksvd7F)Sy@zjVAc zrfQC$TN+f(mIrT_iZELKiD|%HM5vZt)8qUKVm{Y;BWDR6kxSn-G71)i&9G ztDRyKSp_WY6dijQ^SVJsg=?nUJYp}T3Hf7q(p0Y|b2ul)MEUzB<^5{JqmRJ_B7JUy z-IiAD%D)W)#l7wxYuKhsL)ps~d8KF1dtR;RvMWrzH zw-n&SxP7X-i-Ux$MgF~7`2iV5b*p{X&8)h*RwXgNhvxa=jvWNgz}>*7mcw7CQoPf> z_nzx__wy*_onJdI=Bw(QTBf-Um8X|(yq2L}=UTd^I}#O{>HXDJdP|TAMh)6%@y?0X z@|;xsc9G86WE7OgaY(vsULl~wz0nzezcKpT#ab^9*SSgRN`2y=mm-<0)69dnfyEc< zsfFY7<@_gAhf(tCvz{%_7kSD$7ML~ue)QtwbdGWwd()uYKFcG0=ibU&wrAtG6Nm#_ zuj?DQeKe%`F5Z3_dRz$kqGt_p&S>dx+uFFTycA}Ws7bN01&^_Wi zVZ62PxekHeTPj1vcR!7F4;pXc zWxjHWs=BVbni**2%q&`1shevUPiuM&zEVG&Z)pT}3Gi*6ka;Q3;OutJ8GfiO8VajO z>I0pCTWh8C>Kc};F6K817VZtoGh1P)!)y9iAIPCTr>&-g5zHPR{33cwTDt!Dv(!3 ziF}^!^9<={C*mbLNy_=}bdA^+Pd?RHQ_j!)v_iMvci*cT(SD#7t(2Q;54D#oAi-w+ zel{6=Kt&Ni*CvP!d zxzc0YmljyqUlOVKgT8a&E>Q0_95nvId`ewGPy90aBL%q)KT)OA;y}flBu&wogHjFb zl|YVs6X8d!Wta-A@--gdkl^2AAZRGGS$-E;eQHN8WabmHIyXVy|@)^QTNBoE3j zP-nih^32cfpMrtyF-PcES!wSOXI4Kx2|1C++j!U^Y!n{jpJ)2=m=P8)9`XfQYnTPk z!{n9m_>+R)|KP`r|2{)cImhof*+oB3$=@w%6XBBn-omiB#;6!ia>b05%f>3~s~=dd z&0Ge_oE!F1jV_Fld}Sk@JeQ6J!4Rnhu^j1$p;K*h$ca! 
zc6>LV&+qvg0mV=zH-pe@41c-W*mw9f^W?pe1S&@cw&WJ}Ro|tOjxPnJM0v5<48PxB zI;8b}M@DR=VGIv`+ahotP2UQj;cyea|4T63Vm76~GCzd*N{o(4JFekrkPjJfpe7E7 zPgTxP!%UA<-#gM1slJo(_$~gq*uEbNbxq#<>nulD{3Xf+qGo?w5K^HE61J>`xPg@gIpq@Ixw7LEXGLL;~rZMvTWw}85E?Y!b z6eT2=4P<`z)GV9~IGrHG#syW6DOrZs3OZS>f7X_!>ujO(FQ8^UNn=fre0-O$;lIGS zI<2=I6}NvL`PgZ5$c=)dHh{;oBma14Tu8$##*b`E?yR@QS-4bmzS6>TJ5whMhA$2- z98>N!m`gVbGp#gQlO=3ED+{96T?t(sE!{b?$w58eD7VBJ63F;yfKRr&RsR_C>D;0Q zA;sJ)h*Q!qB`vMCspj*x6oX~lwBwNC$<#aRvHOP@C}!O)<+C?XN9X4fiRDcKCAdTH z(h-R`r}i@G1EBWh!%mVREQU|5_m1L){0j?^LcIs^MeQLzj_xkyKvw$uEfj>;`WV=1 zW%jo;m12sDirJgjT3zGV8sy(QU$(M1y5>Q=s^0m$yi{l3i~sY54|%Zg*^k+eYkhP* z#X73~akBoLxW~fK#-64?Wqlx=J+Ox*ut@e&ll?AA_BKv7NElr3&7}Ree07oe+g4)9 z1C(QLvKR~$VQIYrRE9Z&&Otf>nUk>aNlaP&KWESE&97bV3zkVV2|qbtF28sF0Vtly za-Wu4_0!?M9#k0>S4jmp;UlU1v5-^_n8{QQ!Dp)b8JW=V>rchQuU_n?hI2mUfHsLt z^X8cDdj+~l=<6iIfi$&4)&q$Gyiym@zG=v0cG#R-TAvn^xI#M}D7_5b2SwzDvco;j z>f`#A4Rp4@3rKRRs(|lm6x0+35&;M^<@U6_mfhDiU$+SSz!mdeqI7?IDui z3o?`0lhU=HVQwe2Y6GDcUp4FT<@wbKObN>qAjN;-_bR`u=I61B%9?{pJJsh`<0+Yn zxNo~$rk@m1{z+~)BI<<1%fC|4*e^z2vcJk4D8}XO!qgtC(*Fk1`Kbhv4VD7^p}Hmf z1Je0*GfjK%7Ne|)^Dzn`v|1qS=F;IBFdS(qs-U3_qyLaEZu1B#&_LwD1gT>wE^@dw zK7yBr{lZ7xpP4?~@6KLDrhjr9PFEiHrpzk+OZ+`u=im8;jFj_BF>@4|fPanQgE5KI8XgD`PlNn$OC>_rjNY?>({{GBV z$K!@52@yQAm>19zHDuHHw{9~A*x&j zq2?G>K28%im@5Z~cb~a!!7{)lv%LC;B^@hp=`Q~V$^>w)y8iuS6lX1<^o6td)IzAF z7eqELGU^Lo1h=8&hnboUzE{wlU(W|~=Y$n>_vb8V%N3`${}%5lKleKb0i?)+b8r&C zu8RS61t2X<@%x1b`!x8_^42x`DQCaRG35j5DJwW~GoNZ?kZz>R{!NwT8<%VNmF?(q z(eb5bdRz2y+qCzBX^d3TGY4uYdDYOX4*tNm$GN~`Q|((XD*SCk$tU+Si%h+mp9sjV z&Ds3OmaeMYuyDhQ-d9C0seDCVz$^ge)I3%)=BHUQX0KAO;?V7=7(47M7b{ikAe2}{ zcgXPmPxMHvE_&pLb_lri34yK__V$ zeQJkTkP&@}=q(*ZNS9r)S%(Y6`WKdd6lNr+p2R2vI0S*e%LIW1TUX6W?vtE`q2=@h z2>=8=wt7bk@jGg;eIc~PDPAbiNxO(8y~SkdhX+_4JPq!B7oJo!LgRO8aTl9pbH+@s z$^!OosaC2|-D(=UYj$iNB5vE@>GnnB;faE^v9or1W9AmDgLJf=L|qI;WJ=LXUw0uV z4T^H;{@~VNE(a75KX@d3?m?KZ!YvkKGr5J}nA~Cv(_Ea!g67^XklY*_@`q{O6${*U z9&@G6Y#Nxz;QykOeg_2dMKTwbLV)A$lp`<6MjqQHH#uG!)dhUN4S6Oqd~uc^oXiJ1 zc*Fc|C8&5Q@8dUMIlg*BNksqq_!ACOGGEOR)_4b@IXR~T$?}LOkFU(I=P<*M#9Q+V zqtmgq+Zcm2nO!7RtaKVp=n3qm=en^Bj&Ufyl8c(m1Z)KXOydJcs z{(F+qfG!nqn7(~+TZlg)f^jZ(MBeL{hvP!b{p|}$h$A2O) zYGG)|Gp{=fp~N_3`1<>}IT1Kx81e7LhNldw5@Ami;c`qe(Qnc+q<-(>j&H*Y@dou% zn2D_X!faKR+)I_<|HO&TPTt=e+lxfl8|*Ak>K^`h}3&x8NO^? zHQJ{z5T%IuiZ(p#yL$m(QRsp%O{1La1a?xb#Ju+uw%zkCt()e{sQJ0UG-G&}Qady~ zbi3Yhg9Lk`)SJm~J!JaOZTgBgDtI3UT%8>Sn9%^KLuk)$LXO{p#Xsa!efKT^WQ&eW zn@MWJG>l8)K(=_r?yGXJO5g_2!JI*R#9U>$+Iz-}t7xy-FJL|_2pcAsY35}_lg(W9 zX)2Th(=bWFi!y#Q4sjY=rS4E5Kp)PXXNFj1#v(GXr;?sQRdh$~IwDuCy#@-sI1 zPQLu@Y_iKvWDNI^)rL(Oln6wWMtlNHWE?n0Blg5-(E*nTHI_br%qv2GiXm<{jki-J z{&B8r(PLd30l0JhR9*Ki9Ez@i55@u^-3N_{ayi?#K7{(TR5>JCn##e@^kK;-opKSL zC=Xe6^CcIot5b^*|Jo+;$!PS04X6VbR(B!!le9r`(H7)y$Pb|fY2*HV@v0^9DsYZM>hlQy zXI?l<#}Af{GgY}{0QWH)-fI3#AhHhgeJjxJN9$fC6ek{T}_zD34!@?%Uwm$W_wktxDo*WaznaNr68>h=+HM zSm|9)@P2}h(ZA{lee~`IsmW3&lok;Fqync;Zv;@X072F!nVXI>MI|a6JkS31j7r9{ zkgeQ~E7j11O7uJq=A=vi&b$rf3sc)K7V(5>>32rqDz?vfBF682I;KKhFg_DS%MD_I ztv%X~Z$`ss8}DgL0A*_x%b$Cnr9vd$Kui*qB?yF7EA!CfV}W5*fBosH?98L*x2CV^ zzRRk?#YZcn5Z5P((wkuwq4i~9ik7uyaYweIV-ZmZ6$B##06hbB34$630&C+Uc6$G? 
zpur(oY&4^SfU3cZD#?77god?2VW@0qwfHs~zu^5u&*4DY(+|_C1|SrVy&RHsIdd1j z*U>=UgYCjw^jEemMz(Dybwjqzrj+}$$X#L{qMZE}vnw-;qDXuO>N}6`2~PG^!DXYfnX?MS+F)N+^8f_ zb4O*6TN@05cW)qOe$W`w&Cj5bj37kFfcNLHmPg&A5CW(mmo!XjFbrfbt^j5SLyO)( z$Y7=6E_Gx7y<6r@4&*8-K}hp$i(|1oDH0?|Bz1F%ljg`)R@VmqgR%k`M)PB-QA=;n zsjLJGd}a}Xpg<9W8!Z0qb!27`q852@>nw;=CbRfQ@VaEx?1@_6#C%FahU6jiq37u* zUUYs77G=^ae9ZmtppGGkxqWv@Q17k`xIK-&oWJUq-=jQ49^rr2Dhe^=sDWyqmO=kQ zpI_^a7ay^}3g}22iAGg+vzE0jq6FXu2|@SzoQc2;+S*`c7%32hBH{8|xkZrXA#~L) zy3TS3Oqzo{XFeDn@!3!$R^8#blvfgtQ2zv!?LsT0qch|M6?EGvkSG0ujsD_aSjvuw zKX`g8uSAYz^f{mAvzC<%oEDduZSH~xKg;Oe)nqXFoEH6~!s(CCBu{6V{npVJ1Mw44 zC~iY*QQdCZ`5I|s06|ev8CWA$>T@}481LUaGn`p{`lA0z5T|xvq4R<(De;&k0SsS< zRtHxW$`EnY0ytyDs->P>;91H;S~FS84k%=G%@0}k5$IJ;5C2lB$>%|gN1Sv#3_UAg zEvpy4<6PQHqc#BV5L%(4AJw7(UREXtsmZS_92hqIf-0ztDj!|B#jN#xwVwcPEnI{j z%Eh&s46v4YDpTrg$y}3Jdw|jL7}}J3Zpmv&Ughm~m+yyf(#NuNsv1&Wn{ouK{FhPM zlLz|sM9}s8ODOBv{mFAJ$xT&c{bObQcT;f;z=o?z#H+Rpp%3+U|BJ^7V0J>5GBb6S zGW7lH_DUZ@okd^;)?tdHMz`=M>YtX!;;w0~`$(;*cU9zf{GfC@DmMn;YnOcSH=I8m zCWXb!NNmIVQs84W4$Dd6;|?X3MT+vX$T4jUg#RO%j-yFZtj`rmyBDH?x$j4xOB zNeCeWqz31enhcc)RQUOcY#LbSnTq* zbAxh!dYcIMd(X_!g6*5b0D?Jwt?huMyvmd)#GWlJ!ZL+L`Dmalu2OFl2n3%_E>o#h z_8C2%qOfR47X1+$QLO@)#4jrZnBaV5@M_Hce46X4`ol*?gB>@C%$88uq$j*1u`8`x zBll4P(8W=#yA+anQ8_a@4UO&(@2Fp2*uwhBi4c(H0of}nXi$U86A2Nb-K9EuEX7)S z@3**0$K8-Bh{6F4{V%{G2ZC@Gj#D~lP}V(9#m`dH@zQitJ3lKFk*|0FmT$3p2@0=ePscok7 zu zf~K+6ZK00|(41LvL5Ylcs~Xoke;Y@@$=Nqcabd~0(>t(%ETWV#?J!gT1sHt(GMQ8O z!*9tUpAnZP%5ppYit-2GO0a?EOW^%ZGnKDdBIN+c*PP5fTN0c_Gl74z;P%EwIA5)S z#>RwlAi1epy7IG_+W8I)6yNQUA~2RM`o!#s8nh2P-6K0U9#+Y7iWfCks{V{Vzv{0kNl(7SwApY1)i{)1yjJ>ox4$KRMFF`F zZyDassr^1V$q4q?5 zxLU_8OM?Ff-sYWKD08`NC^VTP{52=?g(8kF*s8cofO>rGJ@#r8?%1)Wy**1H zQtXi(Umxo8ywW$jIrgoVi-a z*{pYgV(S1_i4L!O5yTo3l)+|t`L%OPezr~z2p0}hTt_&o-F%~ucohT%luC`dr4aGE zAXF>wYPozh>_F^*q6=Vfuafnxsp!WvQ2C$wl@#wD6j|S6cB{ zll^!^`*sj?>qzx5#S2Ye`t7HDt3>sXF8nYIWxUcOUiBizib2q;WN*zg#~^40>$lQ+ zYcB2x>A*Lo0tinhsj-f)^4zld%n*)MFTC-9ff5x&ec^u$uRdD(@JcgKdzzSJ7O9Z{;v5+5w8c@4+bG6?a8I+0e>%^%W&jp{ zpFfaQ{2Ac5VpKIibs?v#b~9qeY?st)`wA*k#|=tfN!DeLZSQn_wD$A84JiJ?J?0HY zE~9K&NOrtX=AnyFSk$T1rrgrv)hHsWCQc{?43FO6CN`wrbuhNt$XXg!I{#Yx z0^ z{yaY9Wo;&mzkFM_*oaZ-nKWS$2@Y;Qnp(?;)vqgh0SBKAWu}Fdv+f*AB=7bJ>8H(-b!m zLAv>sWQ?3huVM4=_`bu4IOT)-Fy=HBYD4(a0*Ko1*`o){df!)f({+7FXqbyjr&yF-z95`|}pK5xLZn`Y6zjxinrpnUT<(l2TDm%BV zeII_>x)_K!kApXAu6Gn$Tw@NB@#n%yHe48ODEi*x98Ta@L9 zZ@Na<7=FtZ#O7-bv+>0`;6rh5QQS2U1YM1cvtRE%s`QkUX{@b zqTrqNMCZ9?m=k~oiHfF@bJwqCJj!I|gQnaV$PbS(fSL9eY>R1467TfjxEP(t;`NiZ zjw-C%onEWbsdC&n_a&4&(zozrlE~aucGF9T!tiwE|zK2_M_G^ni+j>(yoIpkO>k?{3+49$Uk7ssTXP}z@|X00dm zAN>AYxT7c?+~ahrbC9r*t6v2|NjlZ{Knae>w9iacPz4UQO>WW(3x@?8fQ;!j*IqoE zg^MG2yYR~EFu2oZn_`CR^0)`5srkfTC_>nk-GCRs@S32T8|$XTjXqA`46Soxfr z+*pJ(*!XRC8u3anXxhhLl>0o}~lIYPV_dR00eJJ6`& z!9*jB5Grg7uBoig6{t)^OB9j~h^emA%vyAZFJBwJZVb88)nHId*HfS#;wlv7s~T{_ zy@|4iks2j1XAJWc7jzrjoJGj&r!86$V81nGa1KQ`nJOGs!{ny9_h zL+sqUVhup4jnYVsCSeA{Q|KA0gHUkn^+jAwXu2!?Bgi%kiDhEVz32DxTiIxr zehaMh;I>7_ed6rAPlvorO*n>xLd1Ms7r_vT4nW}as`{``4y!MImx6>D>zASzP10Sk z&K+MTE|@<{>@n|KFmH7o700Hbq0W_tG+lN>pF@hdni5|>YRA!a#zpxdohu1npaLDh zz-a|kDq3-D)`E+9&Xgod^5 zT|14uJ*LC+JjIVF}H&cJ*+{DMs$t&+$gZxmu+WR|6cP?;D-=wdu z?j)7Yw-LKFkZRi5@sTfe&p)_eLN_Jk7n9$ZQ(fZ!*{%_yk&2YSO6h4!LCbbV>8+G6!$v*C|>H6eQ__@}2&Vw?OG$`8~a-Z2xA~ zo@Q3vQ#reX&>bqscPl;kW~L3hH@2uQQIXjBJl7-|_aPo3^oQb;!vD=Z>B=Y3H(O<; zN^aScd~*)-axU&-kF$F$yl4-!oZ^9tM}+uh218OhQ znb(-8`9=G}(_Ne5C2(%mgl--usd*QmHNzGT>O_<=(IHG z2E2Z1I#xO3I|3Aw?Ldlf&&*u41%uJTQ#oQFCpLm6&vemE5q9-a>KbDw3Q+{e3-4a^JUCAh;JMe8gYE-qB5?wqg@uD$?X 
z>ut2^Qmndvr}QeRz(WVC(m3hd<2RIcUhGug3-&YIgx0oBX?9Y>PnsA1lZsV({^_B= zdC!;2M_d(&a|ajq*x_vUkTl9{I{`tcRMCrQ?u0aADg+gw;9SLudE~K7{@lJ59gVm| zmIzw@31j&3Jez3>jN2V7WgSjb?8On@UAhMpJ`YpL26$k5ple?jzP1s);?R2uZGBbh zU8Ut;MkhHMpw;JRgH-msMoLJ>{CC~k;s`h^q_D75T7I^V){|^p9~ocu06IUuf_&Hi zf{NujPtNBWdVRE=ZGMkLtJvVlsK9LT3qi-vr}V;Tuyp{OX;z}g&ozpo2v4_f z(|^A|(_ylk*bGUY)7)$qPtf6g@s3k_(Jv9Ba55Lk0#5{R+o!t1Rqy8y@1(>?chw-l z$pD0mN`9q`ialiQpiOH!g8yt1_0HLcDM1IMezLxFdJ&ae`9Q$=uO2tM9=2%NezDkN zz8szY*r3gQM3puv(_0Y=MM@W}j;N14FWYq9x!}S(mcYZLN^Yo+$9h>pn*>%}16Ds zKblDSn~$#3r}JnJ?Cf0dGo`g8lUXAIU2rb7-kZ-U6F-sah+06$dlm$Fh?Oyvw8X+ zovGXmE)qS6c+CN?v_NoLS_B)egX48~VBbsiOD&LfCsK@{16v@wJ-_ zajeX*r-J?dK;Pz@{C-Ql8ANH#dqZ>~LhM>l^6ib7i`ZVbD)p*VTZv3350xTW#mJA` zs(O#n32odIwzHt7K-#>QCQt6_wvU3te82l_20p#3drY4Ma&x*I{%mHht&C&!@+}E5 zRh4xf_U>l~)<@GAXOXON?_aL9T(SK)(PtP8KgxCbT6hS>BE8Ywq1e<&x45sm=+l00 zL<*Izbb`K+IL=s?f{8wL_|T@d>J95T7XNXKX0lKf|9ReDUWc8Qr4^Ze%e>F@Tw-a8 zWd1*Nw!%~(;PgU>p!3HGTIS_&;f1;&KbBtn?@`-RVxW*45s%daEup2fm<(uRRjf!` zr?+kVTAi7v5&_^>0bO*Qd%1t5_<9b^Cd3CDP9nw=R4u4>RUWuO+sf>UXz6u1nGStD zDFOU|20juS@OHHi1oMlpE8b>Ixu^#vi_T4{J5o9mH+51 zH7g91{@2|I+VIdNB9)uk-WAw`S4m~Eg00TcxOBs2`5hKgK_;|VaINM)02BBGK|bCM z_8ERHu8uC8NKsXS?A=}AW_+&jJ?p=C*m{JGczWn;-c0;s^G9{8^2x*Sd~DiM%f-jM z83Ar;7OAv0vLqx8TJ+gl7C~}K5EHr;b55?I%G1Bl@mFO9Wi(?kOYVj&PK1_5RsQ0S**-^_FIk&Q<5QAw9;q0S|%cAW%AI)sWHxAw07?pZvlyT zuzIs9okEI5y(=z|H5|{_jUOC^==OI5i)%gtl~xMnP?Qvgb=%I%<{9#{Yu&XF2LEl{ zlg)$v*YiaG<)PEvB|d^={*|0k^!>X6C!1%@rPJqfu0ZcN4sn&@?xi~YIFP;M@1l}# z{*7Ai`s`P(e$N!&zakFF?|-HEwVwvzBjUR*gEdSRk5HbD%64AF!a9_5j0-K+oGOcZ zmh-!4rfbic)`RIJs%2*@7*|9XZRIh5Km-1LHqx!j=Y!}u=Oj@x+IsX&JzeBZfa~9> z3PoMSeitRmC34hERDdGJKJ3;-HkU3sEggrvKJ$k3?UFeahdK56E%~B-;FvgIil=uc za^7m8zb@C_+}i(O{-#51u2rfND~r%~esXEnOTp_bjE4X`k`A*l27WSO?}C#4c-bLm zdxGD(^iqzuYLk9xDt2h*iJn^ClmLRuJWB% z<)lBJKzOy?tvAZw-8F}wEdL6&KhTtAwyTY4cnudS*-cL?y(b!E^-3 zTB6I_=#iCY*c}uvv1QyB%jCYF%k7vyxZ$9$NsWpb6c9cyQ>j7ImEI@^HGiK?EMMx~ zAE8YpPqJ;rhi_JD>(|aX>B+k2N}q43 zjQmU|a_@IMKEo&BHqQ9hGmt&cd+aIE=17-Nv6tC7wR^OtokSy#PXRu*-3FjMz16@} zQJIR>Y%#e^mqzhl_)dA?cG)mp1>t{MhxpW=b}V?5!=4rQldtHal-ptUOv{d#T8;CN z6d6x|hWN1fdl@=9&5D>RwsjCL1K-R|{xVS9T2rL$G&6A+y8SbYoREI-tjWhEi znjSN>w1;X$JO^*YXX~#<+?V`iFphef3}}8{Rx+sO>O1@GV&-^je*=#H@spv%oY(sS}G{Fp!dxRmP0=JOcd2EL9Td>y@f z9mVF*%h?OEl)0Frq^4A#-PoJ?f&WT@J&O>|D2eO&+xBB;I=$5+4)TR;3Y42VY`6i@ zorn%Ui2F zj~-Ln`1_M_)94NOI*y{`SldB-tdb2Mo1fLRM5-j|h{5S;Xt^{h!{(#B`omZ&bgc9q z-;yxnfPzGt#*)7VV&Z(b_=Uy@M-Vynz(QxJ9gno@;8^2Z$HU6legGwZR>Ad{ z^l)y2p?@qycur4CmlKm&=|_EpEId2*Q2%h-MAncUt%*$YFkAR$72o-tcZM`NAcX4Q zKUUk_(eqiVduTwoAGFoAFkg*B>yKOu-CxVXJgFwZWxGGq6+N2YSE@um`n!W5^=WzI zJCeouGg_MZkH>!MYV|{}(T~s%3~X=q@EK|Q7I2`J%(V6;ss^#tH;I({Ur)Q@eKjo; z`I$(v5|w{M8aU@C#BcGuS^e5?bz-W+EAEglUYldck|i;0Yg59&!Q9p23vCJ8q5N@z zQ1k9;O+>~m8BPU-*^1h*oFL;!7d}>10v5O`%A^kuvPM*#Z@coWbU)hQWi@U1#L%NM z1O1+Hzt5RYr+3Zg^L23&YdSs6dfqE|ht#55Yyo&DD76l^E}cv{4R)-!zPNo2ZRAj@ z!RK4tOV>3fh+j$zxL>9VBx4TkOP(c;81BwhybJ~=B?W}OfQR9JN!&q$ zdTOet6k(~6U1cAfcKuA1!qAqDbffnD)D2*n*DLGS80V^MlvA|-)w;j+(S0k{!N|45 zFipYqkHth+x{sKpdJhdSBs%|`DG~qXaH)F4E>n7c)P#SBru#2ZQNKhiuvE@0Jl9M) zNo7jUfxsj{m^3uPTs&BZ8r2=37tqoEE=Xs;xa)D3h&h>1wp?kYug{H_XD1Tbb_{hf zORP(*pO#CE1G22M=b9?fp-e??H@D;cnC(PxU*5Mq#}^+}lx^kY_Fd6~@y{9n6kEAV zW?b`pe%8>+7}6 zzRk%La(SW^_mvlQCVP*Q#1`;k_fuM*`=KeB_p>+GqhWj%@WH|v{+~w|4%Q0Wzgu6XGHr#b;rCsHMu<~-Gu4VT?e^lE4TPDhHq5}eKcxVm z-l7HPgh9DsI|IjA6}x`AauV+Nv)@zD9OtIUGT4uFwH!b>Ur@<5{m2MylA-}NPEO$7 zFLZDh6K0Cri(9MS6_2yG9X+`d)p9#*THb6@r*&u8)~1g2Zkd019NE~q;FNt7X(>GM zlSPZ$Ci~gVGadraR9;{jHkx!1VS#n>4o~3(P+PCDD3!bdKtNN8-dx`S&s|-ft zQ#`pn!I;2B((brIQ(_M_gaESWd_AiCI^evj;(aLCi 
ze#ZA?RRA%p=nB9T&Kh)pBHQILU*(ZqT>WmFLR@{(Ztop{+t5weGf#wRW2Z8StG;7$Bw_mQx}2&Sl`sDewJ~yS;p<)G>)f=6{z|FNLO+f%M|;$^l7VpA34V zpLZQKP3>p1v_$q|Wv@T3w#B(;WjVpt)3P7z{zz89y~~708VqK7zNh<9cKd9BP$#3N zZ&^^eL0Ft7R4mlQ_$;W(77|Dx*>c*Gcf0H|06=w_@ypR?QL5+8v;C1p+AP%I^jsQ* zeFRc4R|PFz|4+SUPzeTs;C?g+Hb^OLHzrsO?>q>RCZw@vjW*=jRfgTkm5&V)COCM2 zSzuxu<$>lY6M@Hbyc{Y;Oc*-UE?Dp2oG7LnMs~9@%3Mn#C>w8k)k1a8ED*m7`U#ZFhg%OZJ2Qa~s87+K4pwb^0I@)xq1>R#% zwZN1-e`zpE-_AUyg;0dCCEjDalT0C%891zJc7UU2>V1{4D(u!F?qW-;0bcbk={^2P zHNWaDTPg!MD4h9+3H@!s|J3S^^cMD2jCuy;T=Qa zrWyFh!g$9OrWEf{fWkGN&J=X`{+Gup4=|5B{7^$FQcVQMExrHa5=K`kE3)J z396f3NQi@!b>;hl__s>3b8 zGR3dllN}r$g($5)wb<}lsDt~YF$rH8s=M+e|LaF#nQ{}4==XN)5a+cd{3)P9%a^@S zo|}H`1Td>AFSk`hJB}8+^P=euO6{<<9B=NQChk>Zy%&)7R7FAQ?nWYmGdqywEw zHISXwQpuF7h^y$jEu2p~vDfm()>4qv8Ju~21g3uAjOcBjS} zu}o6e?`aq#K@jx9_f7lw`?$-s%pVJrxML)tE=*B)f>9ofy1NnidzGf4Re#yfz8)5WzplR1EdocNa&!y(lIdYP z@1i*c6KNLFn~j9jX&JU$btwvSS&fk~2N()R|&%^Q$FCrIn{2cG5NA1e{vZ*%W zMW+M7sI3MYn8i7Ye4!mI0Nde1{@E79 zv{sq(J%mP3<*E74H<2Lq*LtFB2jgg)vSJ{f?69%zj0(smKc(<2zR^wrA&n3i^bMWL zV?_TuD5!nyq@MfEeCwOCV3Cg;_#t-h%+D_Wa|kLEj?PpC-1nt9-BBu%Hg|oPLCkEi z&_<)}%cTAE9Q|R6{HXg+6P@ne)2YfBRX?B&+(Fa)K_Xx2FtV=B^D@`ktZViU*$-FY z7i%^*{O|6`g{7y2q!ofkj>l8qo;Xh4PVqk4vUfsj7?m5+*JQu8*3qccD)LDXl4pA{D38}%0ob|gd1;3KB zXo>y4_MF3Hf#r>sCVV!nS&e;*4eAz|!+&O>k|FYh$?`y9l0mIEs6p<04;_8C?_i*2YfP`UDyy#(S?nFnc}=-GXe){h z4G^8aq@g%{VeQK#iDEsg!5}4B@ohtrNt{pPc8)7f)sUHE&M-ienInTV7{h!&8bnpf z3*ZF3xf4|f(S5gR&ite(l3_HC+HzRFF9fWAWxNE|z}HYOx_c^S9C^WUt0uQv3%&FR zHG%n}xV85`%jPSVR#9T7AMhCzmz__6bq?o@%N!7v1#c2_;`d^Upj&K&7rxs6(bb1w z<2eQt2I0X=A2rvTuVruHz{Si)Ok~66VBszg9qvP|K>J0Z8%g&C-^4!-b2l4 zHl1)EI`%Njb{*Op+ROgMLqqju?U^*INjum|8re2w7YjXd0^vqLczYEfNR|od-{%bU zzgV2m>1s6R^nue}&}~ggV^^zedgY$Ikqi4i@PSw5z~?m|g&YNgRZV$GG<0He`VPTIcr8 zw@cIJok15@tT{H(u$?`zK3hyS!Eny7bI->R&`j4p^5}Qe_n_tFRxZ|968B51 zyC2LticViKi_kB(sC9sc%0aJC6NvooS8t6-5t-STpNR&60Rpbnp}e;kn6<{+$kcQf z48lXDfy;e^DcWfA(fuSSF}P?beGuydyRBx})1?kk`R8k1c!QQ2SXBKA6gX0Np$h+A zH=~_9DL9loig1)XP(#s2R%rwQj69S)@iJ!nrFa6oBY#uSEi`Qk6DABZ>JoH_YMUNv zS{}dGn<#Ea13Y(m&1SDpDH6Rymc1W|%0Zd6-MI zXuIV;Q7I&i<1JUZ%9%o>hvOR?%ae?526Cz^u0J}=5-FA$?A0EdrGtwDxPKlXENsA#{TWN4Y2 zf5h^q$pAMKvBE}%OFVT#?dLd55pB?JM!mxw#y1nzHLJHHD!#D7Jb|b}#Lq9KpD$q@b&&!RH9s89xi5voT9mLYA*~YVb);S;)JgvXWob<>}neMNf zG5zec!Z&B^mZ5}TR3TKko&pE!drD(@S2`GJ#h-jd@Zcbhw`dcE;&t0Bg0mpP9!73| zX*U0oE9j2xDV~1#N5SzKn)62N&u@Jyrnn7b`g_rbFIqZ79lWOs4Nu!bofl!3(P6>Q z*}0038G}I-gZ(D_8UBNdumN9<6cg}a^UBvQB7+y_C7N$^2hz5*9KQDw3vx7P)P$)z zza3PrnJ>mWsuH}%EbkpnJt(H4kbBa|nYZ z!yNjp4AlL2Z`wZFxCga3^oPtF|CqN6=+h>!$CgB9@@M3Cb~T-dc`^kn;*9h(xxb5F zE|3v8@#t6+T<&Ep|DBA$;HGM^m)YFI-gk$mgSDo7NqFqE2{ZZwig5@uzH#jIVkh6- zv@f&OF_@-nbB#Z3HumcinLfSqn$I0$lZEpbP3rXO4fB=0O5xA7lMPPB=~14d8Ifn9 z6x5NB*s%@HQ?8X!_%HXZ+ei~mbhEk`H7sP}5NRK_4->hAaXYS99xpL^A%ii`R?l7h zA=^AW!jBF+yjJUEP(9Y5Su)E8v4UA-My>IPiUMwxidbB6JqEY3nalPI2S)PWi61{;E_3JCcD_8;It(swVpyk-nVJwqtDO#=ZS=ycehmT1>TGwJLZ%v-#A8v6V}J zI=-}#ie&E2h|VeNF(lxXzl?7@&E%8kU#tjK);HuOj+f#jx}_x1a`2wJ>({5u7)HBC zn2p#p;4RfFkn9(gE7>Q-;kb%C5E*N+bfypAnuejTkC4fDZ1K*|yLqllTas&&n6EnI z%ySyo)0(L!(?rm4tD>}Bo1`Z>bKj!S213H=GTzL{+g=l%zk=wq$oGmlRQQj02M2yfQP zB7K$)wVjx-`pQL?k%J6v-uZ>!M}7XG$b2oIw~O1QF3hIMK+5~I}N7udPNrTvC%kth2z zvpxYjcQ6M-w5AH&uI=5!L-JtKv38$h#sJgvSPySmsvd#H$P0&vMMAwBOJnY|>Waa^Bbw9k3QM`b(zsyeEHhl&EJ+9h zD@BIk2Oi?SBlB-|^Qh~}H)5swBncvMF`4KywmhB76(~Wg)stU-5Zvmz5beYFRedG@ zZs7pBsb>70psLNLvD_-`z#oKse8X>LYb2|+-1#<044fxShFBz>DFTjGyRvx@p%q?T z1iK|9W_dEbZ0Xfs?Z8p5@=AgCa$gpIol6@ z{8Hp_S#l>3Bkt(ZtC`UA_p&K{_2N+!-@tNaWADW}B~@7W zL*HnZX1=Xb10H%SXNWuN6S%vW=La)&|VeNB{tk33_3MPC4jh-w*>`R!0^-{0bXQ^yh2d+Mj_Hxr;wt=g`s 
zbj&YK>o*m}d2Y*U*W)L&#F|V`FJBzpe{yRi}_C- zlY-v;9-M5pk8$gTZe+Ly(Ry0Gx;d4IjVd52yT^zj)!ri^TMZ&=4PPzb^@jf3cGT+B z_D(G`mS|9OuM~Iz-4u3$;~@(LL(hEL^-H6+(k(KrtI>Q4jayPE6gbLrQ~1U`Ns{hs z){OYY-6KTyEGhI2$EsfMqE&zTJvlVQRRmlA86LoAWuxy1AOb{Cd}Ga3OGO?78CP-q z16Xy0p4kK|#g(CH#u}Z;Hp+UgK*}c57aE(g2cKXmOEMZn0oQAS#}#5h+ttg}_CxYxutJEpkXYISkQ_3TG{ z<;O$Oo$=hnLAkjVN`65`aP#7U_{)!8!D)iCZbF~g^}Ae z7o~#8%2{Ryoovn`%%!r#=^XLmZ`Mf9*aw4JKGO>WL=%(2;U%WC-`uMBv8jv>TotJI z!-*Kz^;l^D2b|V}Zkylltntj+uJ|zk8U82_Z<1L0O(}3{VcTc9DZ6)r8q5VPbf(k> z{e%Yn&R$lmpDqe7(I_iw3H|c<5plbtHbJcKF-%HGg03UKIw<)PSdLL$P4Ki&RBrh0 z7JE5f3;UdX#YYhcYahwD4Y7RmNv_aYgyf#S}G?)U;$~1 z)l~jf>())+?U#4JPIk?iT>02(?Q>>=7Uy z(wT`t%at9eqHa*{wyyym7GB8VZXE`reKqO2LnenwydJ4^DPtWtD#OIFKO^UjNrimc zVhhPecQ|2?GZ(auPYkkxjRCEW_Wm~hE)hN0L`G9};X`bNDZ(mIf7J2GXHpY&=5zly z3neqc0>e;|NQTgG<-9!s1N4FwoF*WjHd%_QE7yCbzR@Nf!v+gXJR-xOWFr1DrXY7o zwY0QE9Qb_=pQzHNxH(*r<%ISZ%*8!Nyay7PhA`yS84tRE@5I|Cs)2GKaWOJkNzcLe zbnjRA3850WLf?(2zgnc8w6Ew@8($H)2 zV<%=s{+J1GX^|hqoMS=JJS{T@sjTyVBJtISmZ)fMTQ+0lM#;xp{RMW@4-g4HIcRD# z0|%=J+VUIc>5ye&Vk`8@g0guTEBKiitVf5fkQRom@N7Y}jqFbLloxlSzZp;8;@4xo-Nh|+fdA^kb(@Mg>2_z9bBwWwJ+?%6ZI zPS15NClkH3bk)OiOfe^84fVw1R~b*@b&G9$_ogbbcp-4FGGmPF253my)LG#ivXl1X zP%$mQsVNBV}W;7We2t!pA&cap?DBdWXJ(-&=GR zDM-|mZapC7e>43G#u9tu{G>t^6frd#N>8DoQo02t7kkI7GnTkpcjJ;7OutZfk4ECs zd&4(q;FB7mJgp%kcXg-R_5OyqvCpB1YgV6Y#)^^)sTnRCu^5YLz zHN^rXYQruF{D65!fl~M_njah-J*+CKEo3nwz&X8#+TBZ<42+oK+D~&_@1XYstCko= zcb@0ITy`Mqr*<1Ut%x>IKgUr8krRxIrNp69nO(Ya{Mc?zX2vi3oBQ{+n+XGH-tFty zQuSNoaTK1)R+}kB04632OT=5YTv-0LFT>Yg9D;x!puE6JC}&9PG^9(soXjZpv%iNC ztO69zAuyRgrWtESViy(0OOmEsnl~S@4c*@3$UY|)HCQYmR6Gb43}G|@{w z_8tyqtME>C1-V1@^@lW(#+6H3PV$A3e%Bfwoqo&v*itC8nPU$Y8;am$ATQfV!`A%d zCI{JiaGMNF%7>1$-9A%VIcUH_hNQZXUCocmS$Vh%pk2)=)lG98EXW}%*CLmq(YYH= zIS$_QK24>Su6^7>;HPmrUqRmNQo#&#caq9BRw z@bK3;HexrCN2L=!?!GzA&4}(fJeiriMe#Q5C7kYa5gebAk}P@_9eAVWj-f^9^9>ryrDgR>Pxs70%Ju%#jZAp5()Ak7= zd_{HEX7=u_GaePo0oXR)=DMg|O2ON))J3)oNQvV`2wbYJ<4KCy_tf^?F>MC>_qiH&3gMDwfw7Ar?eF`=QN6j|bY~|7BAqze(}lU7 z>43o~zPM@Ys;M3ZmlMz{Y*iZ}`xRtn(6HKG+|cbORIq;S_9heyP6yUDx<=ztqThF1 z8#3!sZQjerz7Q7}xc-vKh8z}g|E-@qxawmidDo0>m3|Qg^>R$^KTDLDcjL+N&EuH> zH)~OpsRFY1Bo{mY<4$7%8hrLp=?yW`5*vJ5>j9^$6$h|E$Gqqp7Ozm&eD6>dUG&^x z2BuJyaNSG@u9W>;huC8Pm#g(Ah0SC~KgPG>@@hBIY7wALn2PoamkX)Zp4@G;d5CHT zx(ycEDsxY}*;PybBJs1|m}1v-?QP5EOu132LLlv@06#m>(cS>;#~Y||bp;k8vH&I# zh~uQX{ZJ28kvh57y$#Uk+&ZOZxxw66mn)R+__ImZn$q>ui73uj#e?isV=*RbBgr$&hr?Ls+3%Z$^~p}? zgdy4&a64$ZT=W>GV8bb9iU-XO4Em@ApD!zLXSuzq??EmM78`VDbrm(AE>?n7dcSVg z%L9mM_#^Np$Ix%S@^}Mi*ZTV&H}Z_%KAi9TA_N&fg1IMF3VE}>`ST%ee;>llua^QM zbw_|}upr8Gx(`I^Q#7)MPPlTBBIQxuw_74zeT|wNk2{@3N`(USKO;sZ`^~NfTGL4$ zcUWc6KbLy$ijpgQ7r_n0LUu_^!&#kS!1(H0#Am+!L4Ug3fL=BMMNg1jkfuV&j@=`u zm9S!q1VI<1wa6c%&{d~Io?(gV+)a@hgnW2Y3 zm8-P@1h4p>XMyHSZ5uXr9NjG(b>gmwzc6;Tl_sPH?<+DACKB_oEInJxB z)!ghDH@05n4|&gBoM?M=!pDO8!J`Kh$Qr(ahEv=Wo>SaG5!J&!O=3s3pos)k54C<@ z(t46VR7} zV)%mbL3Q=plkwkcVbI?bhd5UA?w$jVa&~dRmjUZ#ch@eSt(=r zlQJoegZS`=Y$?mqAOyptZ?hh4M1@%1DKxNrS8#3Y*3x;3dn;Var)j?7MM}}fk#CP$ z`$G!pFFW;0rtSlTjpF8(^0PZ1*S3dn1ZRk7+7uCCFq0+Fw4>tyHU6nn2=0ZojjiFz z<$8N>1s-jjV^%9Pp0W>armtO}@B&jT<5QdpP@jq+wg=%fW^cUM{*l%Z-;HW4fp@%Y zs^ZUZKiE*^t{XFB-WHYOFIUia(hx(Xgyfg-NJ~+*9oN&WPUl8S32>>ploh)D8xzWeK(=Of>-_bYS269QGKyXrT2y3y7|D>$Mt3! 
z1OdulEHuVF5pWw0Hnro-}Ov-ZX5D?k<0NWFF0}4&wIM z4p_}1A;ft4#QcG0=?=4ZE#io{n9;e{L+ll1h=V-s+%%iK9R_PS9_UBjHK|VX4Um7P zSR&~6m5uBXlLrxTjfX8SB&F9>kZqOso0Xp_R`K;cY0D80bVhsX1)8gp=?TrmgJJCY zuYu~&1Zj86@EFdgH#|&_w`n!StgRben|n0X#1D33VUKsL#+%=yv^TL-@fp8+$DQ?F zCGj%BUP5OL;o=;mXN={wg7yN9YuX(Quh%t=bku2)-t_qMzMu$+b{{Vh zrRe@G>qqbQBlPQKx@8UU3}tvbHJpB?8O^F}{4sm@loYo#G^*{J+K{Sg3~BT2)kyWWG;xa%fcmvs}hhgCJt75ZwklYVpVDupk?K z)Nu7)uu5cp)EM&Im69G1z6>$kVL{n3o4Gf=%D40c55T&X30-x>ofeupEZ4>N8TH8m z;XS~qFftXCurd|3VRplUhK};#{2)6w;mOGTHbWPjayp}biw2vlwoR96tHIr>6;e-@ zjgZqObDMW59!df+uGYkm;r(|u&vNSg82a1d-SkuE%>)s=o}~vZF!sSL@XuiXTpZTo z3Rr=+Ff2HiYS8`2d_6T$lUZxtnJTv?Nc|O=9Js2DUg{K@nURA%5DQSgGZZ%u1B|_$ zF@=f#GgePvfJRT6=Y@As$ydGLkM-0Z@BGeb2JyVBw7uu6y@FLli8gOXcVY@1>$Pm> z4INK@AV>bH(niD$fkd}5?eQ$3s6+5O);jNd??R}&T{|ypTsvh(Al`s5m?Mv=3=gxa z!l&G3xFAou*i_W@IRvHQLes!qc(WZ-Q!g?6+H+rL%*{L*1he8?cn3G=dfF1>a_%n; z>uBl7N%s$`0Kw+lyp;oQ-m29_nA;`ohnd^SFzeGqTF)7<1bmhj+U8g*ZZ)ESlQoNi z!oJG{Q^3WgxpvyZH7niy;Yb}-6!!jm7HcfNaq^!c2ZgS4Dj7PpE>RIw+)-U*G|reG zaetloUZPu?P-8$H$S(0zfLNg5xL{X{ME2_E}2itu|Pk7l)o ztZ#Q#4XIJU)8R&o=uz0+D2JYF^*>Dhald2V$haFq7XA14)|cmj?!SND^4sm68-p%h z9suSN%#||zd(&EQu|Gpm%%9SHo51Wi&!E{|JWA0U`JW`GFBfj1Q81v@iSR)^m5W%aDDOMUR)ct5;krRWV4 zd3CObhzkeN>Nf+gtxvQ&0N7Hmj~1EVM?P_f5tCk>axgqu!x%mIR7UdkW@5VD=)g?0 z*aYsc{P?GwAEM3o-xPW=8}CD%x&u{~dsq}_(`mfOH?=9vvm9Nz(T~e>K&bS17-0T< z#c>aW#+p=~dpul;{jWTL)SnS$3So}{07l3_>{iIY){c8B!*W^o@sBIm7lLFrN6Z^6 zf}*pZ>NxK*o=-V3(IdH=!4b*~QZ#aPgM^Fd3|c@5as|(5PYOKx@m}+uZx_K9Z1CZ8 zRSIt&t@pe}MO{+P3DkO&kO!;e&En9QnG)ObqSkK18N)%DrW_(&PI*iUH}vOXR%f45 zaggEDadPZ&vhr~<%TDsmZ0)FvUeb_l=_g&H6Hc!yIdHpBUH>}1b$=yk{!Arl(U5J^ zxVEWb2KKih+j5KPHdnF3WQg(0r8Kn;QrbmKw~DlzZ_ap$`=h)J3UTbh(%@Mc+6 z+O(qkTO&3K1|2xGWit%A72=qA2wn3AX}VBo@eDD#R1eRRD6R3&!=#lR6|qZnghEF% zmrmQ(!*#R(WRSS?J3F>{u@6RCd>-O?eV(=&{|9M)$`IY5ht?4bd5GJlO>LNrv4v6s zi!)qs9#6-Ak~08DK`MUoQVw%k2?Wr0d_p`NaXP!74fIvph#%j1e4K^uo&1?seCuJm z>N-ck6Rp0af4-pQ%r!D|QKX+dr?Fa=1)?S$=H*1c(YVeZSG+aIEc!z(Wj2XITDAVo z2GtV}&QGI98To+<{{P|mP{}iC%1pYeYige60dsAcL!&fttiz|-M=F_{JTX^$C0y9G zQFK^k)~oET16kB%0~Tljf-u+4L)wcGSHIPEBVT&BHr^!$SpLN}2SE_wc$-J(Y$1t8 zT!=@f7E zc##ZT$W=#X)UG8N7eu^oSRJ%&$Pzq?AWn-m2Ytzbp^^O-&W~sb!13$)!a#}#bDkxJ z0$%+SBnm{Vdc_V5fJAv4c1L?LQVRoxetH)bx@Mn9DxSo8aN7yapTzlab<3f-ckUY* zE-~aD>674pW{h2|!jIl1CJJpe5~(Q||2ZAnq&L8QzC9V$KYZ#rV3``MqWQdMr-p|}AfY+XCW%D9&z114^Fc!4)y?Bxw8Ok6wRE=|ZM zZ{-C4lxf?#D#d-|qX>LJYe0=s2q)WG^R*st@n{zdBS*dX;*QzumAAQ%OwotcIkq4f zv>6rlaQsQ z-jh0FzVFE{V)--og=4KjCc&|t0y9ClM>7E~-pq{E&thlYtNQ)sMFQNE*?%#_==xks z?DkyC_QV1<*_bK27moefQER|#e1aa7yLyO;VxI`H3dR_XT>=l;2S-8%)++!fpB$UM z6LMc4xu5K zTMj~CgK;hQL`?aeL^S+{yV;ibQWaW=t|ygXy92gG#b5qSnC`hu7$|)>tErK!$?Xkp ziv-nT>7z1ccE;a5_tlE=w5s`dOcNhKB#CqN;h_{+!3lE7Yv)P-FuLM$q zdu|p?1KXBG*obiSeu&Qrsxpg-V~zA6bKfre5t;YXNN_OFcdMdMsRK&nHBQ3kV_Cx9 zYnIXrKRNE~a+Rek-;qh|?$Pmh>nSPweA}x#dhM6`_wFp0t3xMwOBln8zS{JjJ|$ zm`{SpYD44hT26@U{U0oGe_aotvU5*E^&x3SlrO-rcfa4)TZHU4^jKpJKKcmlP0!U? 
z`(g-2ryN;}t*t0JY_N|4)pN3H3MD>E5$_qDwtPTSM1JK=rP!KCK||F{fy0|s=8z_Q z>rhqBLSrFM>{Sxu+*)Gp60SB(qYkh5H9n`1K35W*LW(h6`JNm{U3A*Ez&pMwit*4A zU8%rpe(UT&zO;P^EH={Lbh?m!=9P~{%x7DJ3nib~g^`h$y6Hd~oMb=8D< zHcF5rs%a@^3o3{XZGeMuEI9l!1}<5<({{qy7P-)@%UA2OOr|y@ax9ME*3kNaVD6HR z`h?fxW;|1BCtZ7FRGUR!Lp4Og)f(MFB2%h+miy?~axv>HJqW{OQE`$vm7*4PJHeTs zGgNEj^V#wKMJlE5PD7S5<#hui5t-2DiS~!6lq2E~h5Mj+h)sI6l_Zb!So~b7{bMTd zL;ifD$c6~J@|Z)))<5a*6X1=QPsDVCq&MBh^G>xQzhNbZ(teOAs^$mKBUcz$aJvOP zkAeQ4y><1){#IfsH7MbxV%!L6(Fyz~r54h)zfUmS3<767F?0Gx**xGR-i{OHY#eYa zgT~~vfDsv`aqOmviI}7tY#N9kXD9frf>H@!#=Y0M9T+k(O{#alSU0T4_j-+`J_sp| zk#(-^8!tSN6VWKQ&M^Ija4U~xiN-i`F~Rm@e)trJGOvm1&ZNS_HeV_7ZwT^K*QTE1 zRZ9T-Gqw8XK85!*F#bCUK=bU=XrR{2Ld$@N@WZ?=TaD|KH}sIP85adDLF23+Oe$N! z6~hF_fcw)`Ki#)Z%+yKjOE?C5-xsX}WGU)atuTIALbclXAUbzlE#etYG2RH#gG;N? ze~G5>mp2Pv|Khgx$(oR+7WJgy{+_Jby5&iflZYvDSJwr&X1pON!Z9Xg>=7Y#e{1bs zydgb?Hrqg>$ht_`$zz3nKuaXW`J0{m$d6wwBnr)(rGjOye>s)t5nVwfPbbEHPEeM5 ztB(hlm^eGiq26+ZMDV)E_X9h#g;Dw~nsG^+*L7LKHN(=V{5yxIqHJ*(ClyB=I`@>E z*e0;LRatVawAoQc%;(FMIBc+EK25>=qzYk;rRZ@Jyp`%IA^b?0vi`WMMonS_emGPl zb!LA2f`nO6qRO&hp`7EIc^}5BjJ831Tc+nasgC{sa=#zQ{lwlj5|EA@To)aU$Bi4K zQgT>6;M-B)-_lqvFhd(k4^H^^OOfV1*g}WXDoWBSK1p%$;g!hAXiTclrHJ$F*BC%B zLnkXe=Uo~<74!w!&H4hq<$?juA?%^*6yS>fykDF}mg+L{k?EjKq1o^G3i&#$wPc1} z^OA_Eq{%3Us13xcho@QKcKyeKPUXL>Ue8D&bZ1cUV4Lq05e!n9;D->|Sa+tD6k|BT zF>&<07hWAxPek;MlsX5dg(@2+$-H(Yubwa-V72mCJpJiW@?*iRdB77hG%%_Y-V6);cXYyv1KBNiHI@7tK zQe42!aZ@A!0QiM~`|fsdE%u65emZV*AQ zwTd6`3Go@z-xU@y)>XFRk-3kn=`&GU*crxuaT^ogD^C=b3UG1rn9u*Zo*ac|St+xmO_azVX$NtuG?rP^d{Q`EoXq!^`4M?hF~%r2Gd3-@c+?1cCYF{Nk9pIM2a! z84=$9NdsLPWsu@4dH#zYg$W@J*%XcL*R8CWeqIr=V|a*>1bu3wA^>Af)iB~y7XU%v z8s>LG>g8Z6G;`qt1L$`>f4wcR%YMvO{8L?Fk)x34hFW8)W7OrNvSNWt4v~J$jMD(O z!=v`sBMoN+>^n|ku{660)trntKuJ4dHi4cQR*vO<4#rN+_Y>B-IIOrjo>*q&XHsgZ zQi>&WHmQ-*ubP4F0@_9#c6sR%{|6QTUYp_KI3bjIAAV?=vES_V z`U%@>QW(+6Am->uO6sHDj{087K^JzRpplt_Uo&~J2D2P&od^bH*-)$U&PDCBn`z=Y zG~hwkpYH|q1_{;z9ni&21rCaSdTQ8~J^F9=9xPm>s-UGX&H3;9O^#JM-t)(2qn$K2 z&xZ@Ak+DfClv+}TgDOjhTKSG6ibx`6AL}cd*fEpx1wsqOuQe!;T_*=5Ok%LWc4of`45cooD(OjXQ#(~Q1@Y6Yj zp1~#aUMIQYy|#k&!#N5KUZRm4vK zHiTRunH7C|tB)+>uwh@lK7naR2h*B!HW*e;umd96ATIra+5A|`H{*#fzK${=?l@+r z^1g7QLnC3P52Ok%tn1VuL?b(*!N6k++Gl;as;K6~xG1XoN0OrAy6$&sNDU6S)pv=l z`i=$O{1h%EHLuGttIHu==$H>W9^q_sukTt(8w|Ukg?k4rQe!Ksj68=aot4Vjw^0hm`OQ{am+0Y zjQQaNkWDU`xhYOr&60IQ`DO2OxIODTn5gh`S%zE`@!5hbQK4OTk&^dU2ULu?=novI z;FQ5p5hH|Cv$b&0rHYelX7e8TKg~f3i$L1CoZJM zl)!2nqw|KIx+KusvP*BiIci%#Gr;>lMh4gaUWbHf7Pvr?#c*kYY7OdjWvU3ry`SZC z1yTOt#W9K@O>BS>;iWvDUwLB+0s}}Y09EN6^U>xwp*H9Yz zX){!5Q^iEvJ5TBUTJi16zv?{i74=>7kaI!Hh=eMF)sv4zkbZTlLea<0(^BV#=I2k! z@lju*M!pb9^K)dV7Se8vI!x#ppB#h!+*#B6L`3)LutDF|l}JglaqqR&q%LP3>^kA) z{zau;9?NOFvzqb~Pu7b2BT=+41wfg<7|(%@lx-mO*t7N7WYvH8AoRsieT(LQGmj$3 zw&Dk_tTF%*^*6-gX|HMy8hge%M(KFe>4uSxzS0a_tyTr9Zpi(h!e;C!Oczpi5UH{S zcicXG$rj=rG2(5dVXmxdL#4_KD_7tGk27Y8Q$}BDtq5f$=*TNoWIL8;29wutK$wm#wlYNmv3D<%(Pu%S4`IazcmamG&+a^>F7J9xpfO?;OmJMDbdu7!7_@Eft4e_@zHl?B zt0K1eY9s3HtCW1T(G0K=FG=Fhinp&iV5R1w1s4cKR;uz?zPpH#HsJELKH+JS)m4Ep z#TAE28^ zG=*Y6NxX~@6r(NmO#KAhBN705Sa^=gaig;Miccgoqym&yfGnG*lgk0V{=vwaS^XJn zW}SdkdI3gwmu;bfCiMV~A<{o{&ug7};vJPds}tu3*p?lDG^9e+O2{oE%t~@gD^qWS z9AnruYZEV}Bx}|GQNOK~pp$6!W9E=Hd~hg6nQAHRmr;jof$Rf(t9>VyhO9sK&lec zfvB;JCEH082ayE;+Qz?S-Ri&?I)fNJRA_yT6(t~fn7<_*s`Pk(&5iCiIW9cXm1X`S zDh!3M)5@(JTh3KKRgcgeUwi{)UL0`&oUFohL8sQhJ}412X1PgE`D!H{Z0^V8b}w5? 
z0Pk)!^EVrpn3bDhaBucYxxOp%8b>`*0}4Il|5Ld08GF}W%5qxq_usof;zG_5`|P&S z)^lJ@^7Te(=jA&b-EarxV1R!op9rC;PuXwH5w{b{5jX8RE&hTc^h#xV!Ce2^ntHz% z!F-r-ZRyt-`%C<@T~t}uCTsIPV)}Lyj3a)cBg;1W?&qNb1InAEwYE}aC1(vr`pu58 zPD1+bg2E&E?x8slHA-E0Z|4`cAiAp=!G6)dB%Qa#dx|pm%t>BRvL44W9#$XCe|*UP zB%@kHi5`3Xnf*_AWRw+!ZWOU}x2CzIYB!G0ZO4tRC@kIpP0JNhZUFppmf%~Xx=Z1~ zz@#fDfwa{&LY3A`S2j&ts9fk|OaT&LauH;?vWfpA5YZ~SnCtmb&aXcMm#{l5diTgo972poDGb<6<&6WfZ^sOCO zH-ah3Phk4{;z5%UJWkufe8w7)djrp3rZ>m*A#HeIp!xLwicBRkLieYnHE%){wxCuK zodFD1%-tKV!S#=}xZt#~7=vm(=28FA4dO;*4Fx3xKzPm2slePC=8>8c2@P5ZavYxu zHS^jggR`JtkrfoNpD`ikGGVf~o5JCP_fM9(BZz(;L4B2n9G^!t;SL&Kp6nN9hhdo- zi|c}o;5Gc_vtg6h>YNnUe!(~dFSUH9Ys>%4G|dgG_f<(vonLSTdEOA!okis>QSv0n zOruc4Ie!vu3m1$@`~4NU`q)GKTd#l}>@olb01+j3EUZtcmw_E4@{b>P#%X}U0jPiT z|5Y%wni{VY%2y5Cd^R*-Kk7nQ1guV><#NAq8%S6FUnv%}@y!|)+FrPea`;JuPJPB8XngCv2;v>hrI+ZU09A|ETWl zi0hK|MP2&mv$MJl5?0dvvhTVw(>a^igwOP*y`JGy&|32oNT-?ErVQ-a_i+zc#QgAts?yVF0ZHJX6)~ zq@Qwp^VL&}f?oxu+rVWorO~X^ECBlyMA(85=jo4gfZ*!6690*8qoH7+E-3_>8d#$Bif{RQ*WBoHpJ9-Zb--asMB(|4_x@ z0BnVss+ZUX@#nl2R5kO6ZsZ#0kXAJuj=|S%(ej`IVh;Oj`%ymhRfFjM57y^_!Y?4g zj5>VuNChi-E?WWCB5!@Jz@RvU-GZM`Nu}}6CK4ERra!CInnV3+hy!X#UTGCiOk_!)7?DeQ#d*8zg9|6!jixxZ$apiRe`1#P@LXrVSl^V zdiCW@F;MKG4}vf3tLI)7ox&oB!)}?DD|$fP*7k&v`b7)e>~SVEk+jb)r-|rLmUG56 z;uM1~Vbe9Pk47x2ITpX726AfI_iMs4Eih>{Riyz6;g#r3v<@CC&i+?k@?5>bngnU zK1oCmn6&%7;R=G9y?oZC(&J72b&@)zC0s<4vEj{m$DS|O zogqF0I+Px7RYCT`l`vx1Q^fEpkMj`S>l70*5G~H44G{P$W=<1(pc`0IUM7Mk8dn7W zGq7NK>Z(GFxqcS&;ue|RQkrg$-6zIAkli?4a57Hq$Auik9ODru>5(z+)4jFZ_wsmN z)#C`ZnO;?~jul4g&C{z0PdD=%7%!x6y>=5geS0RbH{WIsLnpj6FG=?J<^F@;t|68wSL(}c56K-Y2k8ZPBWFC&-IG* zq)Mx0v}V-XJYqqs#Zn{*;nb%AfpUfv59@5^B-$xkkAlse5a~)!X530jtix;j1Gm!Q zeFQ7qqXI_M8nX6^*8XurX&y8BoF4wvUH6Qr@(R9WE%93%JBs|}-9_LAb4QS+#obT= zQYr~tX;Znf4^o*ag(icX%%IV~OgRb*`>oj0ZIH4-uuYBb+5j!b2yN+Yh` zekGW1<3siKbuWrg0rsHQcoTcGGhRhc2VU+&r^I7(h8v8(;F?Pa~Mzavh$}|f7_o4Rsrty{U zUF%Y)U7Jz}6if|hI#{bX%)~q9Lu%a`$$8humR^$Ti^zGSVnctpf}V;+9KSmlo=@hq z+2-bvz`387y%1iLGA*1;5NUObtN-OjLYq@fSr)DNVU0&^-Uoy}ZDhJea%kO(o%1wK zyx-Rsfk^9<#9`w0u+Bv+2(aj6_-%N~g0Hlc!iegF~_}#rh}1(9qQ1 zg@iE4px^Ie{;u%F9y38n=xRlxEB{$f(*QU;Y!Pty^{97!$QthNzHB3(qA=^2yZkqp z6Ei0h<=Y=fA>MTC)WBu@(xZ?VN0Kh6eq0Ia{k{a&tDL|~*fU&?P#!LQGl?6Op*^uH z*ZwtTak%teygPwWtGCif_YDVcyKVAw(MZ;xA~RqJ)IWM`;@v=ovkVeHIRr~~M~q(# zB1oA-n*`$hJ<^p) z2HEP%`58{#M|x*!gnO6a(B^5z%Fo$iizgHEM(%o&MR9C|bF1$Qbt6YzB(?7uKimS0cnu#?(RlFK#-8`Sh~AwRAA{`I;Fe2g{4~> zDQQXRj(5H9`}w_p%-NaG?3}sInd^LGlEr`a7B8SC34D^?@ZL-Lr0R9Iu|`}mPOWCu zO)_-EP^TzX4)#w|mn*|X?`o6IAHGb%kwe;dE~6(^(qNz>W!;EsV5$ih@0qDqo>I*$ zU@-}ao#!wJpD1ZPZ27efbiuotVzN3Y6o(6(?>NV3hV8-~%o44YG0j2V@fD1zx4!R* zRJU3Va(>$KVYDZQC`~g0l|IOu@syL*uZIj%UCoi=Ni}#z&8!xvObJ<*svSfv|0J5j za<2?l8y?{rw86)m@bBty80-vAa>PkNE2^!u<1~;{DI)M)!@6;ZJbF;jMov7uJIo;c zHYqD0YnAES#>VEAqXs z_6Hb1aZbm?rL;x{d9l)1a8pyRN|_NP$AXqMo0TZjs|?o`3jRZ+HC5ZvfjahZ6TvLI z8)=bGht9UUzOc8Zwflno!Bofe-&5kmec9-l`S>u`2(ENT5%h zLZ~(dqzIy&t*X++)2%+GUk6|}m5l63S+Qpqw>5S+rAju=rYBW)RMSwNPHDt;wR&w; z)veaMT@J32)lY zQMEfNoHeB>+ybMEzIBGLVPhjzl5PMkPP{k;P_1~ZBRprf80|vlGJ{=$PO2bLpbA## zXHgw&27&Z#dwZIFBBf7qr}a%(JVgBO@BiIN^+fy6~gmF(FRXiewj zWyyfSUC1u`gwMJz)a?wO(QFC#v}21L2)Mj2yLkeRvf}5ztp|kEf%{VNg$gdo-loEk z3730^50V24At9o3v04!1zXFq>e2J!qX{9Y;^O8b!(5E98L?;EsyImqSQ=)vk zb=cueJx{&%yQ6jY5+1GT!%knNmHj`dl2l<}3I>P$$78T&(CFmHI{!msz+^RdO7#U00BLZZOAJLdP z2k^SKB9Nx121ak*jnt6=tTii>9|l+(EB81W#B7ivB0LVST`}ygzn}+VMBTrI}Kx#6{*zk^Ul6^;<(L zZSX*e*)n)x@bWJkgjQf~6CcWW#a=|E%6o*>QS9<4$jVDXE1Xj0VHv}l5u#(lIFj1_ zC?(y6_Ya`niPA{6kx&D4mC`{Tsi9$I=!Yl7#IFVGnb8l6d1^_ih_H9(1z7Ugz3>OV zsbkG#H%4eUmT9?tM7x7u(FI8~;R*Uy*okonwppc$?ZnmWZ>Qww(MdJI5VZoA+wJ3) 
z_GM#F?viG+0(m;3ivLHXsR4n?lk&G(UUOm0No_M;>pCd$n-8oO7Fze3Y#|pJVmlJ5 zEEabQwBw+RbCyiPHxrFL=64I4j#uSE6}d@m3jOtskotKUK;}=_1*ZfU5VW|Nr-H&D z`~((PQd>p!jaHoWT?(1P0|FQN|8U9qF6Hw(C>~A7z1<*xh8P{iUb^{RO7xcIo%ZsG z9NP44I*hIK3uEjrEzrtd(aJ*=Wa_ldteWT3dF|qIKy;>5|8Ql>Ai5xr!q#N9PzfLP z)WH7^=}WY-552_ymx+W#cv}YrBqct>tIQ+Dbe3AI(*q)0N!@6wd;p|kfdu$Je3EzZ zEeDsNnezTcC)vw~&fh*Iu_TM;u;nR!eoFIm{%ETVJ+x`(`2U4hcu`{*DjeX=ujLIf zI=th;erkxdiZOKbdA0a5tYqXv=)9x}A2;^K7=(vun1hWH+6+7PVthuKF~|`S(E7Q) zI@pB=w@-&W^(eS~E+U`GgM&X z$Ma1I*GFM6Nq%s-Q&1*U*4>5FA~76YAuXfl%R$HugZ8g3qa4N3{Tb$5sQa-}`R~!= z8hhODQoe@r$G$-df(>+eRwM+6GOhOEb@M8gog;&ArSl#C8756tou$4jzDoR)CH zr0o=tP6X-*jK>>sTp{E?SrNxBY6O&>l^|+AC4tA1_lOV^@1;sDp3$wt#7)#pdYInL z`9S`c-+ol%o>#1H^2dh36wCh~Zwj0tU_H=}YO1mePHD|OQyfF9g*WUxEYLFF^1bx< zK3mw)B9GB-{}L(8l+=w!wM8YmW3q`MTx z4RwMLALVPcUpM)fL%s|xYM`7&t*Tj@D*UTlD$xE>FYjSB^yC^zYg(IreK^r>5{(;w zw;mh-lUrR}fJhu)KW6z?R-_A9)Mm+O`Y^SXdg*(PW_DdPQQP5{OP2dPc%;6jVl{uw zaN$~U#_@fCH`Yv5W6$f$gK~cVkjBD*qT;pm8FHMads7m3da-ZGkza+PVtc5x1^rLCJ!jP2uY9jX_aBQJ^>Mz6i2GkKD8-ukCj!QIj5{!#D^hF9&E+72@r35JR`|(1C<12mda# zeXAn|LRFO1&JM9!ri0%tTZKa6mze-*huLwi%|I-Kp!3ytSGWR9gN2H?IH!xY3Dp^R`q&j%KnLj9q zHr+yeDAKOKGf|cF5AN?VE0yW9Z%4gbcyUag)fI9+2uwN!Mq<2B@QOf1a~(|?d<-|G z8y0loE?P!Ef}MvjtrotTRgJcA!FS)-&odKQxLgfW#i0;s zuCL6aB^`FuqFT6k%yI>fQUjl0{!0O-sOSDu)%J()E_!l7(s+z!;pMCE`qzUQ|IyW4 z?Pqz$HAIDck+U!jwJsL3aQ|28z!v`XVNMyZ1tIGzZvnJ`D5a`T54My)$U0UNBpctE z>|ZI1w%=_{i#L#Sga0RUL~76DCHs9lBO`;WT$dRlg^e;--_iY}bq2gdI^8434A<=& z=cZRreBbuV&r3?y3=AGp_?dUWKDV$c@d~DTU>fC}WGM-4zWl50H>wS}LFzGiL5dK! zRRSt9Fb+Zi0!pB+R)|DQRhHD{k=RSg%(+?D-rCS`?#*BQf{?0JJRm}dmFs8Te7Ywl z3m2VL8UMxmLkjJ`Mhp54T`;K$Q~eVLXSEH&)@O^pwfnxaK4s~~n|AT`{TsPLRTXC20EluP?#M0oVd zza1e962=g}H8KF?jwu66wa_V!e@#>^>`x9Ba~v=^151<lOx8mOLGLfuOL~z1D^cQtz%ZN+ zwkwn1xAhXz>0vZ`lrqP)!}>wev0A=HLsk@fiGlwJbF0>LrY}F%E+|=D_V*E8Ed5nA z*tr)YfBns0>vS__3NikL4;21m1>V9e5)5)+w$6yz;&;<5^S@3Brl4?cOFL!rU_hhf zm19H*tKOLy9Pl63!1e=+EnO0CfNgv)IXXTlKyO_rxO_hSGF?{O;W^YC5$Cm@I?o$5 z&c$T)@&LWeT%(9^!7=sUp3+f?>?)?)%;>;DaZ}9Li}9bh_#rM85vInaQbJ7buqqNq zK<4Z$*v;ScBQiuauiR5JR_c^U>joEAVfsU#PO3%IR;mS$aM=D5_Qei%;ZAPbVCfc^ zOA<;lyEK2bD%S5DY@6BK)1ahZGgHuyGOvr-Joj}gmaVdZf)S73on_oO! 
z*Ryru!3Szc=c>Y}VDT@O3yzO>=kO}x{Zq>;n3=(YX}u!VffUqdF2bh)2S+)P5)%WO4@-;%4KMVOB;MW}nTysN5s}TBI*v+V}aM%j=T4@os|nFGGXe|Y}6j6b%eLHpRfga%F%(z(54thq{mtrzvuq{P%Fm&^s45_!UBz3 z(APXusPN~B;i1Td&-wV@Ce4+Y;t|3$&;Nnee-$_V#VpMd)2}RQ0}TIDuJj&c1TZBJ z6Wp|A5o_M#q`0>j|BxwHk%Y($)b=A9(B=1XsP#sO!zkzK6Tpp!vQX_VkVUC6kLer2ZL8zm-@SjaeQ0zNR_yZcme+ zvY=-li(zFm0OZoe)3OK+^o;G;Sy8w;amK@#TQj8I`as}l@#hM<{zV_PN_II8I03b- zdU1=VvYyv@b9be%o|iQf9>!mU?hZ(?>+kd&`A=Q3T2nr6y^aV8o;~x@{tfz$fns|D z;_utsS!SZz*bN!rtq3*Z)>K!I?E6sN-wMvGAV;+7ffhL# zYDAusY1Se}g~${rZRy)%8&G4`m{};=^wE9)2!{CTxSjh3zwW(Iw9=M7s|J7;bsiE~ z#c}IqQYCLu`|9`fPln?=qRSeP+=9O%r{aXO(fR4mH1q1@m^JBRAT_R{iOLTeg+4PQ zRn9Mhn!o3BO_aEC-+D=-U1Iaj9DI`Mcztr^eu>>r`Rbbi%8Mhs@5EE;9;2J?A~aOR zizpBm>t8`GD3_CZ$K~BqjhNxLp8=Mya8WEBK*e*pk_{u; zIXbyu3*pPlr|k_(JLf- z*x?2*UEndKUJ}zA!tcc^YGTAdiH{?6HyMTn8yca^=Y>Q5qBJ3R`6(1M5k)tA$O;d3#Tvxr2T(=76KyYHKl@Cd5;+hk z4W>^)Wfb5X*!(hh%Hjl@QplPbq)m4-s?ay}v?h*I5Ct9!ZLTo)x5Ew&qd_?9u}!kz zRgI<}HbQOoG?qlzPkNc7HFAmJe!smxjvRjR9Ir-K*dL#q%A8LdaLX;eOr0_fJ2I$kt!yJGHGnJhUMNieG%erc-J|E7LxtGJN z_&%k2tA68dY++oK=i-_G_8NU7hS71qcndiL24}k41CK(BzmHd)bmN6K?~?oL;eIml z%nJV&;&+qA0~lBmfrjrtQ3FI|I9T029XL9cn?4PRCTYWF)d4#eOs$cOYX{`~$m4@*QqSLQn>+-nlov9G3g z-cAbQa03sfUN_@kLQYt`v}4zmr7RD}$RdD`DCnX-Cx-(gE33OTITQ}ozjN1Z;|^A|S zftKKbF^*WoX6x)v)zR%e1^!uO{^3=d>LY?i5$BQ^^R0}LNk<{}$0rX2eg>YYKk9w^ z{5XkzIx~~bsZj!rjHz!5a{)o+8z&GpmraTga zy@y7uwj+=U9T8OCV_o6JL02`mPFhHGaRhPLsX{h<_fCEz^)*fL-$(-4#h1h>)d6Mv zj&L)qSK4Hr!#t;nH<{n09_C`vPhBT-yimBktPZu)1Zz_82-FlQUD?wl6xjjpA}e8~ zri^J1VGM5N!+G-&nxw_Y<FfW1mV?%Z)=jK{6=@}Yk{D6wO#F!n)Qd%-v(J$)pmUuLMXsR7xlzVt5bNZ z$mflRi5A39!kj#QQY6ah0h4gPKC(GL2KWoIT0X3mdhevif-an8|(lyCXdh z8?`Cxu&LE=@sp4N-@{lk8fOHGvB1e|V~{gkB$o4+4(o8k+ghm+R7T6#{ARmGtwb6l z538#6+SZP5t?0ym@PN|IksjMzED9FqCQLj}9VA<3(PibBV5cg9L)fE~OGP2q@KVZd zW$U>W`zIdJB*e)QS2aG{D5!7En%_l7?Y$I4?H`Q~HO@MjpO!s06#K%y zW*ql85ze1sK6hZS6Sczc*GR>zMBJt$WPj*$QpHC1aL}>w;u)}DhrKw-z93?-%X3}) z!%Yk_dY^V2REY5Q!qd*)Ba{Iqle+;2lTIg2Bl*!)@w&=(f{V(NI3OBnOT{v>V-;So`KiCeoNzS@kDe*K?B<}!aH85x9G}Y% zH&FER!;PIaN_695WmpGfT}PE1%ZeB-1fg+iaSD$l>FQU?$;;@)v6JfhR~R21vki59 z$^`U&_I#frLrB6)=>WnDSZ^jpKYm0JU|IRNd!%3W1qe~n)7AYBvW9yeIyBJ!Y3rf= zcL^L)pi$RV`9~w9pvhJVQhH1an+b;|YgN}zw}Gs~N)95c>%V`W!b3>9;+T5>VdIRW zGOTAxji+zQ*=sAKV96P>IOSOOJgHl;`WjFk;IkL)2<@t<>%VAnPf`N(uBTq(MscYf zL;%gJdAyB8uW;W2%%sIB6Po2GX9^wq`xbn3P2YP4BhjdZ7dXx$JLcxC(K9*97FxyB z^;@eA$xStupI6`Xb>q4J4(D?^k!sn#&Z?HdTR5y}m1(S&LgZ!X=uz~@7cN}OFF#PdR_ z$tpQ%UvsX9ckE?T6D<)Lo9yOQWBC3RRmtY^QHg%yT{x$mvO$wu+!JO>i0AU0CdlN* z;uNw5+~2P~dp$RaKV9uTzk2cF`T51)B0p2l#Z)%Fuqi%Cd1Vga4Wj3Fd0y8_if)v5 z*c5P`*77Wj6{_-zOd}Sp19ug$M^aLxFI0VK&mVs?(bgRbThJchf7mF3%pX$#7!gff z&ns2G#s-qb1I`=EUrQJd9@yh z3Rv9crO3xGe(~JlS(cRVR0>bKD=&_vUVWI&EC`oj;wD0yAS&s!w0%!qVD(;yhl?nx znufI)C+eG3{@=o^UKDS$4=5EqwPjrhR;`;JOEtWlT;g82nr|1GZ?E4~EdnPmTu0Me;F`A6Per7G5R^b2W>g*w)S;)2%cf*YWginymB!q z;yk;0Ir*4+CBjcMHcpgULB#X*YN1qunt@ku(TYc}p+0Y_e!LVYc%8TtW*Mc^m7#`kk$cUmJtF=6U>a>g-W7F<|lrDMly*zll>W1l^qU=&p|qu{rr z#W*E-s}WxRDIjrUx7Q<}p{N&q7xogatfLHAx#3=K9OH=jWg2ml67Tl*-fQ;6maE7= zN0f-So;|^+fg7lhVm>KLxC%Zi!%@t6xcaf4`V%SLBjl_s;ct=0eWY{&+lM{;wicu2 z*E8FCYYT|R^w`X>cJ_}@BUbN9e*14Y2Wm-1vv>!2tX=XoVhKihF-U&u-XqxRbK{?! 
[GIT binary patch data omitted]

Date: Tue, 15 Oct 2024 12:23:13 +0100
Subject: [PATCH 024/112] NPUW: Function memory management (#27043)

### Details:
 - Optimize out temporary results (activations) when possible
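For review convenience, the allocation rule implemented below by `FuncMemMgr::assign` can be summarized with a small standalone sketch. This is an illustration only, not the patch's code; all names in it are invented for the sketch, and the real implementation tracks planned reads per `LinkFrom` via `MemAccessSim`:

```cpp
// Sketch: recycle a function-output buffer once all of its scheduled
// readers are done; otherwise grow the pool with a fresh allocation.
#include <cstddef>
#include <memory>
#include <vector>

struct Buffer { std::size_t bytes; };
using BufferPtr = std::shared_ptr<Buffer>;

struct Slot {
    BufferPtr ptr;
    std::size_t remaining_reads;  // reads planned but not performed yet
};

BufferPtr assign_buffer(std::vector<Slot>& pool, std::size_t bytes, std::size_t planned_reads) {
    for (auto& slot : pool) {
        if (slot.remaining_reads == 0) {  // every consumer has read it - reuse
            slot.remaining_reads = planned_reads;
            return slot.ptr;
        }
    }
    // No reusable slot - allocate a new one (on the target device, in the real code)
    pool.push_back(Slot{std::make_shared<Buffer>(Buffer{bytes}), planned_reads});
    return pool.back().ptr;
}
```

The pool size left after a full simulated pass over the subgraphs is the "maximum memory residency" reported in the logs.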
### Tickets:
 - *ticket-id*
---
 .../src/plugin/npuw/compiled_model.cpp        |   5 -
 .../src/plugin/npuw/compiled_model.hpp        |   2 +
 .../plugin/npuw/just_sync_infer_request.cpp   | 211 +++++++++++++++---
 .../plugin/npuw/just_sync_infer_request.hpp   |  64 +++++-
 4 files changed, 238 insertions(+), 44 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 2fe90eb82c41bb..a312a806cac4bc 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -516,11 +516,6 @@ std::string ov::npuw::CompiledModel::global_mem_device() const {
 }
 
 std::string ov::npuw::CompiledModel::funcall_mem_device(const std::size_t idx) const {
-    // FIXME: currently we allocate intermediate tensors for EVERY submodel.
-    // It's not feasible to allocate them in L0 due to high memory consumption.
-    // Until we make such memory reusable, hard-coding those tensors to CPU.
-    return "CPU";
-
     // Force globally set device if set
     const std::string device_alloc = m_cfg.get<::intel_npu::NPUW_WEIGHTS_BANK_ALLOC>();
     if (!device_alloc.empty()) {
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
index 4152d08275ba6d..038c1bb176b029 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
@@ -46,6 +46,8 @@ class CompiledModel : public ov::ICompiledModel {
     // FIXME: This class has many friends..
     friend class IBaseInferRequest;
     friend class JustInferRequest;
+    friend class MemAccessSim;
+    friend class FuncMemMgr;
 
     bool compile_for_success(std::size_t id);
     bool compile_for_device(std::size_t id, const std::string& device_to_try);
diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp
index fbbabf083bccd8..c4e2c3ee98b676 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp
@@ -20,8 +20,173 @@
 #include "util.hpp"
 #include "weights_bank.hpp"
 
+ov::npuw::MemAccessSim::MemAccessSim(const std::shared_ptr<CompiledModel>& compiled_model) {
+    LOG_VERB("Running memory access simulation...");
+    LOG_BLOCK();
+
+    // Initialize the read list
+    m_read_list.resize(compiled_model->m_compiled_submodels.size());
+
+    // Initialize read counters for tensors in the graph:
+    // 1. Interconnect
+    for (const auto& kvp : compiled_model->m_submodels_input_to_prev_output) {
+        const auto& read_to = kvp.first;     // who reads
+        const auto& read_from = kvp.second;  // reads what
+
+        if (read_to == CompiledModel::NO_LINK || read_from == CompiledModel::NO_LINK) {
+            continue;
+        }
+
+        // Record # of reads for this particular Source
+        m_remaining_reads[read_from]++;
+
+        // Record a read request for this particular Subgraph (who reads the Source)
+        m_read_list[read_to.first].push_back(read_from);
+    }
+    // 2. Global model's outputs
+    for (auto&& read_from : compiled_model->m_outputs_to_submodels_outputs) {
+        m_remaining_reads[read_from]++;
+    }
+
+    LOG_VERB("Done");
+}
+
+const ov::npuw::MemAccessSim::ReadList& ov::npuw::MemAccessSim::read_list(std::size_t idx) const {
+    return m_read_list.at(idx);
+}
+
+std::size_t ov::npuw::MemAccessSim::remaining_reads(const LinkFrom& from) {
+    return m_remaining_reads.at(from);
+}
+
+void ov::npuw::MemAccessSim::register_read(const LinkFrom& from) {
+    m_remaining_reads.at(from)--;
+}
+
+ov::npuw::FuncMemMgr::FuncMemMgr(const std::shared_ptr<CompiledModel>& compiled_model)
+    : m_sim(compiled_model),
+      m_model(compiled_model) {}
+
+void ov::npuw::FuncMemMgr::set_alloc(AllocFcn&& fcn) {
+    m_alloc = std::move(fcn);
+}
+
+void ov::npuw::FuncMemMgr::assign_memory() {
+    LOG_VERB("Assigning function memory...");
+    LOG_BLOCK();
+
+    const auto num_submodels = m_model->m_compiled_submodels.size();
+
+    // Walk over the subgraphs, pre-allocate and pre-assign tensors to the subgraphs'
+    // outputs.
+    for (std::size_t idx = 0u; idx < num_submodels; idx++) {
+        LOG_VERB("Process Subgraph[" << idx << "]");
+        LOG_BLOCK();
+        const auto& comp_model_desc = m_model->m_compiled_submodels[idx];
+        if (!comp_model_desc.compiled_model && !comp_model_desc.replaced_by) {
+            // no model & no funcall - optimized out, do nothing
+            continue;
+        }
+
+        // Simulate subgraph execution: poll its input list first
+        const auto& read_list = m_sim.read_list(idx);
+
+        // Now, get the outputs for the subgraph. If it is "regular", there's
+        // nothing to do - this subgraph owns its outputs on its own.
+        // If it is a function, though - look up in the function's memory storage.
+        if (comp_model_desc.replaced_by) {
+            const auto real_idx = comp_model_desc.replaced_by.value();
+            const auto& proto_comp_model_desc = m_model->m_compiled_submodels[real_idx];
+
+            const auto num_outs = proto_comp_model_desc.compiled_model->outputs().size();
+            for (std::size_t out_idx = 0u; out_idx < num_outs; out_idx++) {
+                const LinkFrom this_out = LinkFrom{idx, out_idx};
+                assign(this_out);
+            }
+        }
+
+        // Here happens the imaginary execution... Hocus pocus, done - that's a
+        // simulation after all
+        // After the execution, mark that the read_list was read.
+        for (auto&& from : read_list) {
+            m_sim.register_read(from);
+        }
+        LOG_VERB("Done");
+    }
+
+    // Report memory residency
+    for (auto&& m : m_memory) {
+        LOG_VERB("Function " << m.first.first << "/out port " << m.first.second << " : maximum memory residency "
+                             << m.second.size() << " tensor(s)");
+    }
+
+    LOG_VERB("Done");
+}
+
+void ov::npuw::FuncMemMgr::assign(const LinkFrom& from) {
+    // This method is the center of the function memory management.
+    // The logic is simple:
+    // - Look for an output tensor to reuse
+    // - If there's one, assign it to this allocation
+    // - If there's none, allocate a new tensor
+    // - How a tensor to reuse is picked:
+    //   1. It should exist
+    //   2. Its "remaining reads" count should be 0 (all planned reads
+    //      happened at this point).
+    // The tensor storage is organized like this:
+    // - Function: Here we use .replaced_by as a function identifier; taken from `from`
+    // - Output index: taken from `from`
+    // - A vector of resident tensors
+
+    LOG_VERB("Assigning tensor for Subgraph[" << from.first << "]/" << from.second << "...");
+    LOG_BLOCK();
+
+    const auto& comp_model_desc = m_model->m_compiled_submodels[from.first];
+    NPUW_ASSERT(comp_model_desc.replaced_by.has_value());
+
+    const auto real_idx = comp_model_desc.replaced_by.value();
+
+    FO func_output = {real_idx, from.second};
+    auto& assigned_memory = m_memory[func_output];
+    auto asgn_iter = std::find_if(assigned_memory.begin(), assigned_memory.end(), [&](Assignment& a) {
+        return m_sim.remaining_reads(a.from) == 0u;
+    });
+    if (asgn_iter != assigned_memory.end()) {
+        // Reassign this memory slot to the new "from"
+        asgn_iter->from = from;
+        m_table[from] = asgn_iter->ptr;
+    } else {
+        // No free space at this point - allocate a new tensor
+        const auto& proto_comp_model_desc = m_model->m_compiled_submodels[real_idx];
+        const auto& proto_comp_model = proto_comp_model_desc.compiled_model;
+
+        const auto& oport = proto_comp_model->outputs()[from.second];
+        ov::Shape oshape = oport.get_shape();
+
+        if (proto_comp_model_desc.spatial) {
+            oshape[proto_comp_model_desc.spatial->out_dim] = proto_comp_model_desc.spatial->range;
+        }
+        const auto& device = m_model->funcall_mem_device(real_idx);
+        TensorPtr new_tensor = m_alloc(oport.get_element_type(), oshape, device);
+        NPUW_ASSERT(new_tensor);
+
+        assigned_memory.push_back(Assignment{new_tensor, from});
+        m_table[from] = new_tensor;
+    }
+    LOG_VERB("Done");
+}
+
+ov::npuw::TensorPtr ov::npuw::FuncMemMgr::get_tensor(const LinkFrom& from) {
+    return m_table.at(from);
+}
+
 ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptr<ov::npuw::CompiledModel>& compiled_model)
-    : IBaseInferRequest(compiled_model) {
+    : IBaseInferRequest(compiled_model),
+      m_func_mem_mgr(compiled_model) {
+    using namespace std::placeholders;
+    m_func_mem_mgr.set_alloc(std::bind(&JustInferRequest::allocMem, this, _1, _2, _3));
+    m_func_mem_mgr.assign_memory();
+
     m_use_function_pipelining = m_npuw_model->m_cfg.get<::intel_npu::NPUW_FUNCALL_ASYNC>();
     if (m_use_function_pipelining) {
         LOG_WARN("Function call pipelining is enabled for " << m_npuw_model->m_name
@@ -67,27 +232,20 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptr<ov::npuw::Co
                 for (auto&& p : proto_comp_model_desc.spatial->params) {
                     const auto& iport = proto_comp_model_desc.compiled_model->inputs()[p.idx];
                     m_spatial_io[real_idx].input_tails[p.idx] =
-                        allocTensor(iport, m_npuw_model->funcall_mem_device(real_idx));
+                        allocOut(iport, m_npuw_model->funcall_mem_device(real_idx));
                 }
                 const auto num_outs = proto_comp_model_desc.compiled_model->outputs().size();
                 for (std::size_t out_idx = 0u; out_idx < num_outs; out_idx++) {
                     const auto& oport = proto_comp_model_desc.compiled_model->outputs()[out_idx];
                     m_spatial_io[real_idx].output_tails[out_idx] =
-                        allocTensor(oport, m_npuw_model->funcall_mem_device(real_idx));
+                        allocOut(oport, m_npuw_model->funcall_mem_device(real_idx));
                 }
             }  // if(spatial)
 
             for (size_t out_idx = 0; out_idx < num_outputs; out_idx++) {
-                const auto& port = proto_comp_model->outputs()[out_idx];
-                ov::Shape shape = port.get_shape();
-
-                // If the subgraph is spatial, promote the output size to the full vector size
-                if (proto_comp_model_desc.spatial) {
-                    shape[proto_comp_model_desc.spatial->out_dim] = proto_comp_model_desc.spatial->range;
-                }
-                m_funcall_result[LinkFrom{i, out_idx}] =
-                    allocTensor(port.get_element_type(), shape, m_npuw_model->funcall_mem_device(real_idx));
+                const auto from = LinkFrom{i, out_idx};
+                m_funcall_result[from] = m_func_mem_mgr.get_tensor(from);
             }
             if (real_idx != i) {
                 // If this function call is NOT the function body, do nothing here - the original
@@ -152,7 +310,7 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptr<ov::npuw::Co
     for (size_t i = 0; i < m_npuw_model->inputs().size(); i++) {
         const auto& port = m_npuw_model->inputs()[i];
-        ov::SoPtr<ov::ITensor> allocated = allocTensor(port, m_npuw_model->global_mem_device());
+        ov::SoPtr<ov::ITensor> allocated = allocOut(port, m_npuw_model->global_mem_device());
         m_input_tensors.push_back(allocated);
         m_input_allocated.insert(allocated->data());
         m_port_to_tensor[port] = TensorStorage{m_input_tensors.back(), true};
@@ -174,7 +332,7 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptr<ov::npuw::Co
                           ? funcall_result_iter->second  // Function calls have their tensors allocated, so just use one
-                          : allocTensor(port, m_npuw_model->global_mem_device());
+                          : allocOut(port, m_npuw_model->global_mem_device());
 
         m_output_tensors.push_back(tensor);
         m_port_to_tensor[port] = TensorStorage{tensor, true};
@@ -920,27 +1078,22 @@ void ov::npuw::JustInferRequest::unsafe_run_this_prep_next(std::size_t idx, bool
     }  // if (replaced_by)
 }
 
-ov::SoPtr<ov::ITensor> ov::npuw::JustInferRequest::allocTensor(const ov::element::Type type,
-                                                               const ov::Shape& shape,
-                                                               const std::string& device) {
+ov::npuw::TensorPtr ov::npuw::JustInferRequest::allocMem(const ov::element::Type type,
+                                                         const ov::Shape& shape,
+                                                         const std::string& device) {
     if (device == "CPU" || ov::shape_size(shape) == 0) {
         return ov::get_tensor_impl(ov::Tensor(type, shape));
     }
 
-    ov::SoPtr<ov::ITensor> remote_tensor;
-    ov::Tensor allocated_tensor;
-    {
-        std::lock_guard<std::mutex> guard(m_alloc_mutex);
-        m_remote_ctx = m_npuw_model->get_plugin()->get_core()->get_default_context(device)._ptr;
-        remote_tensor = m_remote_ctx->create_host_tensor(type, shape);
-        allocated_tensor = ov::make_tensor(remote_tensor);
-    }
-    return ov::get_tensor_impl(allocated_tensor);
+    std::lock_guard<std::mutex> guard(m_alloc_mutex);
+    auto remote_ctx = m_npuw_model->get_plugin()->get_core()->get_default_context(device)._ptr;
+    auto remote_tensor = remote_ctx->create_host_tensor(type, shape);
+    return ov::get_tensor_impl(ov::make_tensor(remote_tensor));
 }
 
-ov::SoPtr<ov::ITensor> ov::npuw::JustInferRequest::allocTensor(const ov::Output<const ov::Node>& node,
-                                                               const std::string& device) {
-    return allocTensor(node.get_element_type(), node.get_shape(), device);
+ov::npuw::TensorPtr ov::npuw::JustInferRequest::allocOut(const ov::Output<const ov::Node>& node,
+                                                         const std::string& device) {
+    return allocMem(node.get_element_type(), node.get_shape(), device);
 }
 
 void ov::npuw::JustInferRequest::subscribe_subrequest(std::size_t idx, Completed cb) {
diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp
index 7335b54c30062e..88838d8b39d75f 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp
@@ -22,6 +22,56 @@ namespace npuw {
 class CompiledModel;
 class AsyncInferRequest;
 
+using LinkFrom = std::pair<std::size_t, std::size_t>;  // FIXME: This is a third, if not fourth, definition of such structure
+
+using TensorPtr = ov::SoPtr<ov::ITensor>;
+
+class MemAccessSim {
+public:
+    explicit MemAccessSim(const std::shared_ptr<CompiledModel>& compiled_model);
+
+    using ReadList = std::list<LinkFrom>;
+    const ReadList& read_list(std::size_t idx) const;
+
+    std::size_t remaining_reads(const LinkFrom& from);
+    void register_read(const LinkFrom& from);
+
+private:
+    std::map<LinkFrom, std::size_t> m_remaining_reads;
+    std::vector<ReadList> m_read_list;
+};
+
+class FuncMemMgr {
+    MemAccessSim m_sim;
+    std::shared_ptr<CompiledModel> m_model;
+
+    void assign(const LinkFrom& from);
+
+    // Function ID -> Output port number
+    using FO = std::pair<std::size_t, std::size_t>;
+    struct Assignment {
+        TensorPtr ptr;
+        LinkFrom from;
+    };
+    std::map<FO, std::vector<Assignment>> m_memory;  // Dynamic assignment table
+    std::map<LinkFrom, TensorPtr> m_table;           // Static allocation/assignment table
+
+public:
+    explicit FuncMemMgr(const std::shared_ptr<CompiledModel>& compiled_model);
+
+    using AllocFcn = std::function<TensorPtr(const ov::element::Type&, const ov::Shape&, const std::string&)>;
+    void set_alloc(AllocFcn&& fcn);
+    void assign_memory();
+
+    TensorPtr get_tensor(const LinkFrom& from);
+
+private:
+    AllocFcn m_alloc;
+};
+
 class JustInferRequest final : public IBaseInferRequest {
 public:
     explicit JustInferRequest(const std::shared_ptr<ov::npuw::CompiledModel>& compiled_model);
@@ -64,15 +114,11 @@ class JustInferRequest final : public IBaseInferRequest {
     void connect_subrequests();
     void recreate_subrequests(std::size_t idx);
 
-    ov::SoPtr<ov::ITensor> allocTensor(const ov::element::Type type, const ov::Shape& shape, const std::string& device);
-    ov::SoPtr<ov::ITensor> allocTensor(const ov::Output<const ov::Node>& node, const std::string& device);
+    TensorPtr allocMem(const ov::element::Type type, const ov::Shape& shape, const std::string& device);
+    TensorPtr allocOut(const ov::Output<const ov::Node>& node, const std::string& device);
 
-    using LinkFrom = std::pair<std::size_t, std::size_t>;  // FIXME: This is a third, if not fourth, definition of such structure
-    using TensorPtr = ov::SoPtr<ov::ITensor>;
-    std::map<LinkFrom, TensorPtr> m_funcall_result;
+    FuncMemMgr m_func_mem_mgr;                       // Owns memory
+    std::map<LinkFrom, TensorPtr> m_funcall_result;  // Provides a convenient link
 
     bool is_pipelined(std::size_t idx) const;
     bool m_use_function_pipelining = false;
@@ -103,8 +149,6 @@ class JustInferRequest final : public IBaseInferRequest {
     std::vector<GlobalIO> m_subrequests_gio;
     std::mutex m_alloc_mutex;
-    std::shared_ptr<ov::IRemoteContext> m_remote_ctx = nullptr;
-
     std::unordered_set<void*> m_input_allocated;
 };

From 341de8db8802b4068443ca6106c6a3d06fa5b866 Mon Sep 17 00:00:00 2001
From: Artemy Skrebkov
Date: Tue, 15 Oct 2024 13:29:15 +0100
Subject: [PATCH 025/112] Use find_library to find clang on linux (#27040)

### Details:

**Problem:**
When a project includes both OpenVINO (via the OpenVINO developer package) and LLVM as an in-tree dependency, it can lead to conflicts between different versions of LLVM.

**Solution:**
* One way to resolve this is by including clang directly using `find_library`, rather than relying on `find_package`.
* This approach avoids the implicit call to `find_package(LLVM)` that occurs when using `find_package(CLANG)`, thus preventing version conflicts.
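Roughly, the new lookup boils down to the following minimal sketch (assumptions: a plain CMake project, placeholder paths, and a hard-coded `clang_version`; the real script uses the `find_host_library` wrapper with extra search options, as the diff below shows):

```cmake
# Sketch: locate libclang directly instead of find_package(CLANG), which
# would implicitly run find_package(LLVM) and may clash with an in-tree LLVM.
set(clang_version 15)  # placeholder; the real scripts derive this elsewhere
find_library(libclang_location
             NAMES clang libclang libclang-${clang_version}
             PATHS /usr/lib /usr/local/lib /usr/lib/llvm-${clang_version}/lib)
if(libclang_location)
    message(STATUS "Found libclang: ${libclang_location}")
else()
    message(WARNING "libclang was not found; ncc style checks are skipped")
endif()
```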
dependency, it can lead to conflicts between different versions of LLVM. **Solution:** * One way to resolve this is by including clang directly using `find_library`, rather than relying on `find_package`. * This approach avoids the implicit call to `find_package(LLVM)` that occurs when using `find_package(CLANG)`, thus preventing version conflicts. ### Tickets: - E-78260 --------- Co-authored-by: Ilya Lavrenov --- .../ncc_naming_style/ncc_naming_style.cmake | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake b/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake index d20582b03cb9fc..67a58d56e901e2 100644 --- a/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake +++ b/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake @@ -80,11 +80,11 @@ if(ENABLE_NCC_STYLE) set(CMAKE_FIND_LIBRARY_PREFIXES ${_old_CMAKE_FIND_LIBRARY_PREFIXES}) set(CMAKE_FIND_LIBRARY_SUFFIXES ${_old_CMAKE_FIND_LIBRARY_SUFFIXES}) else() - find_host_package(Clang QUIET) - endif() - - if(Clang_FOUND AND TARGET libclang) - get_target_property(libclang_location libclang LOCATION) + find_host_library(libclang_location + NAMES clang libclang libclang-${clang_version} libclang-${clang_version}.so libclang-${clang_version}.so.1 + PATHS /usr/lib /usr/local/lib /usr/lib/llvm-${clang_version}/lib /usr/lib/x86_64-linux-gnu + NO_DEFAULT_PATH + NO_CMAKE_FIND_ROOT_PATH) endif() if(NOT libclang_location) From 485e5802ec8bf402dc27db1e60f782cede28568c Mon Sep 17 00:00:00 2001 From: "Anastasiya(Asya) Pronina" Date: Tue, 15 Oct 2024 15:11:31 +0200 Subject: [PATCH 026/112] Fixed tests for NPUW (#27060) ### Details: - *Fixed tests for changes in FUNCALL_ASYNC and calls to get_default_method()* ### Tickets: - *EISW-143057* - *CVS-154787* --- .../behavior/npuw/behavior_tests.cpp | 34 ++++++++----------- .../behavior/npuw/mocks/mock_plugins.cpp | 2 +- .../behavior/npuw/mocks/mock_plugins.hpp | 14 ++++++++ 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/plugins/intel_npu/tests/functional/behavior/npuw/behavior_tests.cpp b/src/plugins/intel_npu/tests/functional/behavior/npuw/behavior_tests.cpp index 093e3235afb78f..b55d39bead49bb 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/npuw/behavior_tests.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/npuw/behavior_tests.cpp @@ -489,30 +489,24 @@ TEST_F(BehaviorTestsNPUWOnlinePartitioning, FoldingAndPipelining) { EXPECT_COMPILE_MODEL(mock_cpu, TIMES(0)); } - for (int i = 0; i < 3; i++) { - // Here we will create 2 infer requests per model, - // so `create_sync_infer_request()` should be called twice - // per model: - EXPECT_CREATE_SYNC_INFER_REQ(mock_npu, MODEL(i), TIMES(2)); - } - - // 1st model 1st infer request is called once -- head - EXPECT_INFER_FOR(mock_npu, MODEL(0), INFER_REQ(0), TIMES(1)); - // 1st model 2nd infer request is never called, - // it is not a function and is not repeated - EXPECT_INFER_FOR(mock_npu, MODEL(0), INFER_REQ(1), TIMES(0)); + // 1 infer request for head: + EXPECT_CREATE_SYNC_INFER_REQ(mock_npu, MODEL(0), TIMES(1)); + // 2 infer requests for function, `create_sync_infer_request()` + // should be called twice here: + EXPECT_CREATE_SYNC_INFER_REQ(mock_npu, MODEL(1), TIMES(2)); + // 1 infer request for tail: + EXPECT_CREATE_SYNC_INFER_REQ(mock_npu, MODEL(2), TIMES(1)); + + // Head's infer request is called once: + EXPECT_INFER(mock_npu, MODEL(0), TIMES(1)); - // Repeated block - // 2nd model 1st infer request is 
called 5 times + // Repeated block's model 1st infer request is called 5 times: EXPECT_INFER_FOR(mock_npu, MODEL(1), INFER_REQ(0), TIMES(5)); - // 2nd model 2nd infer request (brother of 1st one) is called 5 times + // Repeated block's model 2nd infer request (brother of 1st one) is called 5 times: EXPECT_INFER_FOR(mock_npu, MODEL(1), INFER_REQ(1), TIMES(5)); - // 3rd model 1st infer request is called once -- tail - EXPECT_INFER_FOR(mock_npu, MODEL(2), INFER_REQ(0), TIMES(1)); - // 3rd model 2nd infer request is never called, - // it is not a function and is not repeated - EXPECT_INFER_FOR(mock_npu, MODEL(2), INFER_REQ(1), TIMES(0)); + // Tail's infer request is called once: + EXPECT_INFER(mock_npu, MODEL(2), TIMES(1)); // Register mock objects as plugins in OpenVINO: register_mock_plugins_in_ov(); diff --git a/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.cpp b/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.cpp index ed4bf72a945f79..4c6c470d81e47c 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.cpp @@ -245,7 +245,7 @@ void MockPluginBase::create_implementation() { }); ON_CALL(*this, get_default_context) .WillByDefault([](const ov::AnyMap& remote_properties) -> ov::SoPtr { - OPENVINO_NOT_IMPLEMENTED; + return std::make_shared(device_name); }); ON_CALL(*this, import_model(testing::_, testing::_)) .WillByDefault([](std::istream& model, const ov::AnyMap& properties) diff --git a/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.hpp b/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.hpp index 4d720796c6abbf..2cad81b6c1344d 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.hpp @@ -23,6 +23,20 @@ namespace ov { namespace npuw { namespace tests { +class MockRemoteContext : public ov::IRemoteContext { + std::string m_name; + +public: + MockRemoteContext(std::string name) : m_name(std::move(name)) {} + const std::string& get_device_name() const override { + return m_name; + } + MOCK_METHOD(ov::SoPtr, + create_tensor, + (const ov::element::Type&, const ov::Shape&, const ov::AnyMap&)); + MOCK_METHOD(const ov::AnyMap&, get_property, (), (const)); +}; + class MockCompiledModelBase; using MockCompiledModel = testing::NiceMock; From d52cf4a7f854c506b4665c0e04f18e35d495618e Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Tue, 15 Oct 2024 17:18:12 +0200 Subject: [PATCH 027/112] Update numpy in tests reqs (#27061) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- tests/constraints.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/constraints.txt b/tests/constraints.txt index 053e9e93855aba..616aea79c82153 100644 --- a/tests/constraints.txt +++ b/tests/constraints.txt @@ -1,4 +1,4 @@ -numpy>=1.16.6,<1.27 +numpy>=1.16.6,<2.1.0 attrs==23.2.0 distro==1.9.0 h5py>=3.1.0,<3.12.0 From c6801aacdeb87ddf668290d3bd4e7a863f4b88af Mon Sep 17 00:00:00 2001 From: Shaojun_Yao Date: Tue, 15 Oct 2024 23:20:17 +0800 Subject: [PATCH 028/112] [NPUW] extend DQ & PMM processing and make reduceSum not to keep axis (#26779) ### Details: - extend DQ and PMM to support more patterns. e.g. fp16 matmuls - Make reduceSum not to keep axis because then it will convert to poolings in compiler. 
Otherwise, reduceSum will be converted to a convolution, which is less efficient than poolings.

### Tickets:
 - E-140570

---------

Co-authored-by: Dmitry Matveev
---
 .../src/plugin/npuw/partitioning/patterns/opt.cpp | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
index c9a162421fe243..26b24a15509a4f 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp
@@ -335,7 +335,7 @@ DQMatMulGQ2i::DQMatMulGQ2i(Context::Ref ctx) {
     auto qcvtw = opp::wrap_type<ov::op::v0::Convert>({qweight});
     auto qmuls = opp::wrap_type<ov::op::v1::Multiply>({qcvtw, qcoeff});
     auto qreshp = opp::wrap_type<ov::op::v1::Reshape>({qmuls, opp::any_input()});
-    auto qcvtr = opp::wrap_type<ov::op::v0::Convert>({qreshp});
+    auto qcvtr = opp::optional<ov::op::v0::Convert>({qreshp->output(0)});
     auto qmmi = opp::any_input();
     auto qmm = opp::wrap_type<ov::op::v0::MatMul>({qmmi, qcvtr});
@@ -409,13 +409,18 @@ DQMatMulGQ2i::DQMatMulGQ2i(Context::Ref ctx) {
         auto rshp_ccat = std::make_shared<ov::op::v1::Reshape>(scaled, rshp_ccat_c, false);
 
         auto reduce_axis = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{}, 1);
-        auto reduce = std::make_shared<ov::op::v1::ReduceSum>(rshp_ccat, reduce_axis, true);
+        // Make reduceSum not keep the axis, so that it is converted to poolings by the compiler.
+        // Otherwise, reduceSum is converted to a convolution, which is less efficient than poolings.
+        auto reduce = std::make_shared<ov::op::v1::ReduceSum>(rshp_ccat, reduce_axis, false);
 
         auto rshp_out_c = std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{3}, out_shape);
         auto rshp_out = std::make_shared<ov::op::v1::Reshape>(reduce, rshp_out_c, false);
 
-        // Convert the result to f32 to maintain the graph contracts. FIXME should be avoided
-        auto out = std::make_shared<ov::op::v0::Convert>(rshp_out, ov::element::f32);
+        // Convert the result to f32 to maintain the graph contracts, if required.
+        std::shared_ptr<ov::Node> out = rshp_out;
+        if (matched_matmul->get_element_type() == ov::element::f32) {
+            out = std::make_shared<ov::op::v0::Convert>(rshp_out, ov::element::f32);
+        }
 
         // Now..
Reconnect the matmul readers to the new output (reducesum) for (auto&& r : matched_matmul->output(0).get_target_inputs()) { @@ -752,7 +757,7 @@ void mergeParallelMatMuls(const std::shared_ptr& m, Context& ctx) { auto new_cvt = std::make_shared(new_w, new_s->get_element_type()); std::shared_ptr new_mul = std::make_shared(new_cvt, new_s); - if (new_s->get_element_type() == ov::element::f16) { + if ((new_s->get_element_type() == ov::element::f16) && (orig_multiply.get_element_type() == ov::element::f32)) { new_mul = std::make_shared(new_mul, ov::element::f32); } auto new_w_shape = new_w->get_shape(); From 4afbcbd75583201efce09061667d2ddad6c98d6d Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Tue, 15 Oct 2024 21:09:29 +0100 Subject: [PATCH 029/112] NPUW: Enable PMM for prefill by default (#27057) ### Details: - Aligns memory format between prefill and kvcache - Recommended to be disabled by default when sharing is in place (e.g, when DQ is applied to both models) ### Tickets: - E-143367 --- .../intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp index 26b24a15509a4f..077fb6d6660132 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp @@ -695,11 +695,6 @@ DQParMMGQ::DQParMMGQ(Context::Ref ctx) { return false; } - if (qmmi_shape[1] != 1 && !ctx.get().is_spatial) { - // For non 1-token cases, do transformation if and only if and only if the block is spatial - return false; - } - if (!matmul->get_transpose_a() && !matmul->get_transpose_b()) { ctx.get().register_parallel_matmul(node_to_output.at(qmmi), 2, Context::DQParMM{w_param, s_param, matmul}); } else if (!matmul->get_transpose_a() && matmul->get_transpose_b()) { From 0663325041f1e42ef2b5ec5bb982c330647d65a6 Mon Sep 17 00:00:00 2001 From: "Anastasiya(Asya) Pronina" Date: Wed, 16 Oct 2024 04:18:02 +0200 Subject: [PATCH 030/112] [NPUW] Polished NPUW tests fix and added NPUW tests paths to labeler (#27072) ### Details: - *Added comments on test infrastructure changes* - *Added tests paths to labeler* ### Tickets: - *EISW-143057* - *[CVS-154787](https://jira.devtools.intel.com/browse/CVS-154787)* --- .github/labeler.yml | 2 ++ .../tests/functional/behavior/npuw/mocks/mock_plugins.cpp | 1 + .../tests/functional/behavior/npuw/mocks/mock_plugins.hpp | 3 +++ 3 files changed, 6 insertions(+) diff --git a/.github/labeler.yml b/.github/labeler.yml index 5421d669ed224f..daa5375b175bd3 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -76,6 +76,8 @@ 'category: NPUW': - 'src/plugins/intel_npu/src/plugin/npuw/**/*' +- 'src/plugins/intel_npu/tests/functional/behavior/npuw/**/*' +- 'src/plugins/intel_npu/tests/unit/behavior/npuw/**/*' 'category: HETERO': - 'src/plugins/hetero/**/*' diff --git a/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.cpp b/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.cpp index 4c6c470d81e47c..950d80b279324f 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.cpp @@ -243,6 +243,7 @@ void MockPluginBase::create_implementation() { .WillByDefault([](const ov::AnyMap& remote_properties) -> ov::SoPtr { OPENVINO_NOT_IMPLEMENTED; }); + // This method is utilized for 
remote tensor allocation in NPUW JustInferRequest and Weight bank. ON_CALL(*this, get_default_context) .WillByDefault([](const ov::AnyMap& remote_properties) -> ov::SoPtr { return std::make_shared(device_name); diff --git a/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.hpp b/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.hpp index 2cad81b6c1344d..e8f9e134fcb324 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/npuw/mocks/mock_plugins.hpp @@ -23,6 +23,9 @@ namespace ov { namespace npuw { namespace tests { +// Need for remote tensor allocation in NPUW JustInferRequest and Weight bank. +// They utilize "create_host_tensor()" method. +// TODO: Mock "create_host_tensor()" method and add tests for it. class MockRemoteContext : public ov::IRemoteContext { std::string m_name; From 58073cad59fec5299657e2a79fc2a549b45683a3 Mon Sep 17 00:00:00 2001 From: Przemyslaw Wysocki Date: Wed, 16 Oct 2024 06:38:42 +0200 Subject: [PATCH 031/112] Shape inference improvements for `SearchSorted-15` (#26980) ### Details: - Based on branch https://github.com/openvinotoolkit/openvino/pull/26904 - Follow up with shape inference improvements - Allow scalar values input - Expand shape validation - Add CPU shape infer tests ### Tickets: - CVS-154368 --------- Co-authored-by: Piotr Kowalczyk Co-authored-by: Michal Lukaszewski Co-authored-by: Pawel Raasz --- .../include/openvino/op/search_sorted.hpp | 2 - .../include/search_sorted_shape_inference.hpp | 42 ++++--- src/core/src/op/search_sorted.cpp | 27 +---- src/core/tests/type_prop/search_sorted.cpp | 35 +++++- .../src/shape_inference/shape_inference.cpp | 2 + .../search_sorted_shape_inference_test.cpp | 114 ++++++++++++++++++ 6 files changed, 178 insertions(+), 44 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/search_sorted_shape_inference_test.cpp diff --git a/src/core/include/openvino/op/search_sorted.hpp b/src/core/include/openvino/op/search_sorted.hpp index 78650942ee8f0f..c370ba46b2f182 100644 --- a/src/core/include/openvino/op/search_sorted.hpp +++ b/src/core/include/openvino/op/search_sorted.hpp @@ -36,8 +36,6 @@ class OPENVINO_API SearchSorted : public Op { m_right_mode = right_mode; } - bool validate() const; - private: bool m_right_mode{}; }; diff --git a/src/core/shape_inference/include/search_sorted_shape_inference.hpp b/src/core/shape_inference/include/search_sorted_shape_inference.hpp index 7ea0598cffbc87..4b9d888891e835 100644 --- a/src/core/shape_inference/include/search_sorted_shape_inference.hpp +++ b/src/core/shape_inference/include/search_sorted_shape_inference.hpp @@ -12,28 +12,40 @@ namespace op { namespace v15 { template > std::vector shape_infer(const SearchSorted* op, const std::vector& input_shapes) { - // [HACK]: By convention, shape_infer should also perform node validation.. - op->validate(); const auto& sorted_shape = input_shapes[0]; const auto& values_shape = input_shapes[1]; + const auto is_sorted_rank_static = sorted_shape.rank().is_static(); + const auto is_values_rank_static = values_shape.rank().is_static(); - auto output_shape = values_shape; - - // 1. If we know that the sorted sequence is 1D, than output shape can be anything. 
- if (sorted_shape.rank().is_static() && sorted_shape.rank().get_length() == 1) { - return {std::move(output_shape)}; + if (!is_sorted_rank_static || sorted_shape.size() == 1) { + // If the sorted sequence is 1D, then any shape of the values input is allowed. + // The shape of the output is the same as the shape of the values. + return {values_shape}; } - // 2. ND tensor case or rank not known. - auto sorted_shape_last_dynamic = sorted_shape; - if (sorted_shape.rank().is_static()) { - sorted_shape_last_dynamic[sorted_shape.rank().get_length() - 1] = Dimension::dynamic(); + const auto sorted_in_rank = sorted_shape.size(); + NODE_SHAPE_INFER_CHECK(op, input_shapes, sorted_in_rank > 0, "The sorted sequence input cannot be a scalar."); + + TRShape output_shape; + if (!is_values_rank_static) { + output_shape = sorted_shape; + output_shape[sorted_in_rank - 1] = Dimension::dynamic(); + } else { + output_shape = values_shape; + NODE_SHAPE_INFER_CHECK( + op, + input_shapes, + sorted_in_rank == values_shape.size(), + "If the shape of sorted sequence is not 1D, the ranks of the inputs have to be compatible."); + using TDim = typename TShape::value_type; + for (size_t i = 0; i < sorted_in_rank - 1; ++i) { + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + TDim::merge(output_shape[i], values_shape[i], sorted_shape[i]), + "All dimensions but the last one have to be compatible."); + } } - const bool sorted_values_merge_success = TShape::merge_into(output_shape, sorted_shape_last_dynamic); - - NODE_VALIDATION_CHECK(op, sorted_values_merge_success, "Shapes of sorted sequence and values are not compatible."); - return {std::move(output_shape)}; } } // namespace v15 diff --git a/src/core/src/op/search_sorted.cpp b/src/core/src/op/search_sorted.cpp index d3f26a674eef91..8b9bb012b27106 100644 --- a/src/core/src/op/search_sorted.cpp +++ b/src/core/src/op/search_sorted.cpp @@ -18,34 +18,11 @@ SearchSorted::SearchSorted(const Output& sorted_sequence, const Output 1) { - NODE_VALIDATION_CHECK(this, - sorted_shape.rank().get_length() == values_shape.rank().get_length(), - "Sorted sequence and values have different ranks."); - - for (int64_t i = 0; i < sorted_shape.rank().get_length() - 1; ++i) { - NODE_VALIDATION_CHECK(this, - sorted_shape[i].compatible(values_shape[i]), - "Sorted sequence and values has different ", - i, - " dimension."); - } - } - - return true; -} - -void SearchSorted::validate_and_infer_types() { - OV_OP_SCOPE(v15_SearchSorted_validate_and_infer_types); const auto& output_shapes = shape_infer(this, ov::util::get_node_input_partial_shapes(*this)); set_output_type(0, ov::element::i64, output_shapes[0]); } diff --git a/src/core/tests/type_prop/search_sorted.cpp b/src/core/tests/type_prop/search_sorted.cpp index efc2c865416143..6dd10ad0ac3f5f 100644 --- a/src/core/tests/type_prop/search_sorted.cpp +++ b/src/core/tests/type_prop/search_sorted.cpp @@ -57,6 +57,10 @@ TEST(type_prop, search_sorted_shape_infer_sorted_1d_values_dynamic) { PerformShapeTest({8}, {-1, -1, 3}, {-1, -1, 3}); } +TEST(type_prop, search_sorted_shape_infer_scalar_values) { + PerformShapeTest({100}, {}, {}); +} + TEST(type_prop, search_sorted_shape_infer_both_dynamic_1) { PerformShapeTest({1, -1, 7, -1}, {-1, 3, -1, 10}, {1, 3, 7, 10}); } @@ -93,6 +97,19 @@ TEST(type_prop, search_sorted_shape_infer_both_dynamic_9) { PerformShapeTest({-1, -1}, PartialShape::dynamic(), {-1, -1}); } +TEST(type_prop, search_sorted_shape_symbols) { + PartialShape sorted_shape{1, 3, 7, 100}; + PartialShape values_shape{-1, -1, -1, 10}; + auto sorted_symbols 
= set_shape_symbols(sorted_shape); + auto values_symbols = set_shape_symbols(values_shape); + auto sorted = make_shared(element::i32, sorted_shape); + auto values = make_shared(element::i32, values_shape); + auto search_sorted_op = make_shared(sorted, values); + EXPECT_EQ(search_sorted_op->get_element_type(), element::i64); + EXPECT_THAT(get_shape_symbols(search_sorted_op->get_output_partial_shape(0)), + testing::ElementsAre(values_symbols[0], values_symbols[1], values_symbols[2], values_symbols[3])); +} + TEST(type_prop, search_sorted_shape_infer_different_types) { auto sorted = make_shared(element::f32, Shape{1, 3, 6}); auto values = make_shared(element::i32, Shape{1, 3, 6}); @@ -102,13 +119,27 @@ TEST(type_prop, search_sorted_shape_infer_different_types) { TEST(type_prop, search_sorted_shape_infer_wrong_rank) { auto sorted = make_shared(element::i32, Shape{1, 1, 3, 6}); auto values = make_shared(element::i32, Shape{1, 3, 6}); - EXPECT_THROW_SUBSTRING(sorted, values, std::string("Sorted sequence and values have different ranks")); + EXPECT_THROW_SUBSTRING(sorted, + values, + std::string("sequence is not 1D, the ranks of the inputs have to be compatible")); } TEST(type_prop, search_sorted_shape_infer_wrong_dim) { auto sorted = make_shared(element::i32, Shape{1, 1, 3, 6}); auto values = make_shared(element::i32, Shape{1, 1, 5, 6}); - EXPECT_THROW_SUBSTRING(sorted, values, std::string(" different 2 dimension.")); + EXPECT_THROW_SUBSTRING(sorted, values, std::string("All dimensions but the last one have to be compatible")); +} + +TEST(type_prop, search_sorted_shape_infer_scalar_sorted_sequence) { + auto sorted = make_shared(element::i32, Shape{}); + auto values = make_shared(element::i32, Shape{1, 1, 5, 6}); + EXPECT_THROW_SUBSTRING(sorted, values, std::string("The sorted sequence input cannot be a scalar")); +} + +TEST(type_prop, search_sorted_shape_infer_scalar_values_and_nd_sequence) { + auto sorted = make_shared(element::i32, Shape{2, 2}); + auto values = make_shared(element::i32, Shape{}); + EXPECT_THROW_SUBSTRING(sorted, values, std::string("the ranks of the inputs have to be compatible")); } #undef EXPECT_THROW_SUBSTRING \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp b/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp index e5d87c578712f6..5db6f97bba8c02 100644 --- a/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/shape_inference.cpp @@ -101,6 +101,7 @@ #include "scaled_dot_product_attention_shape_inference.hpp" #include "scatter_elements_update_shape_inference.hpp" #include "scatter_nd_base_shape_inference.hpp" +#include "search_sorted_shape_inference.hpp" #include "select_shape_inference.hpp" #include "shape_nodes.hpp" #include "shuffle_channels_shape_inference.hpp" @@ -405,6 +406,7 @@ using IStaticShapeInferFactory = template <> const IStaticShapeInferFactory::TRegistry IStaticShapeInferFactory::registry{ // opset15 + _OV_OP_SHAPE_INFER_MASK_REG(op::v15::SearchSorted, ShapeInferTA, util::bit::mask()), _OV_OP_SHAPE_INFER_MASK_REG(op::v15::StringTensorUnpack, ShapeInferTA, util::bit::mask(0)), _OV_OP_SHAPE_INFER_MASK_REG(op::v15::StringTensorPack, ShapeInferTA, util::bit::mask(0, 1)), _OV_OP_SHAPE_INFER_MASK_REG(opset15::EmbeddingBagOffsets, ShapeInferTA, util::bit::mask()), diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/search_sorted_shape_inference_test.cpp 
b/src/plugins/intel_cpu/tests/unit/shape_inference_test/search_sorted_shape_inference_test.cpp new file mode 100644 index 00000000000000..ac0b4763b7bf5d --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/search_sorted_shape_inference_test.cpp @@ -0,0 +1,114 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; +using ov::op::v0::Constant; +using ov::op::v0::Parameter; +using testing::HasSubstr; + +class SearchSortedShapeInferenceTest : public OpStaticShapeInferenceTest {}; + +TEST_F(SearchSortedShapeInferenceTest, same_dimensions_nd_inputs) { + const auto sorted = std::make_shared(element::i64, PartialShape::dynamic()); + const auto values = std::make_shared(element::i64, PartialShape::dynamic()); + const auto op = make_op(sorted, values); + const auto input_shapes = ShapeVector{StaticShape{1, 3, 6}, StaticShape{1, 3, 6}}; + const auto output_shapes = shape_inference(op.get(), input_shapes); + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({1, 3, 6})); +} + +TEST_F(SearchSortedShapeInferenceTest, scalar_values) { + const auto sorted = std::make_shared(element::i64, PartialShape::dynamic()); + const auto values = std::make_shared(element::i64, PartialShape::dynamic()); + const auto op = make_op(sorted, values); + const auto input_shapes = ShapeVector{StaticShape{3}, StaticShape{}}; + const auto output_shapes = shape_inference(op.get(), input_shapes); + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape{}); +} + +TEST_F(SearchSortedShapeInferenceTest, different_last_dim) { + const auto sorted = std::make_shared(element::i64, PartialShape::dynamic()); + const auto values = std::make_shared(element::i64, PartialShape::dynamic()); + const auto op = make_op(sorted, values); + const auto input_shapes = ShapeVector{StaticShape{1, 3, 7, 100}, StaticShape{1, 3, 7, 10}}; + const auto output_shapes = shape_inference(op.get(), input_shapes); + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({1, 3, 7, 10})); +} + +TEST_F(SearchSortedShapeInferenceTest, 1d_inputs) { + const auto sorted = std::make_shared(element::i64, PartialShape::dynamic()); + const auto values = std::make_shared(element::i64, PartialShape::dynamic()); + const auto op = make_op(sorted, values); + const auto input_shapes = ShapeVector{StaticShape{5}, StaticShape{20}}; + const auto output_shapes = shape_inference(op.get(), input_shapes); + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({20})); +} + +TEST_F(SearchSortedShapeInferenceTest, 1d_sequence) { + const auto sorted = std::make_shared(element::i64, PartialShape::dynamic()); + const auto values = std::make_shared(element::i64, PartialShape::dynamic()); + const auto op = make_op(sorted, values); + const auto input_shapes = ShapeVector{StaticShape{50}, StaticShape{1, 3, 7, 10}}; + const auto output_shapes = shape_inference(op.get(), input_shapes); + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({1, 3, 7, 10})); +} + +TEST_F(SearchSortedShapeInferenceTest, element_type_consistency_validation) { + const auto sorted = std::make_shared(element::i64, PartialShape::dynamic()); + const auto values = std::make_shared(element::i32, PartialShape::dynamic()); + OV_EXPECT_THROW(std::ignore = make_op(sorted, values), + 
NodeValidationFailure, + testing::HasSubstr("must have the same element type")); +} + +TEST_F(SearchSortedShapeInferenceTest, input_shapes_ranks_validation) { + const auto sorted = std::make_shared(element::i32, PartialShape::dynamic()); + const auto values = std::make_shared(element::i32, PartialShape::dynamic()); + const auto op = make_op(sorted, values); + const auto input_shapes = ShapeVector{StaticShape{1, 3, 6}, StaticShape{1, 3, 6, 7}}; + OV_EXPECT_THROW(std::ignore = shape_inference(op.get(), input_shapes), + NodeValidationFailure, + testing::HasSubstr("the ranks of the inputs have to be compatible")); +} + +TEST_F(SearchSortedShapeInferenceTest, input_shapes_compatibility) { + const auto sorted = std::make_shared(element::i32, PartialShape::dynamic()); + const auto values = std::make_shared(element::i32, PartialShape::dynamic()); + const auto op = make_op(sorted, values); + const auto input_shapes = ShapeVector{StaticShape{1, 3, 6}, StaticShape{1, 6, 6}}; + OV_EXPECT_THROW(std::ignore = shape_inference(op.get(), input_shapes), + NodeValidationFailure, + testing::HasSubstr("All dimensions but the last one have to be compatible")); +} + +TEST_F(SearchSortedShapeInferenceTest, scalar_sorted_sequence) { + const auto sorted = std::make_shared(element::i32, PartialShape::dynamic()); + const auto values = std::make_shared(element::i32, PartialShape::dynamic()); + const auto op = make_op(sorted, values); + const auto input_shapes = ShapeVector{StaticShape{}, StaticShape{1, 6, 6}}; + OV_EXPECT_THROW(std::ignore = shape_inference(op.get(), input_shapes), + NodeValidationFailure, + testing::HasSubstr("The sorted sequence input cannot be a scalar")); +} + +TEST_F(SearchSortedShapeInferenceTest, scalar_values_and_ND_sequence) { + const auto sorted = std::make_shared(element::i32, PartialShape::dynamic()); + const auto values = std::make_shared(element::i32, PartialShape::dynamic()); + const auto op = make_op(sorted, values); + const auto input_shapes = ShapeVector{StaticShape{2, 3}, StaticShape{}}; + OV_EXPECT_THROW(std::ignore = shape_inference(op.get(), input_shapes), + NodeValidationFailure, + testing::HasSubstr("the ranks of the inputs have to be compatible")); +} From 005152aafa3b1bce4837d66b627f190e9f996b1e Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Wed, 16 Oct 2024 08:11:09 +0200 Subject: [PATCH 032/112] Enable mmap for reading model from cache (#26696) ### Details: - Enable mmap for reading model from cache ### Tickets: - CVS-152423 --- .../include/openvino/util/mmap_object.hpp | 14 ----- .../util/src/os/win/win_mmap_object.cpp | 5 -- .../openvino/runtime/shared_buffer.hpp | 58 ++++++++++++++++++- src/inference/src/cache_manager.hpp | 14 +++-- src/inference/src/dev/core_impl.cpp | 14 ++--- src/inference/src/dev/core_impl.hpp | 4 +- src/plugins/intel_cpu/src/plugin.cpp | 2 +- src/plugins/intel_cpu/src/utils/serialize.cpp | 13 ++--- src/plugins/intel_cpu/src/utils/serialize.hpp | 2 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 3 +- 10 files changed, 83 insertions(+), 46 deletions(-) diff --git a/src/common/util/include/openvino/util/mmap_object.hpp b/src/common/util/include/openvino/util/mmap_object.hpp index 364e1eed4ca712..3aba8e69c094a1 100644 --- a/src/common/util/include/openvino/util/mmap_object.hpp +++ b/src/common/util/include/openvino/util/mmap_object.hpp @@ -50,18 +50,4 @@ std::shared_ptr load_mmap_object(const std::string& path); std::shared_ptr load_mmap_object(const std::wstring& path); #endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT - -class MmapStream final : 
public std::ifstream {
-public:
-    MmapStream(const std::string& path) : std::ifstream(path, std::ios_base::binary) {
-        m_memory = ov::load_mmap_object(path);
-    }
-
-#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
-    MmapStream(const std::wstring& path);
-#endif  // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
-
-    std::shared_ptr<ov::MappedMemory> m_memory;
-};
-
 }  // namespace ov
diff --git a/src/common/util/src/os/win/win_mmap_object.cpp b/src/common/util/src/os/win/win_mmap_object.cpp
index 0b14d7ac774700..6f2515124273f1 100644
--- a/src/common/util/src/os/win/win_mmap_object.cpp
+++ b/src/common/util/src/os/win/win_mmap_object.cpp
@@ -141,11 +141,6 @@ std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::wstring& path) {
     holder->set(path);
     return holder;
 }
-
-MmapStream::MmapStream(const std::wstring& path) : std::ifstream(path.data(), std::ios_base::binary) {
-    m_memory = ov::load_mmap_object(path);
-}
-
 #endif
 
 }  // namespace ov
diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp
index 7f1e2e9ba7601f..2c784ef6081c35 100644
--- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp
+++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp
@@ -8,7 +8,7 @@
 
 namespace ov {
 
-/// \brief SharedBuffer class to store pointer to pre-allocated buffer.
+/// \brief SharedBuffer class to store pointer to pre-allocated buffer. Owns the shared object.
 template <typename T>
 class SharedBuffer : public ov::AlignedBuffer {
 public:
@@ -28,4 +28,60 @@ class SharedBuffer : public ov::AlignedBuffer {
     T _shared_object;
 };
 
+/// \brief SharedStreamBuffer class to store a pointer to a pre-allocated buffer and provide a streambuf interface.
+/// Can return a pointer to the shared memory and its size.
+class SharedStreamBuffer : public std::streambuf {
+public:
+    SharedStreamBuffer(char* data, size_t size) : m_data(data), m_size(size), m_offset(0) {}
+
+protected:
+    // override std::streambuf methods
+    std::streamsize xsgetn(char* s, std::streamsize count) override {
+        auto real_count = std::min<std::streamsize>(m_size - m_offset, count);
+        std::memcpy(s, m_data + m_offset, real_count);
+        m_offset += real_count;
+        return real_count;
+    }
+
+    int_type underflow() override {
+        return (m_size == m_offset) ? traits_type::eof() : traits_type::to_int_type(*(m_data + m_offset));
+    }
+
+    int_type uflow() override {
+        return (m_size == m_offset) ? traits_type::eof() : traits_type::to_int_type(*(m_data + m_offset++));
+    }
+
+    std::streamsize showmanyc() override {
+        return m_size - m_offset;
+    }
+
+    pos_type seekoff(off_type off,
+                     std::ios_base::seekdir dir,
+                     std::ios_base::openmode which = std::ios_base::in) override {
+        if (dir != std::ios_base::cur || which != std::ios_base::in) {
+            return pos_type(off_type(-1));
+        }
+        m_offset += off;
+        return pos_type(m_offset);
+    }
+
+    char* m_data;
+    size_t m_size;
+    size_t m_offset;
+};
+
+/// \brief OwningSharedStreamBuffer is a SharedStreamBuffer which owns its shared object.
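+///
+/// A minimal usage sketch (mirrors how the cache manager below uses it; the
+/// blob path is illustrative only):
+/// @code
+/// auto mmap = ov::load_mmap_object("cache/blob.bin");
+/// auto shared_buffer = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(
+///     mmap->data(), mmap->size(), mmap);
+/// ov::OwningSharedStreamBuffer buf(shared_buffer);
+/// std::istream stream(&buf);  // reads go straight to the mapped memory, no copy
+/// @endcode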
+class OwningSharedStreamBuffer : public SharedStreamBuffer {
+public:
+    OwningSharedStreamBuffer(std::shared_ptr<ov::AlignedBuffer> buffer)
+        : SharedStreamBuffer(static_cast<char*>(buffer->get_ptr()), buffer->size()),
+          m_shared_obj(buffer) {}
+
+    std::shared_ptr<ov::AlignedBuffer> get_buffer() {
+        return m_shared_obj;
+    }
+
+protected:
+    std::shared_ptr<ov::AlignedBuffer> m_shared_obj;
+};
 }  // namespace ov
diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp
index 9e9ebd3ddcbc2b..c441811c3cfd02 100644
--- a/src/inference/src/cache_manager.hpp
+++ b/src/inference/src/cache_manager.hpp
@@ -14,6 +14,7 @@
 #include
 #include
 
+#include "openvino/runtime/shared_buffer.hpp"
 #include "openvino/util/file_util.hpp"
 #include "openvino/util/mmap_object.hpp"
 
@@ -77,9 +78,10 @@ class ICacheManager {
      * Otherwise, model will not be read from cache and will be loaded as usual
      *
      * @param id Id of cache (hash of the model)
+     * @param enable_mmap use mmap or ifstream to read model file
      * @param reader Lambda function to be called when input stream is created
      */
-    virtual void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) = 0;
+    virtual void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) = 0;
 
     /**
      * @brief Callback when OpenVINO intends to remove cache entry
@@ -130,13 +132,17 @@ class FileStorageCacheManager final : public ICacheManager {
         writer(stream);
     }
 
-    void read_cache_entry(const std::string& id, StreamReader reader, bool mmap = false) override {
+    void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) override {
         // Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C".
         ScopedLocale plocal_C(LC_ALL, "C");
         auto blob_file_name = getBlobFile(id);
         if (ov::util::file_exists(blob_file_name)) {
-            if (mmap) {
-                MmapStream stream(blob_file_name);
+            if (enable_mmap) {
+                auto mmap = ov::load_mmap_object(blob_file_name);
+                auto shared_buffer =
+                    std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(mmap->data(), mmap->size(), mmap);
+                OwningSharedStreamBuffer buf(shared_buffer);
+                std::istream stream(&buf);
                 reader(stream);
             } else {
                 std::ifstream stream(blob_file_name, std::ios_base::binary);
diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index 9f55dc53ccd24f..32b43f346e9e44 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -1397,19 +1397,12 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model_and_cache(ov::Plugin&
     return compiled_model;
 }
 
-static bool does_plugin_support_model_caching_with_mmap(const ov::Plugin& plugin) {
-    bool supported = plugin.supports_model_caching();
-    supported &=
-        ov::util::contains(plugin.get_property(ov::internal::supported_properties), ov::internal::caching_with_mmap);
-    return supported;
-}
-
 ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
     const CacheContent& cacheContent,
     ov::Plugin& plugin,
     const ov::AnyMap& config,
     const ov::SoPtr<ov::IRemoteContext>& context,
-    std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda) {
+    std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda) const {
     ov::SoPtr<ov::ICompiledModel> compiled_model;
     struct HeaderException {};
 
@@ -1418,6 +1411,8 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
     try {
         cacheContent.cacheManager->read_cache_entry(
             cacheContent.blobId,
+            coreConfig.get_enable_mmap() && ov::util::contains(plugin.get_property(ov::internal::supported_properties),
+                                                               ov::internal::caching_with_mmap),
             [&](std::istream& networkStream) {
                 OV_ITT_SCOPE(FIRST_INFERENCE,
                              ov::itt::domains::LoadTime,
@@ -1454,8 +1449,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
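+                // NOTE: when mmap-backed caching is enabled above, networkStream reads
+                // directly from the memory-mapped blob instead of a std::ifstream copy.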
update_config[ov::loaded_from_cache.name()] = true; compiled_model = context ? plugin.import_model(networkStream, context, update_config) : plugin.import_model(networkStream, update_config); - }, - does_plugin_support_model_caching_with_mmap(plugin)); + }); } catch (const HeaderException&) { // For these exceptions just remove old cache and set that import didn't work cacheContent.cacheManager->remove_cache_entry(cacheContent.blobId); diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 79b1b96d57ac30..7cf12f3ba3280c 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -149,12 +149,12 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this& context, const CacheContent& cacheContent) const; - static ov::SoPtr load_model_from_cache( + ov::SoPtr load_model_from_cache( const CacheContent& cacheContent, ov::Plugin& plugin, const ov::AnyMap& config, const ov::SoPtr& context, - std::function()> compile_model_lambda); + std::function()> compile_model_lambda) const; bool device_supports_model_caching(const ov::Plugin& plugin) const; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index fa1810ff6044f9..5c88772eeedabc 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -446,7 +446,7 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio } else if (ov::internal::supported_properties == name) { return decltype(ov::internal::supported_properties)::value_type{ ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, -#if !defined(OPENVINO_ARCH_ARM) +#if !defined(OPENVINO_ARCH_ARM) && !(defined(__APPLE__) || defined(__MACOSX)) ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, #endif ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 6666d42fb4f586..f7fd337afa932e 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -58,19 +58,20 @@ void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr>(std::shared_ptr& model) { - if (auto mmap_stream = dynamic_cast(&m_istream)) { - process_mmap(model, mmap_stream->m_memory); + if (auto mmap_buffer = dynamic_cast(m_istream.rdbuf())) { + auto buffer = mmap_buffer->get_buffer(); + process_mmap(model, buffer); } else { process_stream(model); } } void ModelDeserializer::process_mmap(std::shared_ptr& model, - const std::shared_ptr& mmemory) { + const std::shared_ptr& mmemory) { // Note: Don't use seekg with mmaped stream. This may affect the performance of some models. // Get file size before seek content. // Blob from cache may have other header, so need to skip this. 
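+    // The mapped blob is now passed in as an ov::AlignedBuffer (wrapped by
+    // OwningSharedStreamBuffer upstream), so the base address comes from get_ptr().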
- auto buffer_base = mmemory->data(); + auto buffer_base = reinterpret_cast(mmemory->get_ptr()); const auto file_size = mmemory->size(); const size_t hdr_pos = m_istream.tellg(); @@ -98,9 +99,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, // Map blob content std::shared_ptr weights_buf; if (hdr.consts_size) { - weights_buf = std::make_shared>>(buffer_base + hdr.consts_offset, - hdr.consts_size, - mmemory); + weights_buf = std::make_shared>>(buffer_base + hdr.consts_offset, hdr.consts_size, mmemory); } // XML content diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp index 817041452c9597..897a2c2e52f092 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -40,7 +40,7 @@ class ModelDeserializer { protected: static void set_info(pugi::xml_node& root, std::shared_ptr& model); - void process_mmap(std::shared_ptr& model, const std::shared_ptr& memory); + void process_mmap(std::shared_ptr& model, const std::shared_ptr& memory); void process_stream(std::shared_ptr& model); diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 7ee587e612ad3d..4ea7851b3f8c58 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -575,7 +575,8 @@ std::vector Plugin::get_supported_internal_properties() const ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::query_model_ratio.name(), PropertyMutability::RW}}; + ov::PropertyName{ov::internal::query_model_ratio.name(), PropertyMutability::RW}, + ov::PropertyName{ov::internal::caching_with_mmap.name(), PropertyMutability::RO}}; return supported_internal_properties; } From 1693821ba6be8e231349f819fd816bc54c58b7c9 Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Wed, 16 Oct 2024 08:49:00 +0200 Subject: [PATCH 033/112] [DOCS] nncf changes pass 3 recommend (#26873) --- .../weight-compression.rst | 6 +-- .../openvino-workflow/model-optimization.rst | 37 +++++++++++++++---- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst index 6c85473502ff9b..6348ca897c5ea5 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst @@ -1,5 +1,5 @@ -Weight Compression -================== +LLM Weight Compression +========================= .. toctree:: :maxdepth: 1 @@ -187,7 +187,7 @@ trade-offs after optimization: ratio=0.9, ) -* ``scale_estimation`` - boolean parameter that enables more accurate estimation of +* ``scale_estimation`` - boolean parameter that enables more accurate estimation of quantization scales. Especially helpful when the weights of all layers are quantized to 4 bits. Requires dataset. 
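For reference, the two options documented above combine as follows in a `compress_weights()` call — a minimal sketch only, with the model path and `calibration_data` as placeholders rather than part of this patch:

```python
import nncf
import openvino as ov

calibration_data = [...]  # placeholder: a small set of real model inputs
model = ov.Core().read_model("model.xml")  # placeholder path

compressed_model = nncf.compress_weights(
    model,
    mode=nncf.CompressWeightsMode.INT4_SYM,
    ratio=0.9,               # compress 90% of the layers to 4 bits
    scale_estimation=True,   # more accurate scales; requires a dataset
    dataset=nncf.Dataset(calibration_data),
)
```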
diff --git a/docs/articles_en/openvino-workflow/model-optimization.rst b/docs/articles_en/openvino-workflow/model-optimization.rst index b4b6cc64acb21b..f5a5f97341e960 100644 --- a/docs/articles_en/openvino-workflow/model-optimization.rst +++ b/docs/articles_en/openvino-workflow/model-optimization.rst @@ -22,7 +22,7 @@ It is a `set of compression algorithms `__ and -`NNCF API documentation `__. + +Recommended workflows +########################## + +* A common approach for most cases is to: + + 1. Perform post-training quantization first, as it is the easiest option. + 2. For even better results, combine post-training quantization with filter pruning. + 3. If the accuracy drop is unacceptable, use quantization-aware training instead. It will give + you the same level of performance boost, with a smaller impact on accuracy. + +* **Weight compression** works **only with LLMs**. Do not try to use it with other models. +* For **visual-multimodal** use cases, the encoder / decoder split approach may be recommended. + + + + + .. image:: ../assets/images/DEVELOPMENT_FLOW_V3_crunch.svg + +Installation and usage +########################### + +To learn about the full scope of the framework, its installation, and technical details, visit +both `the NNCF repository `__ and +`NNCF API documentation `__. + + + .. tab-set:: .. tab-item:: Installation From 8597d9aef99691618532af5459dc3468f17bcc87 Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Wed, 16 Oct 2024 11:18:12 +0200 Subject: [PATCH 034/112] [GHA] Install pigz on macOS (#27003) ### Details: - macOS tar doesn't support -I argument, reworked flows with universal tar command ### Tickets: - *154627* --- .github/workflows/coverity.yml | 2 +- .../dev_cpu_linux_snippets_libxsmm.yml | 12 ++++++------ .github/workflows/job_build_linux.yml | 6 +++--- .github/workflows/job_cpu_functional_tests.yml | 10 +++++++--- .github/workflows/job_cxx_unit_tests.yml | 8 ++++++-- .github/workflows/job_gpu_tests.yml | 4 ++-- .github/workflows/job_jax_models_tests.yml | 2 +- .github/workflows/job_onnx_models_tests.yml | 2 +- .github/workflows/job_onnx_runtime.yml | 2 +- .github/workflows/job_python_unit_tests.yml | 8 ++++++-- .github/workflows/job_pytorch_layer_tests.yml | 10 +++++++--- .github/workflows/job_pytorch_models_tests.yml | 2 +- .github/workflows/job_samples_tests.yml | 14 +++++++------- .../workflows/job_tensorflow_layer_tests.yml | 8 ++++++-- .../workflows/job_tensorflow_models_tests.yml | 2 +- .github/workflows/job_tokenizers.yml | 4 ++++ .../workflows/linux_conditional_compilation.yml | 13 +++++++------ .github/workflows/linux_sanitizers.yml | 8 ++++---- .github/workflows/mac.yml | 17 ++++++++++++----- .github/workflows/mac_arm64.yml | 16 ++++++++++++---- .github/workflows/ubuntu_22.yml | 12 ++++++------ 21 files changed, 101 insertions(+), 61 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 604ca0fdb81b29..6a163fb5e50043 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -112,7 +112,7 @@ jobs: - name: Pack Artefacts run: | pushd ${BUILD_DIR} - tar -C ${BUILD_DIR} -I pigz -cvf openvino.tgz cov-int + tar -cvf - cov-int | pigz > openvino.tgz popd - name: Submit artefacts diff --git a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml index c2da4c1b2d2f9c..83770900559bab 100644 --- a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml +++ b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml @@ -158,11 +158,11 @@ jobs: 
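+        # Note: macOS (BSD) tar does not support -I, so tar output is piped
+        # through pigz explicitly.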
run: | pushd ${INSTALL_DIR} - tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz * + tar -cvf - * | pigz > ${BUILD_DIR}/openvino_package.tar.gz popd pushd ${INSTALL_TEST_DIR} - tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz * + tar -cvf - * | pigz > ${BUILD_DIR}/openvino_tests.tar.gz popd # @@ -230,11 +230,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR + pigz -dc openvino_package.tar.gz | tar -xf - -C ${INSTALL_DIR} popd pushd $INSTALL_TEST_DIR - tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - name: Snippets func tests @@ -287,11 +287,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR + pigz -dc openvino_package.tar.gz | tar -xf - -C ${INSTALL_DIR} popd pushd $INSTALL_TEST_DIR - tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - name: Fetch setup_python action diff --git a/.github/workflows/job_build_linux.yml b/.github/workflows/job_build_linux.yml index d58e879c736610..b8eea4375e7e58 100644 --- a/.github/workflows/job_build_linux.yml +++ b/.github/workflows/job_build_linux.yml @@ -182,15 +182,15 @@ jobs: working-directory: ${{ env.BUILD_DIR }} - name: Pack openvino_package - run: tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz * + run: tar -cvf - * | pigz > ${BUILD_DIR}/openvino_package.tar.gz working-directory: ${{ env.INSTALL_DIR }} - name: Pack openvino_developer_package - run: tar -I pigz -cvf ${BUILD_DIR}/openvino_developer_package.tar.gz * + run: tar -cvf - * | pigz > ${BUILD_DIR}/openvino_developer_package.tar.gz working-directory: ${{ env.DEVELOPER_PACKAGE_DIR }} - name: Pack openvino_tests - run: tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz * + run: tar -cvf - * | pigz > ${BUILD_DIR}/openvino_tests.tar.gz working-directory: ${{ env.INSTALL_TEST_DIR }} - name: Build Debian packages diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index 77376d442939a0..24c8542ae80140 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -55,15 +55,19 @@ jobs: echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "PARALLEL_TEST_SCRIPT=$GITHUB_WORKSPACE/install/tests/functional_test_utils/layer_tests_summary/run_parallel.py" >> "$GITHUB_ENV" echo "PARALLEL_TEST_CACHE=$GITHUB_WORKSPACE/install/tests/test_cache.lst" >> "$GITHUB_ENV" - + + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install pigz + - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR + pigz -dc openvino_package.tar.gz | tar -xf - -C ${INSTALL_DIR} popd pushd $INSTALL_TEST_DIR - tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - name: Fetch setup_python action diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index b83e83af4ed68c..99c363d04d23a7 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ b/.github/workflows/job_cxx_unit_tests.yml @@ -60,6 +60,10 @@ jobs: echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "SETUPVARS_COMMAND=${{ env.SOURCE_COMMAND }} $GITHUB_WORKSPACE/install/${{ env.SETUPVARS }}" >> 
"$GITHUB_ENV" + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install pigz + - name: Setup Variables (Windows) if: ${{ runner.os == 'Windows' }} run: Add-Content -Path $env:GITHUB_ENV -Value "SETUPVARS_COMMAND=${{ env.SOURCE_COMMAND }} ${{ github.workspace }}/install/${{ env.SETUPVARS }}" @@ -68,10 +72,10 @@ jobs: if: ${{ runner.os != 'Windows' }} run: | pushd $INSTALL_DIR - tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR + pigz -dc openvino_package.tar.gz | tar -xf - -C ${INSTALL_DIR} popd pushd $INSTALL_TEST_DIR - tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - name: Extract OpenVINO packages (Windows) diff --git a/.github/workflows/job_gpu_tests.yml b/.github/workflows/job_gpu_tests.yml index 147afcccddfe17..324e653c57ebab 100644 --- a/.github/workflows/job_gpu_tests.yml +++ b/.github/workflows/job_gpu_tests.yml @@ -59,10 +59,10 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR + pigz -dc openvino_package.tar.gz | tar -xf - -C ${INSTALL_DIR} popd pushd $INSTALL_TEST_DIR - tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - name: Install dependencies (Linux) diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index 2fed97a78e9c07..9956a27f234b36 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -60,7 +60,7 @@ jobs: - name: Extract OpenVINO packages and tests run: | - tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml index 0eda00f7afb937..321aa88d614310 100644 --- a/.github/workflows/job_onnx_models_tests.yml +++ b/.github/workflows/job_onnx_models_tests.yml @@ -64,7 +64,7 @@ jobs: - name: Extract OpenVINO packages and tests run: | - tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} # Issue 148922 diff --git a/.github/workflows/job_onnx_runtime.yml b/.github/workflows/job_onnx_runtime.yml index 61b13939fc60b7..0ceb080d82184d 100644 --- a/.github/workflows/job_onnx_runtime.yml +++ b/.github/workflows/job_onnx_runtime.yml @@ -59,7 +59,7 @@ jobs: - name: Extract OpenVINO package run: | pushd ${INSTALL_DIR} - tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} + pigz -dc openvino_package.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - name: Fetch ONNX runtime version and skip tests list diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index 8db2ebf86dca91..d63262c665d45c 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -66,9 +66,13 @@ jobs: echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - - name: Extract OpenVINO artifacts + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install pigz + + - name: Extract OpenVINO packages run: | - tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + pigz -dc 
openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index c4f0d1efb37c75..95074dc84f1ff9 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -65,11 +65,15 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - - - name: Extract OpenVINO artifacts (Linux, macOS) + + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install pigz + + - name: Extract OpenVINO packages (Linux, macOS) if: runner.os != 'Windows' run: | - tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - name: Extract OpenVINO artifacts (Windows) diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index ce40dd7f0618ce..a77c1318f3a0c8 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -73,7 +73,7 @@ jobs: - name: Extract OpenVINO artifacts run: | - tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml index 12c63644d7b586..7cde4e6fd18eae 100644 --- a/.github/workflows/job_samples_tests.yml +++ b/.github/workflows/job_samples_tests.yml @@ -54,17 +54,17 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "BUILD_DIR=$GITHUB_WORKSPACE/build" >> "$GITHUB_ENV" + + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install coreutils pigz - - name: Extract OpenVINO packages, wheels and tests + - name: Extract OpenVINO packages and tests run: | - tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} - tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + pigz -dc openvino_package.tar.gz | tar -xf - -C ${INSTALL_DIR} + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - - name: Install OpenVINO dependencies (mac) - if: runner.os == 'macOS' - run: brew install coreutils - - name: Fetch setup_python action # Python is already installed on Ubuntu within Dockerfile if: runner.os != 'Linux' diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index 3ad19d3301945f..ae6e91a00d1497 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -66,10 +66,14 @@ jobs: echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" - - name: Extract OpenVINO artifacts (Linux and macOS) + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install pigz + + - name: Extract OpenVINO packages (Linux, macOS) if: runner.os != 'Windows' run: | - tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + pigz -dc 
openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - name: Extract OpenVINO artifacts (Windows) diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index 76ee01cc76c3ef..db34ec7b793551 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -65,7 +65,7 @@ jobs: - name: Extract OpenVINO artifacts (Linux and macOS) run: | - tar -I pigz -xf openvino_tests.tar.gz -C . + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - name: Fetch setup_python action diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index 089b104d7af1d1..238dbfec3a34eb 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -58,6 +58,10 @@ jobs: .github/actions/setup_python .github/actions/cache install_build_dependencies.sh + + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install pigz - name: Setup Python ${{ env.PYTHON_VERSION }} uses: ./.github/actions/setup_python diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index 6f9b761ce3352c..7b5467b01ad73e 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -200,23 +200,23 @@ jobs: - name: Pack Artifacts run: | pushd ${SELECTIVE_BUILD_STAT_DIR} - tar -I pigz -cvf ${BUILD_DIR}/openvino_selective_build_stat.tar.gz * + tar -cvf - * | pigz > ${BUILD_DIR}/openvino_selective_build_stat.tar.gz popd pushd ${INSTALL_DIR} - tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz \ - install_dependencies/install_openvino_dependencies.sh + tar -cvf - install_dependencies/install_openvino_dependencies.sh | pigz > ${BUILD_DIR}/openvino_package.tar.gz popd cp -v ${OPENVINO_REPO}/temp/tbb/lib/lib* ${INSTALL_TEST_DIR}/tests pushd ${INSTALL_TEST_DIR} - tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz \ + tar -cvf - \ tests/ov_cpu_func_tests \ tests/libopenvino_template_extension.so \ tests/libze_loader.so* \ tests/libhwloc* \ tests/libtbb* \ - tests/functional_test_utils/layer_tests_summary/* + tests/functional_test_utils/layer_tests_summary/* \ + | pigz > ${BUILD_DIR}/openvino_tests.tar.gz popd # @@ -302,7 +302,8 @@ jobs: path: ${{ env.SELECTIVE_BUILD_STAT_DIR }} - name: Extract selective build statistics package - run: tar -I pigz -xvf ${SELECTIVE_BUILD_STAT_DIR}/openvino_selective_build_stat.tar.gz -C ${SELECTIVE_BUILD_STAT_DIR} + run: | + pigz -dc ${SELECTIVE_BUILD_STAT_DIR}/openvino_selective_build_stat.tar.gz | tar -xf - -C ${SELECTIVE_BUILD_STAT_DIR} # # Build diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index cec499e7971130..b23e67a0f2b30e 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -175,11 +175,11 @@ jobs: - name: Pack Artifacts run: | pushd ${INSTALL_DIR} - tar -I pigz -cvf ${BUILD_DIR}/openvino_package.tar.gz * + tar -cvf - * | pigz > ${BUILD_DIR}/openvino_package.tar.gz popd pushd ${INSTALL_TEST_DIR} - tar -I pigz -cvf ${BUILD_DIR}/openvino_tests.tar.gz * + tar -cvf - * | pigz > ${BUILD_DIR}/openvino_tests.tar.gz popd # @@ -257,10 +257,10 @@ jobs: - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR - tar -I pigz -xf openvino_package.tar.gz -C $INSTALL_DIR + pigz -dc openvino_package.tar.gz | tar 
-xf - -C ${INSTALL_DIR} popd pushd $INSTALL_TEST_DIR - tar -I pigz -xf openvino_tests.tar.gz -C $INSTALL_DIR + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - name: Install dependencies (Linux) diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index b91bd65465621a..6e3f344c6dd944 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -77,6 +77,7 @@ jobs: INSTALL_DIR: ${{ github.workspace }}/openvino_install INSTALL_DIR_JS: ${{ github.workspace }}/openvino_install/js INSTALL_TEST_DIR: ${{ github.workspace }}/tests_install + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels BUILD_DIR: ${{ github.workspace }}/build if: "!needs.smart_ci.outputs.skip_workflow" steps: @@ -104,7 +105,7 @@ jobs: # - name: Install build dependencies - run: brew install coreutils ninja scons + run: brew install coreutils ninja scons pigz - name: Setup Python ${{ env.PYTHON_VERSION }} uses: ./openvino/.github/actions/setup_python @@ -167,16 +168,15 @@ jobs: run: | cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_TEST_DIR }} -DCOMPONENT=tests -P ${{ env.BUILD_DIR }}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake + cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_WHEELS_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake - name: Pack Artifacts run: | pushd ${{ env.INSTALL_DIR }} - tar -I pigz -cvf ${{ env.BUILD_DIR }}/openvino_package.tar.gz * + tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_package.tar.gz popd - pushd ${{ env.INSTALL_TEST_DIR }} - tar -I pigz -cvf ${{ env.BUILD_DIR }}/openvino_tests.tar.gz * + tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_tests.tar.gz popd - name: Cmake & Build - OpenVINO Contrib @@ -210,6 +210,13 @@ jobs: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' + + - name: Upload openvino wheels + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + with: + name: openvino_wheels + path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl + if-no-files-found: 'error' - name: Upload openvino tests package if: ${{ always() }} diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 8d4843627e7b9f..16658318de20d8 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -77,6 +77,7 @@ jobs: INSTALL_DIR: ${{ github.workspace }}/openvino_install INSTALL_DIR_JS: ${{ github.workspace }}/openvino_install/js INSTALL_TEST_DIR: ${{ github.workspace }}/tests_install + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels BUILD_DIR: ${{ github.workspace }}/build if: "!needs.smart_ci.outputs.skip_workflow" steps: @@ -104,7 +105,7 @@ jobs: # - name: Install build dependencies - run: brew install coreutils ninja scons + run: brew install coreutils ninja scons pigz - name: Setup Python ${{ env.PYTHON_VERSION }} uses: ./openvino/.github/actions/setup_python @@ -167,16 +168,16 @@ jobs: run: | cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_TEST_DIR }} -DCOMPONENT=tests -P ${{ env.BUILD_DIR }}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake + cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_WHEELS_DIR }} 
-DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake - name: Pack Artifacts run: | pushd ${{ env.INSTALL_DIR }} - tar -I pigz -cvf ${{ env.BUILD_DIR }}/openvino_package.tar.gz * + tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_package.tar.gz popd pushd ${{ env.INSTALL_TEST_DIR }} - tar -I pigz -cvf ${{ env.BUILD_DIR }}/openvino_tests.tar.gz * + tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_tests.tar.gz popd - name: Cmake & Build - OpenVINO Contrib @@ -210,6 +211,13 @@ jobs: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' + + - name: Upload openvino wheels + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + with: + name: openvino_wheels + path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl + if-no-files-found: 'error' - name: Upload openvino tests package if: ${{ always() }} diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 19f63471523726..2ebca2b059fdd2 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -176,10 +176,10 @@ jobs: - name: Extract OpenVINO packages run: | pushd ${INSTALL_DIR} - tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} + pigz -dc openvino_package.tar.gz | tar -xf - -C ${INSTALL_DIR} popd pushd ${INSTALL_TEST_DIR} - tar -I pigz -xf openvino_tests.tar.gz -C ${INSTALL_DIR} + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - name: Fetch setup_python action @@ -222,7 +222,7 @@ jobs: if: ${{ always() }} run: | pushd ${CONFORMANCE_ARTIFACTS_DIR} - tar -I pigz -cvf ${CONFORMANCE_ARTIFACTS_DIR}/conformance_artifacts.tar.gz * + tar -cvf - * | pigz > ${CONFORMANCE_ARTIFACTS_DIR}/conformance_artifacts.tar.gz popd - name: Upload Conformance Artifacts @@ -248,7 +248,7 @@ jobs: if: ${{ matrix.TEST_TYPE == 'API' }} run: | pushd ${CONFORMANCE_ARTIFACTS_DIR} - tar -I pigz -cvf ${CONFORMANCE_ARTIFACTS_DIR}/conformance_artifacts.tar.gz * + tar -cvf - * | pigz > ${CONFORMANCE_ARTIFACTS_DIR}/conformance_artifacts.tar.gz popd - name: Upload Conformance Artifacts @@ -451,11 +451,11 @@ jobs: - name: Extract OpenVINO packages run: | pushd ${INSTALL_DIR} - tar -I pigz -xf openvino_package.tar.gz -C ${INSTALL_DIR} + pigz -dc openvino_package.tar.gz | tar -xf - -C ${INSTALL_DIR} popd pushd ${INSTALL_DIR} - tar -I pigz -xf openvino_developer_package.tar.gz -C ${INSTALL_DIR} + pigz -dc openvino_developer_package.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - name: Clone OpenVINO Contrib From bf60502a70778d29f081802222b272c68c195e90 Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Wed, 16 Oct 2024 18:48:06 +0900 Subject: [PATCH 035/112] [GPU] Fix SD3 int4 OOR or Perf drop issue in LNL (#27079) Fixes an OOR failure on LNL, and a performance drop on A770 when the model cache is used. 
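The fix is the one-line change in `program::load()` shown in the diff below: the layout optimizer attributes are set up during a normal build and, judging by the diff, are not restored by deserialization, so they have to be re-applied when a program is loaded from the model cache. A condensed sketch of the load path after this patch (only the identifiers that appear in the hunk are taken from the real code; the comments and elided steps are illustrative):

void program::load(cldnn::BinaryInputBuffer& ib) {
    // ... deserialize the graph and mark it as restored from the cache ...
    _loaded_from_cache = true;

    processing_order.load(ib, *this);
    // Re-derive the attributes that a regular build computes but the
    // serialized blob does not carry; skipping this step is what this
    // patch fixes for cached runs:
    set_layout_optimizer_attributes(*_layout_optimizer);

    // ... restore the kernels cache and remaining state ...
}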
### Tickets: - *152249* Signed-off-by: hyunback --- src/plugins/intel_gpu/src/graph/program.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 03cc8df8b4338c..3a3793e8ad764d 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -1898,6 +1898,7 @@ void program::load(cldnn::BinaryInputBuffer& ib) { _loaded_from_cache = true; processing_order.load(ib, *this); + set_layout_optimizer_attributes(*_layout_optimizer); { auto& kernels_cache = get_kernels_cache(); From 80d6fdf5277288678f006d0578d93d6516e072f9 Mon Sep 17 00:00:00 2001 From: Georgy Krivoruchko Date: Wed, 16 Oct 2024 17:14:39 +0400 Subject: [PATCH 036/112] [ONNX] Added initial support of DequantizeLinear-21 (#26915) ### Details: - Added initial support of operation DequantizeLinear-21 from default opset ### Tickets: - 152784 --- src/frontends/onnx/frontend/CMakeLists.txt | 2 +- .../frontend/src/op/dequantize_linear.cpp | 77 ++++++++++++++++++- .../models/dequantize_linear_21.prototxt | 63 +++++++++++++++ .../onnx/tests/onnx_import_quant.in.cpp | 10 +++ 4 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 src/frontends/onnx/tests/models/dequantize_linear_21.prototxt diff --git a/src/frontends/onnx/frontend/CMakeLists.txt b/src/frontends/onnx/frontend/CMakeLists.txt index 80fd16e2ed6483..f07b6cf999fea8 100644 --- a/src/frontends/onnx/frontend/CMakeLists.txt +++ b/src/frontends/onnx/frontend/CMakeLists.txt @@ -77,7 +77,7 @@ ov_add_frontend(NAME onnx FILEDESCRIPTION "FrontEnd to load and convert ONNX file format" LINK_LIBRARIES openvino_onnx_common openvino::core::dev) -set(ONNX_OPSET_VERSION 20 CACHE INTERNAL "Supported version of ONNX operator set") +set(ONNX_OPSET_VERSION 21 CACHE INTERNAL "Supported version of ONNX operator set") target_compile_definitions(${TARGET_NAME} PRIVATE ONNX_OPSET_VERSION=${ONNX_OPSET_VERSION}) if(BUILD_SHARED_LIBS) diff --git a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp index fe2ea3106e31ee..b09bc73467bc10 100644 --- a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp +++ b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp @@ -11,10 +11,15 @@ #include "openvino/frontend/exception.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" +#include "openvino/op/convert_like.hpp" #include "openvino/op/multiply.hpp" #include "openvino/op/reshape.hpp" +#include "openvino/op/shape_of.hpp" #include "openvino/op/subtract.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/unsqueeze.hpp" #include "utils/common.hpp" +#include "utils/reshape.hpp" using namespace ov::op; namespace ov { @@ -188,8 +193,78 @@ ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { // these reshapes make sure that dequantization happens over the specified axis return detail::dequantize_linear(x, scale, zero_point, node.get_attribute_value("axis", 1), node); } -ONNX_OP("DequantizeLinear", OPSET_SINCE(13), ai_onnx::opset_13::dequantize_linear); +ONNX_OP("DequantizeLinear", {13, 18}, ai_onnx::opset_13::dequantize_linear); } // namespace opset_13 + +namespace opset_19 { +ONNX_OP("DequantizeLinear", {19, 20}, ai_onnx::opset_13::dequantize_linear); +} // namespace opset_19 + +namespace opset_21 { +ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { + common::default_op_checks(node, 2); + + const ov::OutputVector 
inputs{node.get_ov_inputs()}; + const auto& src_x = inputs[0]; + ov::Output scale = inputs[1]; + const auto& scale_shape = scale.get_partial_shape(); + ov::Output zp; + + // When no blocking dequantization is required - use regular DequantizeLinear + if (scale_shape.rank().is_static() && scale_shape.rank().get_length() <= 1) { + return ai_onnx::opset_13::dequantize_linear(node); + } + + FRONT_END_GENERAL_CHECK(scale_shape.rank().is_static(), "Rank of the input data tensor has to be known (static)."); + FRONT_END_GENERAL_CHECK(scale_shape.rank().get_length() == 2, + "DequantizeLinear cannot operate with more than 2D scales"); + FRONT_END_GENERAL_CHECK(src_x.get_partial_shape().is_static(), + "DequantizeLinear cannot operate with dynamic shapes of input X"); + + const auto& unsqueezed_axes = std::make_shared(ov::element::i64, Shape{1}, std::vector{1}); + + if (inputs.size() > 2) { + zp = inputs[2]; + if (zp.get_element_type() != scale.get_element_type()) { + zp = std::make_shared(zp, scale); + } + zp = std::make_shared(zp, unsqueezed_axes); + } + + const auto axis = node.get_attribute_value("axis", 1); + const auto block_size = static_cast(node.get_attribute_value("block_size", 0)); + const auto scale_type = scale.get_element_type(); + + FRONT_END_GENERAL_CHECK(axis == 0, "Axis != 0 isn't supported"); + FRONT_END_GENERAL_CHECK(block_size > 0, "block_size must be greater than zero"); + FRONT_END_GENERAL_CHECK( + src_x.get_shape()[0] % block_size == 0, + "DequantizeLinear doesn't support case when first dimension of X cannot be divided by block_size"); + + const auto& x = src_x.get_element_type() == scale_type ? src_x : std::make_shared(src_x, scale); + // For further broadcasting scales and zp - reshape input to a shape [x.shape[0]/block_size, block_size, x.shape[1]] + ov::Output broadcastable_x = + op::util::reshape(x, Shape{static_cast(x.get_shape()[0]) / block_size, block_size, x.get_shape()[1]}); + + // Adding additional dimension for broadcasting + scale = std::make_shared(scale, unsqueezed_axes); + + if (zp.get_node_shared_ptr()) { + broadcastable_x = std::make_shared(broadcastable_x, zp); + } + + const auto& scaled_x = std::make_shared(broadcastable_x, scale); + + // Returning back a shape + const auto& reshaped_scaled_x = + std::make_shared(scaled_x, std::make_shared(src_x), false); + + reshaped_scaled_x->set_friendly_name(node.get_name()); + + return {reshaped_scaled_x}; +} +ONNX_OP("DequantizeLinear", OPSET_SINCE(21), ai_onnx::opset_21::dequantize_linear); +} // namespace opset_21 } // namespace ai_onnx } // namespace onnx } // namespace frontend diff --git a/src/frontends/onnx/tests/models/dequantize_linear_21.prototxt b/src/frontends/onnx/tests/models/dequantize_linear_21.prototxt new file mode 100644 index 00000000000000..0378ad13ce0ce9 --- /dev/null +++ b/src/frontends/onnx/tests/models/dequantize_linear_21.prototxt @@ -0,0 +1,63 @@ +ir_version: 3 +producer_name: "OpenVINO ONNX Frontend" +graph { + name: "test_dequantize_21" + initializer { + dims: 6 + dims: 3 + data_type: 21 + name: "data" + raw_data: "\x99\x99\x99\x99\x99\x99\x99\x99\x99" + } + initializer { + dims: 2 + dims: 3 + data_type: 1 + name: "scale" + raw_data: "\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f\x00\x00\x80\x3f" + } + initializer { + dims: 2 + dims: 3 + data_type: 21 + name: "zp" + raw_data: "\x78\x56\x34" + } + node { + input: "data" + input: "scale" + input: "zp" + output: "output" + name: "DequantizeNode" + op_type: "DequantizeLinear" + attribute { + name: "axis" + i: 
0 + type: INT + } + attribute { + name: "block_size" + i: 3 + type: INT + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 6 + } + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 21 +} diff --git a/src/frontends/onnx/tests/onnx_import_quant.in.cpp b/src/frontends/onnx/tests/onnx_import_quant.in.cpp index c2d48c292cb8c1..ad85ef98ede8d9 100644 --- a/src/frontends/onnx/tests/onnx_import_quant.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_quant.in.cpp @@ -317,6 +317,16 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_dequantize_linear_1d_zero_scale_uint8_ test_case.run(); } +OPENVINO_TEST(${BACKEND_NAME}, onnx_model_dequantize_linear_opset21) { + auto model = convert_model("dequantize_linear_21.onnx"); + + auto test_case = ov::test::TestCase(model, s_device); + + test_case.add_expected_output({6, 3}, + std::vector{1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6, 4, 5, 6}); + test_case.run(); +} + OPENVINO_TEST(${BACKEND_NAME}, onnx_model_dequantize_linear_scalar_ignore_axis) { auto model = convert_model("dequantize_linear_scalar_ignore_axis.onnx"); From 6856813e5ddc844c8cd2be1d03ab99f36d3f8f02 Mon Sep 17 00:00:00 2001 From: Nashez Zubair <35090095+nashez@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:49:08 +0530 Subject: [PATCH 037/112] [CPU][ARM64] Add JIT emitter for Eltwise Sqrt operation (#27023) ### Details: - Added a jit_sqrt_emitter derived class in aarch64/jit_eltwise_emitters - Created entry Algorithm::EltwiseSqrt in the get_supported_precisions in nodes/kernels/aarch64 - Add the EltwiseSqrt entry in the aarch64 executors supported algorithms - Add the ActivationType::Sqrt in the getPrimitiveType in activations ### Tickets: - #24446 Signed-off-by: Nashez Zubair --- .../plugin/aarch64/jit_eltwise_emitters.cpp | 45 +++++++++++++++++++ .../plugin/aarch64/jit_eltwise_emitters.hpp | 24 +++++++++- .../nodes/executors/aarch64/jit_eltwise.cpp | 1 + .../aarch64/jit_uni_eltwise_generic.cpp | 2 + .../single_layer_tests/classes/activation.cpp | 1 + 5 files changed, 71 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index a4c99e2cc1fca7..d6208e0a43bbe1 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -1982,6 +1982,51 @@ std::set> jit_soft_sign_emitter::get_supported_precis return {{element::f32}}; } +/// SQUARE_ROOT /// +jit_sqrt_emitter::jit_sqrt_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { + prepare_table(); + } + +jit_sqrt_emitter::jit_sqrt_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) + : jit_emitter(host, host_isa, exec_prc) { + prepare_table(); + } + +size_t jit_sqrt_emitter::get_inputs_count() const { + return 1; +} + +void jit_sqrt_emitter::emit_impl(const std::vector& in_vec_idxs, + const std::vector& out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_sqrt_emitter::emit_isa(const std::vector& in_vec_idxs, + const 
std::vector& out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + TReg src = TReg(in_vec_idxs[0]); + TReg dst = TReg(out_vec_idxs[0]); + + h->fsqrt(dst.s, src.s); +} + +std::set> jit_sqrt_emitter::get_supported_precisions( + const std::shared_ptr& node) { + return {{element::f32}}; +} + /// SUBTRACT /// jit_subtract_emitter::jit_subtract_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index ccd82bc5b628e7..afecd3029f58db 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -800,14 +800,34 @@ class jit_soft_sign_emitter : public jit_emitter { static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: - std::unique_ptr exp_emitter; - void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_sqrt_emitter : public jit_emitter { +public: + jit_sqrt_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_sqrt_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node); + + size_t get_inputs_count() const override; + + static std::set> get_supported_precisions( + const std::shared_ptr& node = nullptr); + +private: + void emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const override; + + template + void emit_isa(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const; +}; + class jit_subtract_emitter : public jit_emitter { public: jit_subtract_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 7848e479f175e4..586e7f0705643f 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -48,6 +48,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseSelect, Algorithm::EltwiseSigmoid, Algorithm::EltwiseSoftSign, + Algorithm::EltwiseSqrt, Algorithm::EltwiseSubtract, Algorithm::EltwiseSwish, Algorithm::EltwiseTanh); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index 59a5f812499481..98eb279bb26d48 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -670,6 +670,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseSelect, ov::intel_cpu::aarch64::jit_select_emitter), OV_CASE(Algorithm::EltwiseSigmoid, ov::intel_cpu::aarch64::jit_sigmoid_emitter), OV_CASE(Algorithm::EltwiseSoftSign, ov::intel_cpu::aarch64::jit_soft_sign_emitter), + OV_CASE(Algorithm::EltwiseSqrt, 
ov::intel_cpu::aarch64::jit_sqrt_emitter), OV_CASE(Algorithm::EltwiseSubtract, ov::intel_cpu::aarch64::jit_subtract_emitter), OV_CASE(Algorithm::EltwiseSwish, ov::intel_cpu::aarch64::jit_swish_emitter), OV_CASE(Algorithm::EltwiseTanh, ov::intel_cpu::aarch64::jit_tanh_emitter)); @@ -847,6 +848,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseSelect, jit_select_emitter), OV_CASE(Algorithm::EltwiseSigmoid, jit_sigmoid_emitter), OV_CASE(Algorithm::EltwiseSoftSign, jit_soft_sign_emitter), + OV_CASE(Algorithm::EltwiseSqrt, jit_sqrt_emitter), OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), OV_CASE(Algorithm::EltwiseSwish, jit_swish_emitter), OV_CASE(Algorithm::EltwiseTanh, jit_tanh_emitter)); diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp index 0f25351a020f60..307938fbfec17a 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp @@ -193,6 +193,7 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType (activation_type == utils::ActivationTypes::Relu) || (activation_type == utils::ActivationTypes::Sigmoid) || (activation_type == utils::ActivationTypes::SoftSign) || + (activation_type == utils::ActivationTypes::Sqrt) || (activation_type == utils::ActivationTypes::Swish) || (activation_type == utils::ActivationTypes::LogicalNot) || (activation_type == utils::ActivationTypes::Tanh))) { From 6b1081e3b86158671a3b07461f51b84a31b22cc7 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Wed, 16 Oct 2024 18:32:09 +0400 Subject: [PATCH 038/112] [GPU] Added empty LoRA adapters support (#26951) ### Details: - *Added functional support for empty LoRA adapters* - *Added fusing support for `crop` operation* - *Removed unnecessary `Assign` after `ReadValue`* - *Use enqueue_fill_mem instead of enqueue_memcpy in gpu_usm::fill()* ### Tickets: - *[152852](https://jira.devtools.intel.com/browse/CVS-152852)* --- .../include/intel_gpu/runtime/memory.hpp | 8 ++--- src/plugins/intel_gpu/src/graph/crop.cpp | 2 +- .../graph_optimizer/prepare_buffer_fusing.cpp | 5 ++- .../prepare_primitive_fusing.cpp | 14 +++++++- .../src/graph/impls/cpu/read_value.cpp | 2 +- .../intel_gpu/src/graph/impls/ocl/gemm.cpp | 7 ++++ .../src/graph/impls/ocl/non_zero.cpp | 2 +- .../intel_gpu/src/graph/primitive_inst.cpp | 6 ++-- .../kernel_selector/cl_kernels/slice_ref.cl | 2 +- .../intel_gpu/src/plugin/ops/variable.cpp | 12 +++++++ .../intel_gpu/src/plugin/variable_state.cpp | 5 +++ .../intel_gpu/src/runtime/ocl/ocl_memory.cpp | 36 ++++++++++--------- .../intel_gpu/src/runtime/ocl/ocl_memory.hpp | 12 +++---- .../unit/test_cases/non_zero_gpu_test.cpp | 2 +- 14 files changed, 79 insertions(+), 36 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp index f869feba4a5334..049e7a29cb9c23 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp @@ -48,8 +48,8 @@ struct memory { virtual ~memory() = default; virtual void* lock(const stream& stream, mem_lock_type type = mem_lock_type::read_write) = 0; virtual void unlock(const stream& stream) = 0; - virtual event::ptr fill(stream& stream, unsigned char pattern) = 0; - virtual event::ptr 
fill(stream& stream) = 0; + virtual event::ptr fill(stream& stream, unsigned char pattern, bool blocking = true) = 0; + virtual event::ptr fill(stream& stream, bool blocking = true) = 0; // only supports gpu_usm virtual void* buffer_ptr() const { return nullptr; } @@ -147,8 +147,8 @@ struct simple_attached_memory : memory { void* lock(const stream& /* stream */, mem_lock_type /* type */) override { return _pointer; } void unlock(const stream& /* stream */) override {} - event::ptr fill(stream& /* stream */, unsigned char) override { return nullptr; } - event::ptr fill(stream& /* stream */) override { return nullptr; } + event::ptr fill(stream& /* stream */, unsigned char, bool) override { return nullptr; } + event::ptr fill(stream& /* stream */, bool) override { return nullptr; } shared_mem_params get_internal_params() const override { return { shared_mem_type::shared_mem_empty, nullptr, nullptr, nullptr, #ifdef _WIN32 nullptr, diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index e17cc3e5552849..e3ff36ceae38a5 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -50,7 +50,7 @@ std::vector crop_inst::calc_output_layouts(const crop_node& /*node*/, co std::vector input_shapes = { impl_param.input_layouts[0].get(), }; - for (size_t i = 1; i < impl_param.input_layouts.size(); ++i) { + for (size_t i = 1; i < desc->input.size(); ++i) { input_shapes.push_back(impl_param.input_layouts[i].get()); } int64_t axis = desc->axis; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index b7017c414c505f..7bdbc53ad54d16 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -461,7 +461,7 @@ bool crop_in_place_optimization::match(const program_node& node, return false; // if the node is marked as network output, prevent optimizations which would affect a form of its output, // unless debug flag is set - if (node.is_output() || crop_params.fused_desc.size() > 0 || node.is_in_shape_of_subgraph()) + if (node.is_output() || crop_params.has_fused_primitives() || node.is_in_shape_of_subgraph()) return false; const auto& crop_layout = crop_params.get_output_layout(); @@ -547,6 +547,9 @@ bool crop_in_place_optimization::optimize(crop_node& node) { auto input_layout = node.get_input_layout(0); auto crop_params = node.get_kernel_impl_params(); + if (crop_params->has_fused_primitives()) + return false; + // Regular crop // crop input buffer // |___________data____________| diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index b42ab89eafd61a..5e8380f35dcb93 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -736,6 +736,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { should_fuse |= input.is_type(); + should_fuse |= input.is_type(); + bool legacy_fusion = activation_node.get_dependencies().size() == 1 && !input.can_be_optimized() && !activation_node.is_constant() && @@ -920,7 +922,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { (parents[i].first->is_type()) || (parents[i].first->is_type() && 
reduce_supports_fusings(parents[i].first->as())) || - (parents[i].first->is_type()); + (parents[i].first->is_type()) || + (parents[i].first->is_type()); } // Disable fusion to a node on constant path when second input is in data flow @@ -1045,6 +1048,15 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { std::swap(fused_idx, peer_idx); } + // Avoid fusing with GEMM from the LoRA pattern, that can be optimized in case of empty adapters + if (parents[fused_idx].first->is_type()) { + if (parents[peer_idx].first->is_type() || + (parents[peer_idx].first->is_type() && + parents[peer_idx].first->get_dependency(0).is_type())) { + std::swap(fused_idx, peer_idx); + } + } + auto fused_node = parents[fused_idx].first; auto peer_node = parents[peer_idx].first; if (lo.get_optimization_attributes().use_onednn_impls && lo.is_primitive_implemented_for_onednn(*fused_node)) { diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp index 6c16618ac816d0..5692b6037a09e0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp @@ -59,7 +59,7 @@ struct read_value_impl : public typed_primitive_impl { if (instance.get_impl_params()->input_layouts.size() > 0) { variable.get_memory()->copy_from(stream, instance.dep_memory(0), true); } else { - variable.get_memory()->fill(stream, 0); + variable.get_memory()->fill(stream); } } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp index 41934847f899de..174ea1fa1767a9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gemm.cpp @@ -154,6 +154,13 @@ struct gemm_impl : multi_stage_primitive { } event::ptr execute_impl(const std::vector& events, gemm_inst& instance) override { + if (instance.get_input_layout(0).count() == 0 || + instance.get_input_layout(1).count() == 0) { + stream& stream = instance.get_network().get_stream(); + stream.enqueue_barrier(); + return instance.output_memory_ptr()->fill(stream, false); + } + if (need_indirect_load(instance)) return execute_stage(events, instance, indirect_gemm); else diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp index d8f0e45c25146f..8c08afc0428432 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/non_zero.cpp @@ -37,7 +37,7 @@ struct count_nonzero_impl : typed_primitive_impl_ocl { event::ptr execute_impl(const std::vector& events, count_nonzero_inst& instance) override { if (instance.get_impl_params()->input_layouts[0].count() == 0) { // set count of non-zero elements to 0 in case if input tensor is empty to have correct memory alloc for gather_nonzero - return instance.output_memory(0).fill(instance.get_network().get_stream(), 0); + return instance.output_memory(0).fill(instance.get_network().get_stream()); } else { return parent::execute_impl(events, instance); } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 13634b49fd9d96..095dc5fd45fa52 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -655,7 +655,7 @@ event::ptr primitive_inst::realloc_if_needed() { } } - // Clear out memory if if was previously reused, but now primitive can't be optimized + // 
Clear out memory if was previously reused, but now primitive can't be optimized if (!_node->is_type() && (_node->is_runtime_skippable() || _node->is_type())) { if (can_be_optimized()) { _max_output_layout_count = _deps[0].first->_max_output_layout_count; @@ -663,7 +663,7 @@ event::ptr primitive_inst::realloc_if_needed() { return ev; } else if (_outputs[0] && dep_memory_ptr(0) && _network.get_engine().is_the_same_buffer(dep_memory(0), output_memory(0))) { - // Clear out memory if if was previously reused, but now primitive can't be optimized + // Clear out memory if was previously reused, but now primitive can't be optimized _outputs[0] = nullptr; _max_output_layout_count[0] = 0; } @@ -1527,7 +1527,7 @@ event::ptr primitive_inst::execute(const std::vector& events) { } if (can_skip_execution) { - auto ev = get_network().get_stream().create_user_event(true); + auto ev = get_network().get_stream().aggregate_events(events); update_shape_done_by_other = false; // reset return ev; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl index c9e2c0688e1968..ba36ee859412ec 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/slice_ref.cl @@ -24,7 +24,7 @@ out_name[4] = in_prefix##_VAL4; #endif -KERNEL(slice_ref)(OPTIONAL_SHAPE_INFO_ARG +KERNEL(slice_ref)(OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* restrict input, START_BUFFER STEP_BUFFER diff --git a/src/plugins/intel_gpu/src/plugin/ops/variable.cpp b/src/plugins/intel_gpu/src/plugin/ops/variable.cpp index 9d7d6854009316..d655e297e4a2c6 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/variable.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/variable.cpp @@ -45,6 +45,12 @@ void CreateVariableAccessPrimitive(ProgramBuilder &p, const std::shared_ptr op) { + return ov::is_type(op) || + ov::is_type(op) || + ov::is_type(op); +} + void CreateReadValueOp(ProgramBuilder& p, const std::shared_ptr& op) { validate_inputs_count(op, {0, 1}); CreateVariableAccessPrimitive(p, op, op->get_variable_id()); @@ -57,6 +63,9 @@ void CreateReadValueOp(ProgramBuilder& p, const std::shared_ptr& op) { validate_inputs_count(op, {1}); + if (IsReadValueOp(op->get_input_node_shared_ptr(0))) { + return; + } CreateVariableAccessPrimitive(p, op, op->get_variable_id()); } @@ -67,6 +76,9 @@ void CreateReadValueOp(ProgramBuilder& p, const std::shared_ptr& op) { validate_inputs_count(op, {1}); + if (IsReadValueOp(op->get_input_node_shared_ptr(0))) { + return; + } CreateVariableAccessPrimitive(p, op, op->get_variable_id()); } diff --git a/src/plugins/intel_gpu/src/plugin/variable_state.cpp b/src/plugins/intel_gpu/src/plugin/variable_state.cpp index b24ddbd314a0cd..6b1c8d0cfc993f 100644 --- a/src/plugins/intel_gpu/src/plugin/variable_state.cpp +++ b/src/plugins/intel_gpu/src/plugin/variable_state.cpp @@ -70,6 +70,11 @@ void VariableState::set_state(const ov::SoPtr& state) { m_layout.set_partial_shape(src_shape); update_device_buffer(); + if (actual_size == 0) { + set(); + return; + } + // check whether the src tensor is padded std::vector src_stride_no_pad(src_rank, 1); std::vector upper_pad(src_rank, 0); diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp index a2ddc7dd2a4dff..f7e5ada9e24ef1 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp @@ -91,15 +91,15 @@ 
void gpu_buffer::unlock(const stream& stream) { } } -event::ptr gpu_buffer::fill(stream& stream) { +event::ptr gpu_buffer::fill(stream& stream, bool blocking) { if (_bytes_count == 0) { GPU_DEBUG_TRACE_DETAIL << "Skip EnqueueMemcpy for 0 size tensor" << std::endl; return stream.create_user_event(true); } - return fill(stream, 0); + return fill(stream, 0, blocking); } -event::ptr gpu_buffer::fill(stream& stream, unsigned char pattern) { +event::ptr gpu_buffer::fill(stream& stream, unsigned char pattern, bool blocking) { if (_bytes_count == 0) { GPU_DEBUG_TRACE_DETAIL << "Skip EnqueueMemcpy for 0 size tensor" << std::endl; return stream.create_user_event(true); @@ -109,6 +109,9 @@ event::ptr gpu_buffer::fill(stream& stream, unsigned char pattern) { cl::Event& ev_ocl = downcast(ev.get())->get(); try { cl_stream.get_cl_queue().enqueueFillBuffer(_buffer, pattern, 0, size(), nullptr, &ev_ocl); + if (blocking) { + ev_ocl.wait(); + } } catch (cl::Error const& err) { OPENVINO_THROW(OCL_ERR_MSG_FMT(err)); } @@ -272,15 +275,15 @@ gpu_image2d::gpu_image2d(ocl_engine* engine, _slice_pitch = _buffer.getImageInfo(); } -event::ptr gpu_image2d::fill(stream& stream) { +event::ptr gpu_image2d::fill(stream& stream, bool blocking) { if (_bytes_count == 0) { GPU_DEBUG_TRACE_DETAIL << "Skip EnqueueMemcpy for 0 size tensor" << std::endl; return stream.create_user_event(true); } - return fill(stream, 0); + return fill(stream, 0, blocking); } -event::ptr gpu_image2d::fill(stream& stream, unsigned char pattern) { +event::ptr gpu_image2d::fill(stream& stream, unsigned char pattern, bool blocking) { if (_bytes_count == 0) { GPU_DEBUG_TRACE_DETAIL << "Skip EnqueueMemcpy for 0 size tensor" << std::endl; return stream.create_user_event(true); @@ -291,6 +294,9 @@ event::ptr gpu_image2d::fill(stream& stream, unsigned char pattern) { cl_uint4 pattern_uint4 = {{pattern, pattern, pattern, pattern}}; try { cl_stream.get_cl_queue().enqueueFillImage(_buffer, pattern_uint4, {0, 0, 0}, {_width, _height, 1}, 0, &ev_ocl); + if (blocking) { + ev_ocl.wait(); + } } catch (cl::Error const& err) { OPENVINO_THROW(OCL_ERR_MSG_FMT(err)); } @@ -509,7 +515,7 @@ void gpu_usm::unlock(const stream& /* stream */) { } } -event::ptr gpu_usm::fill(stream& stream, unsigned char pattern) { +event::ptr gpu_usm::fill(stream& stream, unsigned char pattern, bool blocking) { if (_bytes_count == 0) { GPU_DEBUG_TRACE_DETAIL << "Skip gpu_usm::fill for 0 size tensor" << std::endl; return stream.create_user_event(true); @@ -517,14 +523,12 @@ event::ptr gpu_usm::fill(stream& stream, unsigned char pattern) { auto& cl_stream = downcast(stream); auto ev = stream.create_base_event(); cl::Event& ev_ocl = downcast(ev.get())->get(); - // enqueueFillUsm call will never finish. Driver bug? Uncomment when fixed. Some older drivers doesn't support enqueueFillUsm call at all. - // cl_stream.get_usm_helper().enqueue_fill_mem(cl_stream.get_cl_queue(), _buffer.get(), pattern, _bytes_count, nullptr, &ev_ocl) - // Workarounded with enqeue_memcopy. ToDo: Remove below code. Uncomment above. - std::vector temp_buffer(_bytes_count, pattern); - // TODO: Do we really need blocking call here? Non-blocking one causes accuracy issues right now, but hopefully it can be fixed in more performant way. 
- const bool blocking = true; try { - cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(), _buffer.get(), temp_buffer.data(), _bytes_count, blocking, nullptr, &ev_ocl); + cl_stream.get_usm_helper().enqueue_fill_mem( + cl_stream.get_cl_queue(), _buffer.get(), static_cast(&pattern), sizeof(unsigned char), _bytes_count, nullptr, &ev_ocl); + if (blocking) { + ev_ocl.wait(); + } } catch (cl::Error const& err) { OPENVINO_THROW(OCL_ERR_MSG_FMT(err)); } @@ -532,7 +536,7 @@ event::ptr gpu_usm::fill(stream& stream, unsigned char pattern) { return ev; } -event::ptr gpu_usm::fill(stream& stream) { +event::ptr gpu_usm::fill(stream& stream, bool blocking) { // event::ptr ev{ new base_event(_context), false }; // cl::Event ev_ocl = downcast(ev.get())->get(); // cl::usm::enqueue_set_mem(cl_stream.get_cl_queue(), _buffer.get(), 0, _bytes_count, nullptr, &ev_ocl); @@ -543,7 +547,7 @@ event::ptr gpu_usm::fill(stream& stream) { GPU_DEBUG_TRACE_DETAIL << "Skip EnqueueMemcpy for 0 size tensor" << std::endl; return stream.create_user_event(true); } - return fill(stream, 0); + return fill(stream, 0, blocking); } event::ptr gpu_usm::copy_from(stream& stream, const void* data_ptr, size_t src_offset, size_t dst_offset, size_t size, bool blocking) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp index e2a68537cdc69e..e37518de3982a8 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp @@ -32,8 +32,8 @@ struct gpu_buffer : public lockable_gpu_mem, public memory { void* lock(const stream& stream, mem_lock_type type = mem_lock_type::read_write) override; void unlock(const stream& stream) override; - event::ptr fill(stream& stream, unsigned char pattern) override; - event::ptr fill(stream& stream) override; + event::ptr fill(stream& stream, unsigned char pattern, bool blocking = true) override; + event::ptr fill(stream& stream, bool blocking = true) override; shared_mem_params get_internal_params() const override; const cl::Buffer& get_buffer() const { assert(0 == _lock_count); @@ -58,8 +58,8 @@ struct gpu_image2d : public lockable_gpu_mem, public memory { void* lock(const stream& stream, mem_lock_type type = mem_lock_type::read_write) override; void unlock(const stream& stream) override; - event::ptr fill(stream& stream, unsigned char pattern) override; - event::ptr fill(stream& stream) override; + event::ptr fill(stream& stream, unsigned char pattern, bool blocking = true) override; + event::ptr fill(stream& stream, bool blocking = true) override; shared_mem_params get_internal_params() const override; const cl::Image2D& get_buffer() const { assert(0 == _lock_count); @@ -112,8 +112,8 @@ struct gpu_usm : public lockable_gpu_mem, public memory { cl::UsmMemory& get_buffer() { return _buffer; } void* buffer_ptr() const override { return _buffer.get(); } - event::ptr fill(stream& stream, unsigned char pattern) override; - event::ptr fill(stream& stream) override; + event::ptr fill(stream& stream, unsigned char pattern, bool blocking = true) override; + event::ptr fill(stream& stream, bool blocking = true) override; shared_mem_params get_internal_params() const override; event::ptr copy_from(stream& stream, const void* data_ptr, size_t src_offset, size_t dst_offset, size_t size, bool blocking) override; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/non_zero_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/non_zero_gpu_test.cpp index 
37a1ba8b982414..80122193265ebc 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/non_zero_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/non_zero_gpu_test.cpp @@ -556,7 +556,7 @@ TEST(non_zero_gpu, empty_input) { // Put some value into out buffer to ensure that it's non empty // That is needed to ensure that implementation correctly handles the cases when input tensor is empty and set count non zero to 0 - count_nonzero_inst->output_memory(0).fill(engine.get_service_stream(), 1); + count_nonzero_inst->output_memory(0).fill(engine.get_service_stream(), 1, true); engine.get_service_stream().finish(); auto count_nonzero_impl = count_nonzero_inst->get_impl(); From b5a0c60a9b4bf80d94a31045ad365d8f7bdbcdd8 Mon Sep 17 00:00:00 2001 From: Paul Youngsoo Ahn Date: Thu, 17 Oct 2024 04:35:03 +0900 Subject: [PATCH 039/112] [GPU] Support RoPE kernel for ChatGLM4 (#26799) * Add new RoPEFusion pattern in ngraph transformation * Add RoPE kernel ### Details: - *the input data order has changed from (**[seq_length, batch, 4608]**) in **ChatGLM3** to (**[batch, seq_length, 4608]**) in **ChatGLM4**. Within the RoPE process, the data order changes from (**[seq_length, batch, head_count, head_size]**) to (**[batch, head_count, seq_length, head_size]**) by a permute operation added in **ChatGLM4**.* - *the RoPE cache data order has changed from (**[seq_length, batch, head_count, 2]**) in ChatGLM3 to (**[batch, head_count, seq_length, 2]**) in **ChatGLM4**.* - *Consequently, the output of RoPE has also changed from (**[seq_length, batch, head_count, head_size]**) in **ChatGLM3** to (**[batch, head_count, seq_length, head_size]**) in **ChatGLM4*** - *Due to these changes, the RoPE pattern matching needs a new pattern, different from the existing ChatGLM pattern matching. 
Additionally, new kernels need to be added to accommodate these changes* ### Tickets: - *149024* --- .../ov_ops/rotary_positional_embeddings.hpp | 2 + .../fuse_rotary_positional_embeddings.hpp | 8 +- .../ov_ops/rotary_positional_embeddings.cpp | 29 +++- .../fuse_rotary_positional_embeddings.cpp | 108 +++++++++++---- .../fuse_rotary_positional_embeddings.cpp | 124 ++++++++++++++++++ .../include/intel_gpu/primitives/rope.hpp | 10 +- .../intel_gpu/src/graph/impls/ocl/rope.cpp | 1 + src/plugins/intel_gpu/src/graph/rope.cpp | 16 ++- .../kernel_selector/cl_kernels/rope_ref.cl | 27 ++-- .../kernels/rope/rope_kernel_base.cpp | 17 ++- .../kernels/rope/rope_kernel_base.h | 1 + .../src/plugin/transformations_pipeline.cpp | 2 +- .../subgraph_tests/rotary_pos_emb.cpp | 6 + .../include/subgraph_tests/rotary_pos_emb.hpp | 7 + .../subgraph/rotary_pos_emb.hpp | 12 ++ .../src/subgraph/rotary_pos_emb.cpp | 124 ++++++++++++++++++ 16 files changed, 445 insertions(+), 49 deletions(-) diff --git a/src/common/transformations/include/ov_ops/rotary_positional_embeddings.hpp b/src/common/transformations/include/ov_ops/rotary_positional_embeddings.hpp index 5d839c19600340..dcb9aef187d2d9 100644 --- a/src/common/transformations/include/ov_ops/rotary_positional_embeddings.hpp +++ b/src/common/transformations/include/ov_ops/rotary_positional_embeddings.hpp @@ -27,6 +27,8 @@ class TRANSFORMATIONS_API RoPE : public Op { bool is_interleaved = false; // interleaved mode, implies trans0213 happens after RoPE size_t rotary_ndims = 0; // dimensions to be embedded (d in the description) bool is_chatglm = false; // chatglm is special which overrides other setting + bool support_2d_rope = false; // 2d rope mode, Support 2 dimentional rope which is independant of batch and + // each head. change input order to [batch, head_cnt, 4608] to support 2d rope bool is_qwen = false; // Qwen is special which overrides other setting size_t head_cnt = 0; size_t head_size = 0; diff --git a/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp b/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp index 5cd99f88d13413..eb1c92bcf9607f 100644 --- a/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp @@ -38,7 +38,7 @@ class ov::pass::RoPEFusionGPTJ : public ov::pass::MatcherPass { class ov::pass::RoPEFusionChatGLM : public ov::pass::MatcherPass { public: OPENVINO_RTTI("RoPEFusionChatGLM", "0"); - RoPEFusionChatGLM(int split_output_id); + RoPEFusionChatGLM(int split_output_id, const bool support_2d_rope = false); }; class ov::pass::RoPEFusionQwen : public ov::pass::MatcherPass { @@ -84,7 +84,7 @@ class ov::pass::RoPEShareCosSin : public ov::pass::MatcherPass { class ov::pass::RoPEFusion : public ov::pass::GraphRewrite { public: OPENVINO_RTTI("RoPEFusion", "0"); - RoPEFusion() { + RoPEFusion(bool support_2d_rope = false) { add_matcher(); add_matcher(); // optional heads & tails are fused in separate matcher pass, @@ -95,6 +95,10 @@ class ov::pass::RoPEFusion : public ov::pass::GraphRewrite { add_matcher(0); add_matcher(1); + if (support_2d_rope) { + add_matcher(0, true); + add_matcher(1, true); + } add_matcher(0); add_matcher(1); diff --git a/src/common/transformations/src/ov_ops/rotary_positional_embeddings.cpp 
b/src/common/transformations/src/ov_ops/rotary_positional_embeddings.cpp index 915adecda0af68..3e75e2b88df266 100644 --- a/src/common/transformations/src/ov_ops/rotary_positional_embeddings.cpp +++ b/src/common/transformations/src/ov_ops/rotary_positional_embeddings.cpp @@ -45,13 +45,27 @@ void RoPE::validate_and_infer_types() { } if (m_config.is_chatglm) { - // chatGLM specific RoPE - // input [length, batch_size, (hidden_states_q + hidden_states_k + hidden_states_v)] - // output [length, batch_size, head_cnt, hidden_states_k] - set_output_type( - 0, - get_input_element_type(0), - {input_pshape[0], input_pshape[1], ov::Dimension(m_config.head_cnt), ov::Dimension(m_config.head_size)}); + if (m_config.support_2d_rope) { + // chatGLM specific RoPE + // input [batch_size, length, (hidden_states_q + hidden_states_k + hidden_states_v)] + // output [batch_size, head_cnt, length, hidden_states_k] + set_output_type(0, + get_input_element_type(0), + {input_pshape[0], + ov::Dimension(m_config.head_cnt), + input_pshape[1], + ov::Dimension(m_config.head_size)}); + } else { + // chatGLM specific RoPE + // input [length, batch_size, (hidden_states_q + hidden_states_k + hidden_states_v)] + // output [length, batch_size, head_cnt, hidden_states_k] + set_output_type(0, + get_input_element_type(0), + {input_pshape[0], + input_pshape[1], + ov::Dimension(m_config.head_cnt), + ov::Dimension(m_config.head_size)}); + } return; } @@ -79,6 +93,7 @@ bool RoPE::visit_attributes(ov::AttributeVisitor& visitor) { visitor.on_attribute("is_interleaved", m_config.is_interleaved); visitor.on_attribute("rotary_ndims", m_config.rotary_ndims); visitor.on_attribute("is_chatglm", m_config.is_chatglm); + visitor.on_attribute("support_2d_rope", m_config.support_2d_rope); visitor.on_attribute("is_qwen", m_config.is_qwen); visitor.on_attribute("head_cnt", m_config.head_cnt); visitor.on_attribute("head_size", m_config.head_size); diff --git a/src/common/transformations/src/transformations/common_optimizations/fuse_rotary_positional_embeddings.cpp b/src/common/transformations/src/transformations/common_optimizations/fuse_rotary_positional_embeddings.cpp index b6c19a0a0391fd..143603f0415373 100644 --- a/src/common/transformations/src/transformations/common_optimizations/fuse_rotary_positional_embeddings.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/fuse_rotary_positional_embeddings.cpp @@ -417,12 +417,16 @@ ov::pass::RoPEFusionGPTJ::RoPEFusionGPTJ() { this->register_matcher(m, callback); } -ov::pass::RoPEFusionChatGLM::RoPEFusionChatGLM(int split_output_id) { +ov::pass::RoPEFusionChatGLM::RoPEFusionChatGLM(int split_output_id, const bool support_2d_rope) { MATCHER_SCOPE(RoPEFusionChatGLM); - auto qkv_linear = makePattern("[?,?,?]"); // [seq_length, batch_size, 4608] + // [seq_length, batch_size, input_size(will be cropped to match hidden state size)] + // [batch_size, seq_length, input_size] support_2d_rope + auto qkv_linear = makePattern("[?,?,?]"); auto seq_length = makePattern("i32[1]"); - auto cos_sin_cache = makePattern("[?,?,?,?]"); // [max_pos_embeddings, batch_size, 32, 2] + // [max_pos_embeddings, batch_size, half_rotary_dims, 2] + // [batch_size, max_pos_embeddings, half_rotary_dims, 2] support_2d_rope + auto cos_sin_cache = makePattern("[?,?,?,?]"); auto ndims = ov::gen_pattern::Symbol("ndims"); auto head_cnt = ov::gen_pattern::Symbol("head_cnt"); @@ -436,37 +440,76 @@ ov::pass::RoPEFusionChatGLM::RoPEFusionChatGLM(int split_output_id) { auto qkv_proj = makePattern({qkv_linear, -1, 
{total_size_q, total_size_k, total_size_v}}); qkv_proj->set_output_size(3); - // get key [L, B, Hkv, S] auto cur_key = makePattern({qkv_proj->output(split_output_id), {0, 0, head_cnt, head_size}}, {{"special_zero", true}}); - auto slice_Slice_437 = GenSlice(cur_key, 0, ndims, 1, 3); - auto var_split_1 = makePattern({cur_key, 3, {ndims, ov::gen_pattern::Symbol("end")}}); + std::shared_ptr input_key = nullptr; + // Extended the RoPE to a two-dimensional form to accommodate the 2D positional encoding in GLM. + // Calculate positional embedding independent of batch and each head + if (support_2d_rope) { + // Get transposed key [batch, head_cnt, seq_length, head_size] + input_key = makePattern({cur_key, {0, 2, 1, 3}}); + } else { + // Get key [seq_length, batch, head_cnt, head_size] + input_key = std::move(cur_key); + } + + auto slice_Slice_437 = GenSlice(input_key, 0, ndims, 1, 3); + auto var_split_1 = makePattern({input_key, 3, {ndims, ov::gen_pattern::Symbol("end")}}); var_split_1->set_output_size(2); // rotate half - auto ListConstruct_452_Concat = - makePattern({seq_length, {-1}, {head_cnt}, {ndims / 2}, {2}}, {{"axis", 0}}); - auto const_target_shape_1 = makeConst({seq_len, batch, head_cnt, ndims / 2, 2}); - - auto ListConstruct_379_Concat = - makePattern({seq_length, {-1}, {1}, {ndims / 2}, {2}}, {{"axis", 0}}); - auto const_target_shape_2 = makeConst({seq_len, batch, 1, ndims / 2, 2}); - - auto reshape_Reshape_453 = makePattern( - {slice_Slice_437 | var_split_1->output(0), ListConstruct_452_Concat | const_target_shape_1}); + std::shared_ptr reshape_Reshape_453 = nullptr; + if (support_2d_rope) { + auto const_target_shape_1 = makeConst({0, head_cnt, 0, ndims / 2, 2}); + reshape_Reshape_453 = + makePattern({slice_Slice_437 | var_split_1->output(0), const_target_shape_1}, + {{"special_zero", true}}); + } else { + auto ListConstruct_452_Concat = + makePattern({seq_length, {-1}, {head_cnt}, {ndims / 2}, {2}}, {{"axis", 0}}); + auto const_target_shape_1 = makeConst({seq_len, batch, head_cnt, ndims / 2, 2}); + reshape_Reshape_453 = makePattern( + {slice_Slice_437 | var_split_1->output(0), ListConstruct_452_Concat | const_target_shape_1}); + } auto x_even = makePattern({reshape_Reshape_453, 0, -1}, {{"batch_dims", 0}}); auto x_odd = makePattern({reshape_Reshape_453, 1, -1}, {{"batch_dims", 0}}); - auto slice_Slice_449 = makePattern({cos_sin_cache, {0}, seq_length, {1}, {0}}); - auto slice_StridedSlice_449 = GenStridedSlice(cos_sin_cache, {0}, seq_length, {1}, 0); + auto var_split_2 = makePattern({cos_sin_cache, 0, {0, ov::gen_pattern::Symbol("end")}}); var_split_2->set_output_size(2); - auto view_Reshape_460 = - makePattern({slice_StridedSlice_449 | slice_Slice_449 | var_split_2->output(0), - ListConstruct_379_Concat | const_target_shape_2}, - {{"special_zero", false}}); + std::shared_ptr view_Reshape_460 = nullptr; + if (support_2d_rope) { + auto ListConstruct_379_Concat = + makePattern({{-1}, {1}, seq_length, {ndims / 2}, {2}}, {{"axis", 0}}); + auto const_target_shape_2 = makeConst({batch, 1, seq_len, ndims / 2, 2}); + + // Slice cos_sin_cache to support 2-dimentional RoPE + auto ScatterUpdate = makePattern({{0, 0}, {1}, seq_length, {0}}, {}); + auto slice_Slice_449_1d = makePattern({cos_sin_cache, {0}, seq_length, {1}, {1}}); + auto slice_Slice_449_2d = makePattern({cos_sin_cache, {0, 0}, ScatterUpdate, {1, 1}, {0}}); + auto slice_StridedSlice_449 = GenStridedSlice(cos_sin_cache, {0, 0}, ScatterUpdate, {1, 1}, 1); + + // [batch, 1, seq_length, half_rotary_dims, 2] + view_Reshape_460 = 
makePattern( + {slice_StridedSlice_449 | slice_Slice_449_1d | slice_Slice_449_2d | var_split_2->output(0), + ListConstruct_379_Concat | const_target_shape_2}, + {{"special_zero", false}}); + } else { + auto ListConstruct_379_Concat = + makePattern({seq_length, {-1}, {1}, {ndims / 2}, {2}}, {{"axis", 0}}); + auto const_target_shape_2 = makeConst({seq_len, batch, 1, ndims / 2, 2}); + + auto slice_Slice_449 = makePattern({cos_sin_cache, {0}, seq_length, {1}, {0}}); + auto slice_StridedSlice_449 = GenStridedSlice(cos_sin_cache, {0}, seq_length, {1}, 0); + + // [seq_length, 1, batch, half_rotary_dims, 2] + view_Reshape_460 = + makePattern({slice_StridedSlice_449 | slice_Slice_449 | var_split_2->output(0), + ListConstruct_379_Concat | const_target_shape_2}, + {{"special_zero", false}}); + } auto cos_tab = makePattern({view_Reshape_460, 0, -1}, {{"batch_dims", 0}}); auto x_even_cos = makePattern({x_even, cos_tab}, {{"auto_broadcast", "numpy"}}); @@ -487,11 +530,21 @@ ov::pass::RoPEFusionChatGLM::RoPEFusionChatGLM(int split_output_id) { auto ShapeOf_135133 = makePattern({stack_481}); auto flatten_Slice_497 = GenSlice(ShapeOf_135133, 0, 3, 1, 0); auto flatten_Concat_500 = makePattern({flatten_Slice_497, {-1}}, {{"axis", 0}}); - auto const_target_shape_3 = makeConst({seq_len, batch, head_cnt, ndims}); - // [length, batch, head_cnt, half_rotary_dims, 2] - auto flatten_Reshape_501 = - makePattern({stack_481, flatten_Concat_500 | const_target_shape_3}, {{"special_zero", true}}); - auto slice_Slice_443 = GenSlice(cur_key, ndims, INT_MAX, 1, 3); + + std::shared_ptr const_target_shape_3 = nullptr; + std::shared_ptr flatten_Reshape_501 = nullptr; + if (support_2d_rope) { + // [batch, head_cnt, length, half_rotary_dims, 2] + const_target_shape_3 = makeConst({batch, head_cnt, seq_len, ndims}); + flatten_Reshape_501 = makePattern({stack_481, flatten_Concat_500 | const_target_shape_3}, + {{"special_zero", true}}); + } else { + // [length, batch, head_cnt, half_rotary_dims, 2] + const_target_shape_3 = makeConst({seq_len, batch, head_cnt, ndims}); + flatten_Reshape_501 = makePattern({stack_481, flatten_Concat_500 | const_target_shape_3}, + {{"special_zero", true}}); + } + auto slice_Slice_443 = GenSlice(input_key, ndims, INT_MAX, 1, 3); auto cat_Concat_505 = makePattern({flatten_Reshape_501, slice_Slice_443 | var_split_1->output(1)}, {{"axis", -1}}); @@ -510,6 +563,7 @@ ov::pass::RoPEFusionChatGLM::RoPEFusionChatGLM(int split_output_id) { OutputVector new_args; config.rotary_ndims = static_cast(validator["ndims"]); config.is_chatglm = true; + config.support_2d_rope = support_2d_rope; config.head_cnt = static_cast(validator["head_cnt"]); config.head_size = static_cast(validator["head_size"]); diff --git a/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp b/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp index 5b54b4a7cce437..6eb0add525c815 100644 --- a/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp +++ b/src/common/transformations/tests/common_optimizations/fuse_rotary_positional_embeddings.cpp @@ -135,6 +135,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_LLama2_no_gather) { {"config.input_trans0213", true}, {"config.is_interleaved", false}, {"config.is_chatglm", false}, + {"config.support_2d_rope", false}, {"config.is_qwen", false}, {"config.head_cnt", 0}, {"config.head_size", 0}, @@ -170,6 +171,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_LLama2_with_gather) { 
{"config.input_trans0213", true}, {"config.is_interleaved", false}, {"config.is_chatglm", false}, + {"config.support_2d_rope", false}, {"config.is_qwen", false}, {"config.head_cnt", 0}, {"config.head_size", 0}, @@ -308,6 +310,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_GPTNEOX_no_gather) { {"config.input_trans0213", true}, {"config.is_interleaved", false}, {"config.is_chatglm", false}, + {"config.support_2d_rope", false}, {"config.is_qwen", false}, {"config.head_cnt", 0}, {"config.head_size", 0}, @@ -342,6 +345,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_GPTNEOX_with_gather) { {"config.input_trans0213", true}, {"config.is_interleaved", false}, {"config.is_chatglm", false}, + {"config.support_2d_rope", false}, {"config.is_qwen", false}, {"config.head_cnt", 0}, {"config.head_size", 0}, @@ -457,6 +461,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_GPTJ) { {"config.input_trans0213", false}, {"config.is_interleaved", true}, {"config.is_chatglm", false}, + {"config.support_2d_rope", false}, {"config.is_qwen", false}, {"config.head_cnt", 0}, {"config.head_size", 0}, @@ -566,6 +571,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_chatGML) { {"config.is_interleaved", false}, {"config.rotary_ndims", rotary_ndims}, {"config.is_chatglm", true}, + {"config.support_2d_rope", false}, {"config.is_qwen", false}, {"config.head_cnt", num_heads}, {"config.head_size", ndims}, @@ -643,6 +649,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_chatGML_Slice) { {"config.is_interleaved", false}, {"config.rotary_ndims", rotary_ndims}, {"config.is_chatglm", true}, + {"config.support_2d_rope", false}, {"config.is_qwen", false}, {"config.head_cnt", num_heads}, {"config.head_size", ndims}, @@ -723,6 +730,7 @@ TEST_F(TransformationTestsF, ConvertToROPE_GPTJ_Slice) { {"config.input_trans0213", false}, {"config.is_interleaved", true}, {"config.is_chatglm", false}, + {"config.support_2d_rope", false}, {"config.is_qwen", false}, {"config.head_cnt", 0}, {"config.head_size", 0}, @@ -730,4 +738,120 @@ TEST_F(TransformationTestsF, ConvertToROPE_GPTJ_Slice) { {"config.gather_position_arg_id", 0}}); model_ref = std::make_shared(ov::NodeVector{rope}, ov::ParameterVector{input, cos_sin}); } +} + +TEST_F(TransformationTestsF, ConvertToROPE_chatGML_2d_rope) { + disable_rt_info_check(); + const int batch = 2; + const int seq_len = 7; + const int num_heads = 32; + const int ndims = 128; + const int rotary_ndims = 64; + const int max_pos_length = 2048; + { + auto input = std::make_shared(ov::element::f32, ov::PartialShape{batch, seq_len, 4608}); + auto cos_sin_cache = + std::make_shared(ov::element::f32, + ov::PartialShape{max_pos_length, (rotary_ndims / 2), 2}); + auto position_ids = std::make_shared(ov::element::i32, ov::PartialShape{batch, seq_len}); + + auto __module_transformer_index_67_Gather = + makeOP({cos_sin_cache, position_ids, 0}, {{"batch_dims", 0}}); + + auto ListUnpack_321 = makeOP({input, -1, {4096, 256, 256}}); + auto view_Reshape = makeOP({ListUnpack_321->output(0), {0, 0, num_heads, ndims}}, + {{"special_zero", true}}); + + auto permute_Transpose = makeOP({view_Reshape, {0, 2, 1, 3}}, {}); + + auto slice_Slice_357 = + makeOP({permute_Transpose, {0, 0, 0, 0}, {0, 0, 0, rotary_ndims}, {1, 1, 1, 1}}, + {{"begin_mask", {1, 1, 1, 0}}, + {"end_mask", {1, 1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + + auto aten_view_Reshape_1 = + makeOP({ListUnpack_321->output(1), {0, 0, 2, ndims}}, {{"special_zero", true}}); + auto aten_transpose_1 = makeOP({aten_view_Reshape_1, {0, 2, 1, 
3}}); + auto shape_of_105249 = makeOP({aten_transpose_1}, {{"output_type", "i32"}}); + auto gather_105252 = makeOP({shape_of_105249, {2}, {0}}, {{"batch_dims", 0}}); + auto scatter_update_63441 = makeOP({{0, 0}, {1}, gather_105252, {0}}); + // connected to cos_sin_cache + auto slice_Slice_369 = makeOP( + {__module_transformer_index_67_Gather, {0, 0}, scatter_update_63441, {1, 1}}, + {{"begin_mask", {1, 0}}, + {"end_mask", {1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto list_construct_concat_1 = + makeOP({{-1}, {1}, gather_105252, {rotary_ndims / 2}, {2}}, {{"axis", 0}}); + + auto reshape_Reshape_373 = + makeOP({slice_Slice_357, {0, 32, 0, 32, 2}}, {{"special_zero", true}}); + auto select_Gather_384 = + makeOP({reshape_Reshape_373, 0, -1}, {{"batch_dims", 0}}); // x_even + auto select_Gather_381 = + makeOP({reshape_Reshape_373, 1, -1}, {{"batch_dims", 0}}); // x_odd + auto view_Reshape_380 = + makeOP({slice_Slice_369, list_construct_concat_1}, {{"special_zero", false}}); + auto select_Gather_385 = makeOP({view_Reshape_380, 0, -1}, {{"batch_dims", 0}}); // cos_tab + auto select_Gather_382 = makeOP({view_Reshape_380, 1, -1}, {{"batch_dims", 0}}); // sin_tab + + auto mul_Multiply_386 = makeOP({select_Gather_381, select_Gather_382}, + {{"auto_broadcast", "numpy"}}); // x_odd_sin + auto mul_Multiply_383 = makeOP({select_Gather_384, select_Gather_385}, + {{"auto_broadcast", "numpy"}}); // x_even_cos + auto Multiply_101315 = + makeOP({mul_Multiply_386, -1.000000f}, {{"auto_broadcast", "numpy"}}); + auto sub_Subtract_389 = + makeOP({mul_Multiply_383, Multiply_101315}, {{"auto_broadcast", "numpy"}}); + + auto mul_Multiply_391 = makeOP({select_Gather_381, select_Gather_385}, + {{"auto_broadcast", "numpy"}}); // x_odd_cos + auto mul_Multiply_393 = makeOP({select_Gather_384, select_Gather_382}, + {{"auto_broadcast", "numpy"}}); // x_even_sin + auto add_Add_396 = makeOP({mul_Multiply_391, mul_Multiply_393}, {{"auto_broadcast", "numpy"}}); + + auto Unsqueeze_62716 = makeOP({sub_Subtract_389, -1}, {}); + auto Unsqueeze_62717 = makeOP({add_Add_396, -1}, {}); + + auto stack_401 = makeOP({Unsqueeze_62716, Unsqueeze_62717}, {{"axis", -1}}); + auto flatten_Reshape_421 = + makeOP({stack_401, {0, num_heads, 0, rotary_ndims}}, {{"special_zero", true}}); + auto slice_Slice_363 = makeOP( + {permute_Transpose, {0, 0, 0, rotary_ndims}, {0, 0, 0, INT_MAX}, {1, 1, 1, 1}}, + {{"begin_mask", {1, 1, 1, 0}}, + {"end_mask", {1, 1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto cat_Concat_425 = makeOP({flatten_Reshape_421, slice_Slice_363}, {{"axis", -1}}); + model = std::make_shared(ov::NodeVector{cat_Concat_425}, + ov::ParameterVector{input, cos_sin_cache, position_ids}); + } + manager.register_pass(true); + { + auto input = std::make_shared(ov::element::f32, ov::Shape{batch, seq_len, 4608}); + auto cos_sin_cache = + std::make_shared(ov::element::f32, ov::Shape{max_pos_length, (rotary_ndims / 2), 2}); + auto position_ids = std::make_shared(ov::element::i32, ov::PartialShape{batch, seq_len}); + auto gather_cos_sin = makeOP({cos_sin_cache, position_ids, 0}, {{"batch_dims", 0}}); + auto rope = makeOP({input, gather_cos_sin, gather_cos_sin}, + {{"config.slice_start", 0}, + {"config.slice_stop", 4096}, + {"config.input_trans0213", false}, + {"config.is_interleaved", false}, + {"config.rotary_ndims", rotary_ndims}, + {"config.is_chatglm", true}, + {"config.support_2d_rope", true}, + {"config.is_qwen", false}, + {"config.head_cnt", 
num_heads}, + {"config.head_size", ndims}, + {"config.gather_position_arg_id", 0}}); + model_ref = + std::make_shared(ov::NodeVector{rope}, ov::ParameterVector{input, cos_sin_cache, position_ids}); + } } \ No newline at end of file diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/rope.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/rope.hpp index 8f4ae2c66334ee..d7933e2180fe6f 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/rope.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/rope.hpp @@ -26,7 +26,11 @@ struct rope : public primitive_base { size_t gather_rank = 0) : primitive_base(id, inputs), config(config), - gather_rank(gather_rank) {} + gather_rank(gather_rank) { + OPENVINO_ASSERT((!config.support_2d_rope + || (config.support_2d_rope && config.is_chatglm)), + "2D RoPE is currently only supported in Chatglm!"); + } RoPE::Config config; size_t gather_rank = 0; @@ -38,6 +42,7 @@ struct rope : public primitive_base { seed = hash_combine(seed, config.head_size); seed = hash_combine(seed, config.input_trans0213); seed = hash_combine(seed, config.is_chatglm); + seed = hash_combine(seed, config.support_2d_rope); seed = hash_combine(seed, config.is_interleaved); seed = hash_combine(seed, config.is_qwen); seed = hash_combine(seed, config.rotary_ndims); @@ -58,6 +63,7 @@ struct rope : public primitive_base { config.head_size == rhs_casted.config.head_size && config.input_trans0213 == rhs_casted.config.input_trans0213 && config.is_chatglm == rhs_casted.config.is_chatglm && + config.support_2d_rope == rhs_casted.config.support_2d_rope && config.is_interleaved == rhs_casted.config.is_interleaved && config.is_qwen == rhs_casted.config.is_qwen && config.rotary_ndims == rhs_casted.config.rotary_ndims && @@ -73,6 +79,7 @@ struct rope : public primitive_base { ob << config.head_size; ob << config.input_trans0213; ob << config.is_chatglm; + ob << config.support_2d_rope; ob << config.is_interleaved; ob << config.is_qwen; ob << config.rotary_ndims; @@ -88,6 +95,7 @@ struct rope : public primitive_base { ib >> config.head_size; ib >> config.input_trans0213; ib >> config.is_chatglm; + ib >> config.support_2d_rope; ib >> config.is_interleaved; ib >> config.is_qwen; ib >> config.rotary_ndims; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/rope.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/rope.cpp index f65768b8e6eb20..7764b7b0964d1c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/rope.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/rope.cpp @@ -53,6 +53,7 @@ struct rope_impl : typed_primitive_impl_ocl { params.is_qwen = primitive->config.is_qwen; params.is_chatglm = primitive->config.is_chatglm; + params.support_2d_rope = primitive->config.support_2d_rope; params.transposed_input = primitive->config.input_trans0213; for (size_t i = 1; i < impl_param.input_layouts.size(); ++i) { diff --git a/src/plugins/intel_gpu/src/graph/rope.cpp b/src/plugins/intel_gpu/src/graph/rope.cpp index ea904916d4cf41..e168626f8d69a2 100644 --- a/src/plugins/intel_gpu/src/graph/rope.cpp +++ b/src/plugins/intel_gpu/src/graph/rope.cpp @@ -30,11 +30,24 @@ std::vector rope_inst::calc_output_layouts(rope_node const& node, kernel ShapeType output_shape = input0_shape; - if (desc->config.is_qwen || desc->config.is_chatglm) { + if (desc->config.is_qwen) { output_shape = { input0_shape[0], input0_shape[1], ov::Dimension(desc->config.head_cnt), ov::Dimension(desc->config.head_size) }; + } else if (desc->config.is_chatglm) { + if (desc->config.support_2d_rope) { + // 
input0_shape = [batch_size, seq_length] + output_shape = { input0_shape[0], + ov::Dimension(desc->config.head_cnt), + input0_shape[1], + ov::Dimension(desc->config.head_size) }; + } else { + output_shape = { input0_shape[0], + input0_shape[1], + ov::Dimension(desc->config.head_cnt), + ov::Dimension(desc->config.head_size) }; + } } else { auto input_slice_size = desc->config.slice_stop - desc->config.slice_start; if (input_slice_size > 0) { @@ -63,6 +76,7 @@ std::string rope_inst::to_string(rope_node const& node) { rope_info.add("head_size", desc->config.head_size); rope_info.add("input_trans0213", desc->config.input_trans0213); rope_info.add("is_chatglm", desc->config.is_chatglm); + rope_info.add("support_2d_rope", desc->config.support_2d_rope); rope_info.add("is_interleaved", desc->config.is_interleaved); rope_info.add("is_qwen", desc->config.is_qwen); rope_info.add("rotary_ndims", desc->config.rotary_ndims); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl index 36d4306b59ba79..38066b4461def4 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/rope_ref.cl @@ -11,12 +11,22 @@ KERNEL(rope_ref)( const __global INPUT1_TYPE* cos_sin, __global OUTPUT_TYPE* output) { +#ifdef SUPPORT_2D_ROPE + const uint p = get_global_id(0) / HEAD_COUNT; + const uint h = get_global_id(0) % HEAD_COUNT; + const uint b = get_global_id(1);//sequence length + const uint rf = get_global_id(2);//max(HALF_ROTARY_NDIMS, HEAD_SIZE - ROTARY_NDIMS) + uint output_idx = OUTPUT_GET_INDEX(p, h, b, 0); +#else const uint p = get_global_id(0); const uint b = get_global_id(1); const uint h = (uint)get_global_id(2) % HEAD_COUNT; const uint rf = (uint)get_global_id(2) / HEAD_COUNT; + uint output_idx = OUTPUT_GET_INDEX(p, b, h, 0); +#endif + uint r = rf < HALF_ROTARY_NDIMS ? rf * 2 : 0; - uint f = rf < HEAD_SIZE - ROTARY_NDIMS ? rf : 0; + uint f = rf < HEAD_SIZE - ROTARY_NDIMS ? rf * 2 : 0; #ifdef ENABLE_SLICE uint input_idx = GET_DATA_INDEX(SLICED_INPUT0, p, b, h * HEAD_SIZE, 0); @@ -30,19 +40,18 @@ KERNEL(rope_ref)( uint cos_sin_b = b < INPUT1_FEATURE_NUM ? 
b : 0; uint cos_sin_idx = INPUT1_GET_INDEX(cos_sin_p, cos_sin_b, 0, 0); - uint output_idx = OUTPUT_GET_INDEX(p, b, h, 0); - - INPUT1_TYPE cosv = cos_sin[cos_sin_idx + r]; - INPUT1_TYPE sinv = cos_sin[cos_sin_idx + r + 1]; + float cosv = convert_float(cos_sin[cos_sin_idx + r]); + float sinv = convert_float(cos_sin[cos_sin_idx + r + 1]); - INPUT0_TYPE in1 = input[input_idx + r]; - INPUT0_TYPE in2 = input[input_idx + r + 1]; + float in1 = convert_float(input[input_idx + r]); + float in2 = convert_float(input[input_idx + r + 1]); - output[output_idx + r] = cosv * in1 - sinv * in2; - output[output_idx + r + 1] = sinv * in1 + cosv * in2; + output[output_idx + r] = TO_OUTPUT_TYPE(cosv * in1 - sinv * in2); + output[output_idx + r + 1] = TO_OUTPUT_TYPE(sinv * in1 + cosv * in2); #ifdef ENABLE_IO_COPY output[output_idx + ROTARY_NDIMS + f] = input[input_idx + ROTARY_NDIMS + f]; + output[output_idx + ROTARY_NDIMS + f + 1] = input[input_idx + ROTARY_NDIMS + f + 1]; #endif } #endif diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp index a9e0818aeae2f5..a48632f6c45509 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.cpp @@ -70,6 +70,9 @@ JitConstants RoPEKernelBase::GetJitConstants(const rope_params& params, RoPEKern if (params.is_qwen) { jit.AddConstant(MakeJitConstant("QWEN", true)); } else if (params.is_chatglm) { + if (params.support_2d_rope) { + jit.AddConstant(MakeJitConstant("SUPPORT_2D_ROPE", true)); + } jit.AddConstant(MakeJitConstant("CHATGLM", true)); } else { jit.AddConstant(MakeJitConstant("RotateHalf", true)); @@ -85,10 +88,22 @@ RoPEKernelBase::DispatchData RoPEKernelBase::SetDefault(const rope_params& param std::vector> dims_by_gws = {{ Tensor::DataChannelName::BATCH }, { Tensor::DataChannelName::FEATURE }, { Tensor::DataChannelName::Y, Tensor::DataChannelName::X }}; - if (params.is_chatglm || params.is_qwen) { + if (params.is_qwen) { dispatchData.gws = {input.Batch().v, input.Feature().v, params.head_cnt * std::max(params.rotary_ndims / 2ul, params.head_size - params.rotary_ndims)}; + } else if (params.is_chatglm) { + if (params.support_2d_rope) { + // input [batch_size, seq_length] + // output [batch_size, head_count, seq_length, half_rotary_ndims] + dispatchData.gws = {input.Batch().v * params.head_cnt, + input.Feature().v, + params.rotary_ndims / 2ul}; + } else { + dispatchData.gws = {input.Batch().v, + input.Feature().v, + params.head_cnt * (params.rotary_ndims / 2ul)}; + } } else { dispatchData.gws = {output.Batch().v, output.Feature().v, diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.h index 5d55fd082765e8..472131eba5d82f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/rope/rope_kernel_base.h @@ -24,6 +24,7 @@ struct rope_params : public base_params { bool is_qwen = false; bool is_chatglm = false; + bool support_2d_rope = false; bool transposed_input = false; }; diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 40c7ab48c486cb..f173e378fca3f9 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ 
b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -862,7 +862,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const size_t zp_pad_size = device_info.supports_immad ? 16 : 32; manager.register_pass(zp_pad_size, device_info.supports_immad); - manager.register_pass(); + manager.register_pass(true); pass_config->disable(); pass_config->disable(); pass_config->disable(); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp index 9565036f7b452d..741014b461e7f0 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp @@ -44,5 +44,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_RoPETestLlama2, ::testing::Values(ov::test::utils::DEVICE_GPU), RoPETestLlama2Slice::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_RoPETestChatGLM, + RoPETestChatGLM2DRoPEStridedSlice, + ::testing::Values(ov::test::utils::DEVICE_GPU), + RoPETestChatGLM2DRoPEStridedSlice::getTestCaseName); + + } // namespace test } // namespace ov diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/rotary_pos_emb.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/rotary_pos_emb.hpp index f2b19a6748a6a7..7100ddca1083e3 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/rotary_pos_emb.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/rotary_pos_emb.hpp @@ -87,5 +87,12 @@ TEST_P(RoPETestGPTJSlice, CompareWithRefs) { CheckNumberOfNodesWithType(function, {"RoPE"}, 1); }; +TEST_P(RoPETestChatGLM2DRoPEStridedSlice, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + run(); + auto function = compiledModel.get_runtime_model(); + CheckNumberOfNodesWithType(function, {"RoPE"}, 1); +}; + } // namespace test } // namespace ov diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/rotary_pos_emb.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/rotary_pos_emb.hpp index 2663a6f5ad3fab..e1182bd3b16e13 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/rotary_pos_emb.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/rotary_pos_emb.hpp @@ -115,5 +115,17 @@ class RoPETestGPTJSlice : public RoPETestGPTJStridedSlice { void SetUp() override; }; +class RoPETestChatGLM2DRoPEStridedSlice : public SubgraphBaseTest, public testing::WithParamInterface { +private: + std::shared_ptr buildROPE_ChatGLM(int batch, int head_cnt, int rotary_dims); +protected: + ov::Tensor create_i32_tensor(const ov::Shape& shape, int start, int step = 1); + void generate_inputs(const std::vector& targetInputStaticShapes) override; + void SetUp() override; + +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); +}; + } // namespace test } // namespace ov diff --git a/src/tests/functional/shared_test_classes/src/subgraph/rotary_pos_emb.cpp b/src/tests/functional/shared_test_classes/src/subgraph/rotary_pos_emb.cpp index 46ea730ac32a8c..a1848903bb76a2 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/rotary_pos_emb.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/rotary_pos_emb.cpp @@ -1027,5 +1027,129 @@ std::shared_ptr RoPETestGPTJSlice::buildROPE_GPTJ(int num_head, return 
std::make_shared(model_output, ov::ParameterVector{input, sincos}); } +std::shared_ptr RoPETestChatGLM2DRoPEStridedSlice::buildROPE_ChatGLM(int batch, int head_cnt, int rotary_dims) { + auto input = std::make_shared(ov::element::f32, PartialShape{batch, -1, 4096 + 256 + 256}); + auto cos_sin_cache = std::make_shared(ov::element::f32, PartialShape{32768, 32, 2}); + auto position_ids = std::make_shared(ov::element::i32, PartialShape{-1, -1}); + + auto __module_transformer_index_67_Gather = + makeOP({cos_sin_cache, position_ids, 0}, {{"batch_dims", 0}}); + + auto ListUnpack_321 = makeOP({input, -1, {4096, 256, 256}}); + auto view_Reshape = makeOP({ListUnpack_321->output(0), {0, 0, 32, 128}}, {{"special_zero", true}}); + + auto permute_Transpose = makeOP({view_Reshape, {0, 2, 1, 3}}, {}); + + auto slice_Slice_357 = + makeOP({permute_Transpose, {0, 0, 0, 0}, {0, 0, 0, 64}, {1, 1, 1, 1}}, + {{"begin_mask", {1, 1, 1, 0}}, + {"end_mask", {1, 1, 1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + + auto aten_view_Reshape_1 = makeOP({ListUnpack_321->output(1), {0, 0, 2, 128}}, {{"special_zero", true}}); + auto aten_transpose_1 = makeOP({aten_view_Reshape_1, {0, 2, 1, 3}}); + auto shape_of_105249 = makeOP({aten_transpose_1}, {{"output_type", "i32"}}); + auto gather_105252 = makeOP({shape_of_105249, {2}, {0}}, {{"batch_dims", 0}}); + auto scatter_update_63441 = makeOP({{0, 0}, {1}, gather_105252, {0}}); + // connected to cos_sin_cache + auto slice_Slice_369 = + makeOP({__module_transformer_index_67_Gather, {0, 0}, scatter_update_63441, {1, 1}}, + {{"begin_mask", {1, 0}}, + {"end_mask", {1, 0}}, + {"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto list_construct_concat_1 = makeOP({{-1}, {1}, gather_105252, {32}, {2}}, {{"axis", 0}}); + + auto reshape_Reshape_373 = + makeOP({slice_Slice_357, {0, 32, 0, 32, 2}}, {{"special_zero", true}}); + auto select_Gather_384 = makeOP({reshape_Reshape_373, 0, -1}, {{"batch_dims", 0}});//x_even + auto select_Gather_381 = makeOP({reshape_Reshape_373, 1, -1}, {{"batch_dims", 0}});//x_odd + + auto view_Reshape_380 = + makeOP({slice_Slice_369, list_construct_concat_1}, {{"special_zero", false}}); + auto select_Gather_385 = makeOP({view_Reshape_380, 0, -1}, {{"batch_dims", 0}});//cos_tab + auto select_Gather_382 = makeOP({view_Reshape_380, 1, -1}, {{"batch_dims", 0}});//sin_tab + + auto mul_Multiply_386 = + makeOP({select_Gather_381, select_Gather_382}, {{"auto_broadcast", "numpy"}});//x_odd_sin + auto mul_Multiply_383 = + makeOP({select_Gather_384, select_Gather_385}, {{"auto_broadcast", "numpy"}});//x_even_cos + auto sub_Subtract_389 = + makeOP({mul_Multiply_383, mul_Multiply_386}, {{"auto_broadcast", "numpy"}}); + + auto mul_Multiply_391 = + makeOP({select_Gather_381, select_Gather_385}, {{"auto_broadcast", "numpy"}});//x_odd_cos + auto mul_Multiply_393 = + makeOP({select_Gather_384, select_Gather_382}, {{"auto_broadcast", "numpy"}});//x_even_sin + auto add_Add_396 = makeOP({mul_Multiply_391, mul_Multiply_393}, {{"auto_broadcast", "numpy"}}); + + auto Unsqueeze_62716 = makeOP({sub_Subtract_389, -1}, {}); + auto Unsqueeze_62717 = makeOP({add_Add_396, -1}, {}); + + auto stack_401 = makeOP({Unsqueeze_62716, Unsqueeze_62717}, {{"axis", -1}}); + auto flatten_Reshape_421 = makeOP({stack_401, {0, 32, 0, 64}}, {{"special_zero", true}}); + auto slice_Slice_363 = + makeOP({permute_Transpose, {0, 0, 0, 64}, {0, 0, 0, INT_MAX}, {1, 1, 1, 1}}, + {{"begin_mask", {1, 1, 1, 0}}, + {"end_mask", {1, 1, 1, 0}}, + 
{"new_axis_mask", {}}, + {"shrink_axis_mask", {}}, + {"ellipsis_mask", {}}}); + auto cat_Concat_425 = makeOP({flatten_Reshape_421, slice_Slice_363}, {{"axis", -1}}); + return std::make_shared(ov::NodeVector{cat_Concat_425}, + ov::ParameterVector{input, cos_sin_cache, position_ids}); +} + +ov::Tensor RoPETestChatGLM2DRoPEStridedSlice::create_i32_tensor(const ov::Shape& shape, int start, int step) { + auto tensor = ov::Tensor(ov::element::i32, shape); + auto* ptr = static_cast(tensor.data()); + for (size_t i = 0; i < tensor.get_size(); i++) { + ptr[i] = start; + start += step; + } + return tensor; +} + +void RoPETestChatGLM2DRoPEStridedSlice::generate_inputs(const std::vector& targetInputStaticShapes) { + const auto& funcInputs = function->inputs(); + + auto& input_shape = targetInputStaticShapes[0]; + auto batch = input_shape[0]; + auto seq_length = input_shape[1]; + + ov::Tensor t_input = utils::create_and_fill_tensor(funcInputs[0].get_element_type(), input_shape, 2, -1.0f, 32768); + ov::Tensor t_cos_sin_cache = + utils::create_and_fill_tensor(funcInputs[1].get_element_type(), {32768, 32, 2}, 2, -1.0f, 32768); + ov::Tensor t_position_ids = create_i32_tensor(ov::Shape({batch, seq_length}), 15); + + inputs.clear(); + inputs.insert({funcInputs[0].get_node_shared_ptr(), t_input}); + inputs.insert({funcInputs[1].get_node_shared_ptr(), t_cos_sin_cache}); + inputs.insert({funcInputs[2].get_node_shared_ptr(), t_position_ids}); +} + +void RoPETestChatGLM2DRoPEStridedSlice::SetUp() { + targetDevice = this->GetParam(); + + const int batch = 2; + const int seq_length = 7; + const int num_head = 32; + const int rotary_dims = 64; + + InputShape inpShape = {{batch, -1, 4096 + 256 + 256}, {{batch, seq_length, 4096 + 256 + 256}}}; + init_input_shapes({inpShape}); + function = buildROPE_ChatGLM(-1, num_head, rotary_dims); +} + +std::string RoPETestChatGLM2DRoPEStridedSlice::getTestCaseName(const testing::TestParamInfo& obj) { + std::string targetDevice = obj.param; + std::ostringstream result; + result << "targetDevice=" << targetDevice; + return result.str(); +} + } // namespace test } // namespace ov From 15072abc782bc056e9c164b0e9a034e7021f3568 Mon Sep 17 00:00:00 2001 From: Wang Wangwang Date: Thu, 17 Oct 2024 04:38:37 +0800 Subject: [PATCH 040/112] [GPU] limit skip of reorder if input is on constant path (#26509) ### Details: - *Fix remove_redundant_recorders with all dependencies are constants* ### Tickets: - *CVS-150768* --------- Co-authored-by: Chen Peter --- .../remove_redundant_reorders.cpp | 2 +- .../unit/fusions/eltwise_fusion_test.cpp | 30 +++++++++++++++++-- .../tests/unit/fusions/fusion_test_common.hpp | 3 +- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index dff6b16d30a2ad..28ee84c4a4ec02 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -434,7 +434,7 @@ void remove_redundant_reorders::run(program& p) { (input.is_type() || input.is_type() || input.is_type() || input.is_type() || input.is_type() || input.is_type() || input.is_type() || input.is_type() || input.is_type() || - input.is_type() || input.is_type()) && !input.is_constant(); if (!same_data_type && !allowed_dt_conversion_fuse) continue; diff --git a/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp 
b/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp index d4c50ec84ac78a..5d259a1a1862fc 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp @@ -38,8 +38,10 @@ class EltwiseFusingTest : public ::BaseFusingTest { network network_fused(this->engine, this->topology_fused, cfg_fused); auto inputs = network_fused.get_input_ids(); - network_fused.set_input_data("input", input_prim); - network_not_fused.set_input_data("input", input_prim); + if (std::find(inputs.begin(), inputs.end(), "input") != inputs.end()) { + network_fused.set_input_data("input", input_prim); + network_not_fused.set_input_data("input", input_prim); + } if (std::find(inputs.begin(), inputs.end(), "input2") != inputs.end()) { network_fused.set_input_data("input2", input_prim2); network_not_fused.set_input_data("input2", input_prim2); @@ -699,3 +701,27 @@ TEST_P(eltwise_fusing_reorders, reorders_for_data_type) { INSTANTIATE_TEST_SUITE_P(fusings_gpu, eltwise_fusing_reorders, ::testing::ValuesIn(std::vector{ eltwise_test_params{ { 1, 16, 16, 2 }, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::max, 4, 6 }, })); + +class eltwise_with_constant_input : public EltwiseFusingTest {}; +TEST_P(eltwise_with_constant_input, basic) { + auto p = GetParam(); + create_topologies(data("eltwise_data", get_mem(get_input_layout2(p), -10, 10)), + data("eltwise_data1", get_mem(get_input_layout2(p), -10, 10)), + eltwise("eltwise", {input_info("eltwise_data"), input_info("eltwise_data1")}, p.mode, p.default_type), + reorder("out", + input_info("eltwise"), + p.default_format, + data_types::f32, + std::vector(), + cldnn::reorder_mean_mode::subtract, + cldnn::padding(), + true) + ); + + tolerance = default_tolerance(p.input_type); + execute(p, true); +} + +INSTANTIATE_TEST_SUITE_P(fusings_gpu, eltwise_with_constant_input, ::testing::ValuesIn(std::vector{ + eltwise_test_params{ CASE_ELTWISE_FP16_1, 0, 0}, +})); diff --git a/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp b/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp index a590fb9299a777..eb0f63c651e50d 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp @@ -81,8 +81,9 @@ class BaseFusingTest : public ::testing::TestWithParam { ASSERT_EQ(outputs_ref.size(), outputs_fused.size()); ASSERT_EQ(outputs_ref.size(), size_t(1)); + std::vector val_opt; auto val_ref = get_output_values_to_float(not_fused, outputs_ref.begin()->second); - auto val_opt = get_output_values_to_float(fused, outputs_fused.begin()->second); + ASSERT_NO_THROW(val_opt = get_output_values_to_float(fused, outputs_fused.begin()->second)); ASSERT_EQ(val_ref.size(), val_opt.size()); for (size_t i = 0; i < val_ref.size(); i++) { ASSERT_NEAR(val_ref[i], val_opt[i], tolerance) From 324a2827a8dac055ab2eb10e6b220ad29231df0b Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Wed, 16 Oct 2024 23:36:29 +0100 Subject: [PATCH 041/112] NPUW: Regularized REP pipeline (#27089) ### Details: - Introduce a pattern-guided REG pipeline and make it default. If there's no known patterns, it falls back into REP. 
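For context: the pipeline is selected through the `NPUW_ONLINE_PIPELINE` property, which this patch extends with the `REG` value and makes the default. A minimal usage sketch, not part of this patch (the model path is hypothetical, and the keys are assumed to be accepted as plain strings, as with other NPUW options):

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Hypothetical model path, for illustration only.
    auto model = core.read_model("model.xml");

    // "REG" is the new default introduced by this patch; passing "REP"
    // explicitly restores the previous repeated-blocks pipeline.
    ov::AnyMap config = {{"NPU_USE_NPUW", "YES"},
                         {"NPUW_ONLINE_PIPELINE", "REG"}};
    auto compiled = core.compile_model(model, "NPU", config);
    return 0;
}
```

Since `REG` falls back to `REP` when no known patterns are found, existing configurations need no changes.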
### Tickets: - *E-142355* --- .../al/include/intel_npu/al/config/npuw.hpp | 2 +- .../al/include/npuw_private_properties.hpp | 4 +- .../npuw/partitioning/online/compiler.cpp | 56 ++++++++++++++++--- .../plugin/npuw/partitioning/online/group.cpp | 4 ++ .../plugin/npuw/partitioning/online/group.hpp | 1 + .../npuw/partitioning/online/snapshot.cpp | 28 ++++++++-- .../npuw/partitioning/online/snapshot.hpp | 5 +- 7 files changed, 82 insertions(+), 18 deletions(-) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp index 65ec475df6b986..fef9470545482a 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp @@ -30,7 +30,7 @@ void registerNPUWOptions(OptionsDesc& desc); DEFINE_OPT(NPU_USE_NPUW, bool, false, use_npuw, CompileTime); DEFINE_OPT(NPUW_DEVICES, std::string, "NPU,CPU", npuw::devices, CompileTime); DEFINE_OPT(NPUW_SUBMODEL_DEVICE, std::string, "", npuw::submodel_device, CompileTime); -DEFINE_OPT(NPUW_ONLINE_PIPELINE, std::string, "REP", npuw::partitioning::online::pipeline, CompileTime); +DEFINE_OPT(NPUW_ONLINE_PIPELINE, std::string, "REG", npuw::partitioning::online::pipeline, CompileTime); DEFINE_OPT(NPUW_ONLINE_AVOID, std::string, "", npuw::partitioning::online::avoid, CompileTime); DEFINE_OPT(NPUW_ONLINE_ISOLATE, std::string, "", npuw::partitioning::online::isolate, CompileTime); DEFINE_OPT(NPUW_ONLINE_NO_FOLD, std::string, "", npuw::partitioning::online::nofold, CompileTime); diff --git a/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp b/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp index 31fa52c3878598..059977ee47a063 100644 --- a/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp @@ -67,8 +67,8 @@ namespace online { * @brief * Type: std::string. * Specify which partitioning pipeline to run. - * Possible values: "NONE", "INIT", "JUST", "REP", "COMPUTE". - * Default value: "REP". + * Possible values: "NONE", "INIT", "JUST", "REP", "REG", "COMPUTE". + * Default value: "REG". 
*/ static constexpr ov::Property pipeline{"NPUW_ONLINE_PIPELINE"}; diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp index 46b6cb7b12681d..a66159e6b4d1b7 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp @@ -40,7 +40,6 @@ std::vector getIsolates(const std::string& isolates_unparsed); std::vector getNoFolds(::intel_npu::Config& cfg); std::vector getNoFolds(const std::string& nofolds_unparsed); // Set default predefined values for COMPUTE pipeline -void setComputeConfig(PassContext& ctx); void dump_partitioning(const ov::npuw::Ensemble& ens, const std::string& to); size_t getMinGraphSize(::intel_npu::Config& cfg) { @@ -204,12 +203,6 @@ std::vector getNoFolds(const std::string& nofolds_unparsed) { return nofolds; } -void setComputeConfig(PassContext& ctx) { - // FIXME: initialize via a dedicated function instead of parsing - ctx.isolates = detail::getIsolates(ISOL_PRESETS.at("COMPUTE")); - ctx.nofolds = detail::getNoFolds("compute"); -} - void dump_partitioning(const ov::npuw::Ensemble& ens, const std::string& to) { pugi::xml_document doc; @@ -277,10 +270,21 @@ class Compiler { NONE, // Partitioning will consist of a single group with all the Ops INIT, // Initialize only. The hardest mode, every group has just 1 layer inside JUST, // "justParitioning" - combination of LHF + Remnants - REP, // Repeated blocks pipeline - combination of repeatedBlocks and Remnants - default configuration + REP, // Repeated blocks pipeline - combination of repeatedBlocks and Remnants + REG, // Regularized repeated blocks pipeline -same as REP, but with some strong hints first COMPUTE // Separates non-foldable compute subgraphs from the model based on predefined rules + REP }; + template + void warn_unused() { + const auto& val = m_cfg.get(); + if (val != C::defaultValue()) { + LOG_WARN("User-specified configuration {" << C::key() << " : " << val + << "} is ignored in the current pipeline " + << m_cfg.get<::intel_npu::NPUW_ONLINE_PIPELINE>()); + } + } + Pipeline currentPipeline() { std::string pipeline_opt = m_cfg.getString<::intel_npu::NPUW_ONLINE_PIPELINE>(); if (pipeline_opt == "NONE") { @@ -291,6 +295,8 @@ class Compiler { return Pipeline::JUST; } else if (pipeline_opt == "REP") { return Pipeline::REP; + } else if (pipeline_opt == "REG") { + return Pipeline::REG; } else if (pipeline_opt == "COMPUTE") { return Pipeline::COMPUTE; } else { @@ -346,6 +352,23 @@ class Compiler { LOG_INFO("Done"); } + void reg() { + LOG_INFO("Online partitioning: compiling regularized repeated blocks pipeline..."); + LOG_BLOCK(); + + m_snapshot->earlyAvoids(); + m_snapshot->earlyRegroup(); + m_snapshot->repeatedBlocks([&]() { + // This callback is called when repeatingBlocks algorithm thinks it is done + m_snapshot->stripTag("compute"); + }); + m_snapshot->repeat([&] { + m_snapshot->fuseRemnantsExtended(); + }); + + LOG_INFO("Done"); + } + public: Compiler(const std::shared_ptr& model, ::intel_npu::Config& cfg) : m_model(model), @@ -384,9 +407,24 @@ class Compiler { case Pipeline::REP: rep(); break; + case Pipeline::REG: + warn_unused<::intel_npu::NPUW_ONLINE_ISOLATE>(); + + // Only get isolates here. 
+ // NB: We ignore NO_FOLD everywhere except pipeline COMPUTE - this needs + // to be aligned in the future + ctx.isolates = detail::getIsolates(detail::ISOL_PRESETS.at("COMPUTE")); + m_snapshot->setCtx(ctx); + reg(); + break; case Pipeline::COMPUTE: + warn_unused<::intel_npu::NPUW_ONLINE_ISOLATE>(); + warn_unused<::intel_npu::NPUW_ONLINE_NO_FOLD>(); + // Manually set predefined isolates and nofolds then do rep() pipeline - detail::setComputeConfig(ctx); + // FIXME: initialize via a dedicated function instead of parsing + ctx.isolates = detail::getIsolates(detail::ISOL_PRESETS.at("COMPUTE")); + ctx.nofolds = detail::getNoFolds("compute"); m_snapshot->setCtx(ctx); rep(); break; diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp index 991330663bbe48..cfa9e451ffb149 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp @@ -443,6 +443,10 @@ void Group::isolate(const std::string& tag) { m_isol_tag = tag; } +void Group::dontIsolate() { + m_isol_tag = ""; +} + const std::string& Group::isolatedTag() const { return m_isol_tag; } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp index 69688248a0b9ac..538eeb03bc851c 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp @@ -77,6 +77,7 @@ class Group : public std::enable_shared_from_this { // FIXME: unify avoid and isolate void avoid(const std::string& device); void isolate(const std::string& tag); + void dontIsolate(); const std::set& avoidedTargets() const; const std::string& isolatedTag() const; std::string specialTags() const; diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp index 82856cece3de40..4cdc92ffc92d25 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp @@ -436,18 +436,27 @@ void Snapshot::earlyRegroup() { LOG_INFO("DONE."); } -void Snapshot::repeatedBlocks() { +void Snapshot::repeatedBlocks(Snapshot::CB&& on_done) { LOG_INFO("Online partitioning: executing repeatedBlocks pass group..."); LOG_BLOCK(); identifyUniques(); repeat([&] { repeat([&] { - mergeUniques(); + repeat([&] { + mergeUniques(); + }); + mergeTriangles(); + markInternalCompute(); + resetExcludedRep(); }); - mergeTriangles(); - markInternalCompute(); - resetExcludedRep(); + // While the current process is entirely done, let the caller + // influence the partitioning - so the algorithm could continue. + if (on_done) { + on_done(); + } else { + return; // FROM top-level repeat! 
+ } }); cleanUpUniques(); @@ -1086,3 +1095,12 @@ void Snapshot::repeat(detail::Pass&& pass) { void Snapshot::setCtx(const ov::npuw::online::PassContext& ctx) { m_ctx = ctx; } + +void Snapshot::stripTag(const std::string& tag) { + for (auto&& nh : m_graph->nodes()) { + auto gptr = m_graph->meta(nh).get(); + if (gptr->isolatedTag() == tag) { + gptr->dontIsolate(); + } + } +} diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp index e7e5121b1240e7..6da1a6d98939bb 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp @@ -46,10 +46,13 @@ class Snapshot : public std::enable_shared_from_this { void fuseInputs(); // Advanced passes for repeated blocks algorithm - void repeatedBlocks(); + using CB = std::function; + void repeatedBlocks(CB&& on_done = {}); void earlyAvoids(); void earlyRegroup(); + void stripTag(const std::string& tag); + // Utility std::shared_ptr getGraph() const; const detail::OVPortsMap& getPortsMap() const; From 262f9202a00cede8795582383773fff149d5deb2 Mon Sep 17 00:00:00 2001 From: James Bartlett Date: Wed, 16 Oct 2024 23:57:24 -0700 Subject: [PATCH 042/112] [GPU] Fix bug in GatherND reference GPU kernel (#27041) There is a bug in the GPU plugin's GatherND kernel where the output indices are incorrect for 5D index inputs when the 4th dimension of the index input is not of size 1 and `batch_dims == 0`. ### Details: - When `batch_dims <= 1` the current implementation of GatherND in the GPU plugin does not shift the output indices as expected. For example, `out_x` is set to `idx_x` (the x index for the second input). However, this is not correct since the last dimension of the second input to GatherND is the indices to use and does not contribute to the shape of the output tensor (beyond deciding how many of the original input dimensions to keep). The current logic happens to work in most cases, but is not generally correct. - This PR adds a test case that demonstrates the bug (it fails on main), and the changes in this PR cause the test case to pass. - The fix in this PR is to just use the same logic as the case where `batch_dims > 1` and not `BATCH_MERGED_OUTPUT`. 
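To make the bug concrete, recall the reference shape rule the kernel must reproduce. A small sketch of that rule for `batch_dims == 0` (an illustration of the operator spec, not the plugin code):

```cpp
#include <cstddef>
#include <vector>

// GatherND output-shape rule for batch_dims == 0:
//   out_shape = indices_shape[:-1] ++ data_shape[indices_shape.back():]
// The last indices dimension holds coordinate tuples, so it selects data
// axes and never contributes to the output shape.
std::vector<size_t> gather_nd_out_shape(const std::vector<size_t>& data_shape,
                                        const std::vector<size_t>& indices_shape) {
    // Assumes valid ranks: 1 <= indices_shape.back() <= data_shape.size().
    std::vector<size_t> out(indices_shape.begin(), indices_shape.end() - 1);
    out.insert(out.end(), data_shape.begin() + indices_shape.back(), data_shape.end());
    return out;
}
```

For the new test below, data of shape `[1,3,3,3]` with indices of shape `[1,1,1,6,4]` yields an output of shape `[1,1,1,6]` (six gathered values); mapping `out_x` straight to `idx_x` skips this reindexing, which is exactly the case the old shortcut mishandled.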
### Tickets: Signed-off-by: James Bartlett --- .../cl_kernels/gather_nd_ref.cl | 83 +++++++++---------- .../unit/test_cases/gather_nd_gpu_test.cpp | 45 ++++++++++ 2 files changed, 82 insertions(+), 46 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gather_nd_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gather_nd_ref.cl index 663d2248ff4d19..bc6bc0ee55a611 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gather_nd_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gather_nd_ref.cl @@ -135,55 +135,46 @@ KERNEL(gather_nd_ref)( const uint data_idx = GET_UPDATES_INDEX(INPUT0, IN_ORDER); // Calculate output index - #if BATCH_DIMS <= 1 - const uint out_x = idx_x; - const uint out_y = idx_y; - const uint out_z = idx_z; - const uint out_w = idx_w; - const uint out_f = idx_f; - const uint out_b = idx_b; - #else - #if BATCH_MERGED_OUTPUT - uint pitch_acc = 1; - uint output_batch_size = 0; - for (int i = BATCH_DIMS - 1; i >= 0; i--) { - output_batch_size += (idx_arr[i] * pitch_acc); - pitch_acc *= idx_dim[i]; - } + #if BATCH_MERGED_OUTPUT && BATCH_DIMS > 1 + uint pitch_acc = 1; + uint output_batch_size = 0; + for (int i = BATCH_DIMS - 1; i >= 0; i--) { + output_batch_size += (idx_arr[i] * pitch_acc); + pitch_acc *= idx_dim[i]; + } - #if OUTPUT_DIMS == 4 - const uint out_x = idx_arr[BATCH_DIMS+2]; - const uint out_y = idx_arr[BATCH_DIMS+1]; - #elif OUTPUT_DIMS == 5 - const uint out_x = idx_arr[BATCH_DIMS+3]; - const uint out_y = idx_arr[BATCH_DIMS+2]; - const uint out_z = idx_arr[BATCH_DIMS+1]; - #else - const uint out_x = idx_arr[BATCH_DIMS+4]; - const uint out_y = idx_arr[BATCH_DIMS+3]; - const uint out_z = idx_arr[BATCH_DIMS+2]; - const uint out_w = idx_arr[BATCH_DIMS+1]; - #endif - const uint out_f = idx_arr[BATCH_DIMS+0]; - const uint out_b = output_batch_size; + #if OUTPUT_DIMS == 4 + const uint out_x = idx_arr[BATCH_DIMS+2]; + const uint out_y = idx_arr[BATCH_DIMS+1]; + #elif OUTPUT_DIMS == 5 + const uint out_x = idx_arr[BATCH_DIMS+3]; + const uint out_y = idx_arr[BATCH_DIMS+2]; + const uint out_z = idx_arr[BATCH_DIMS+1]; #else - #if OUTPUT_DIMS == 4 - const uint out_x = idx_arr[3]; - const uint out_y = idx_arr[2]; - #elif OUTPUT_DIMS == 5 - const uint out_x = idx_arr[4]; - const uint out_y = idx_arr[3]; - const uint out_z = idx_arr[2]; - #else - const uint out_x = idx_arr[5]; - const uint out_y = idx_arr[4]; - const uint out_z = idx_arr[3]; - const uint out_w = idx_arr[2]; - #endif - const uint out_f = idx_arr[1]; - const uint out_b = idx_arr[0]; - + const uint out_x = idx_arr[BATCH_DIMS+4]; + const uint out_y = idx_arr[BATCH_DIMS+3]; + const uint out_z = idx_arr[BATCH_DIMS+2]; + const uint out_w = idx_arr[BATCH_DIMS+1]; #endif + const uint out_f = idx_arr[BATCH_DIMS+0]; + const uint out_b = output_batch_size; + #else + #if OUTPUT_DIMS == 4 + const uint out_x = idx_arr[3]; + const uint out_y = idx_arr[2]; + #elif OUTPUT_DIMS == 5 + const uint out_x = idx_arr[4]; + const uint out_y = idx_arr[3]; + const uint out_z = idx_arr[2]; + #else + const uint out_x = idx_arr[5]; + const uint out_y = idx_arr[4]; + const uint out_z = idx_arr[3]; + const uint out_w = idx_arr[2]; + #endif + const uint out_f = idx_arr[1]; + const uint out_b = idx_arr[0]; + #endif const uint output_idx = GET_OUTPUT_INDEX(OUT_ORDER); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gather_nd_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gather_nd_gpu_test.cpp index 3f2919fd8011ec..7229f902c3e3a7 100644 --- 
a/src/plugins/intel_gpu/tests/unit/test_cases/gather_nd_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gather_nd_gpu_test.cpp @@ -752,6 +752,51 @@ TEST(gather_nd_gpu_fp16, d22_i32_ir2_batch0) { DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 3, 1, 1, 1 }); } +TEST(gather_nd_gpu_fp16, d1333_i11164_ir5_batch0) { + auto& engine = get_test_engine(); + + const int indices_rank = 5; + const int batch_dims = 0; + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 3, 3, 3 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 1, 1, 4, 6, 1 } }); // indices + // expected output dim: {1,1,1,6} + + set_values(input0, { + ov::float16(0), ov::float16(1), ov::float16(2), + ov::float16(3), ov::float16(4), ov::float16(5), + ov::float16(6), ov::float16(7), ov::float16(8), + + ov::float16(10), ov::float16(11), ov::float16(12), + ov::float16(13), ov::float16(14), ov::float16(15), + ov::float16(16), ov::float16(17), ov::float16(18), + + ov::float16(20), ov::float16(21), ov::float16(22), + ov::float16(23), ov::float16(24), ov::float16(25), + ov::float16(26), ov::float16(27), ov::float16(28), + }); + + set_values(input1, { + ov::float16(0), ov::float16(0), ov::float16(0), ov::float16(0), + ov::float16(0), ov::float16(0), ov::float16(0), ov::float16(1), + ov::float16(0), ov::float16(0), ov::float16(0), ov::float16(2), + ov::float16(0), ov::float16(0), ov::float16(1), ov::float16(0), + ov::float16(0), ov::float16(0), ov::float16(1), ov::float16(1), + ov::float16(0), ov::float16(0), ov::float16(1), ov::float16(2), + }); + + std::vector expected_results = { + ov::float16(0), + ov::float16(1), + ov::float16(2), + ov::float16(3), + ov::float16(4), + ov::float16(5), + }; + + DoTestV5(engine,input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 1, 1, 6, 1 }); + DoTestV8(engine, input0, input1, expected_results, indices_rank, batch_dims, format::bfyx, { 1, 1, 6, 1 }); +} + TEST(gather_nd_gpu_fp16, export_import) { auto& engine = get_test_engine(); From c092fb8dc4cd0b5ffa03d699b6664b82ba740093 Mon Sep 17 00:00:00 2001 From: Chen Xu Date: Thu, 17 Oct 2024 14:58:23 +0800 Subject: [PATCH 043/112] [CPU] Improve instruction generating algorithm for pow(a, large_N) (#27076) ### Details: - *Improve instruction generating algorithm for pow(a, N), where N is very large. 
Besides, fix overflow during converting from float to int32.* ### Tickets: - *[154601](https://jira.devtools.intel.com/browse/CVS-154601)* --- .../src/emitters/plugin/x64/jit_eltwise_emitters.cpp | 10 +++++++--- .../convert_to_plugin_specific_node.cpp | 3 ++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp index 1c90af2d48e85a..fb74c196f6a289 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp @@ -1576,10 +1576,14 @@ void jit_power_static_emitter::emit_isa(const std::vector &in_vec_idxs, } } } else if (std::floor(power) == power && power != 0) { - int ipower = std::abs(static_cast(power)); + int64_t ipower = std::abs(static_cast(power)) - 1; h->uni_vmovups(vmm_aux0, vmm_dst); - for (int i = 1; i < ipower; i++) { - h->uni_vmulps(vmm_dst, vmm_dst, vmm_aux0); + while (ipower > 0) { + if (ipower & 0x1) + h->uni_vmulps(vmm_dst, vmm_dst, vmm_aux0); + if (ipower > 1) + h->uni_vmulps(vmm_aux0, vmm_aux0, vmm_aux0); + ipower = ipower >> 1; } if (power < 0.f) { diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/convert_to_plugin_specific_node.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/convert_to_plugin_specific_node.cpp index 9e75e302ad58f8..2b1bb87fff888a 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/convert_to_plugin_specific_node.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/convert_to_plugin_specific_node.cpp @@ -4,6 +4,7 @@ #include "common_test_utils/node_builders/eltwise.hpp" #include "common_test_utils/node_builders/constant.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" #include "utils/cpu_test_utils.hpp" @@ -55,7 +56,7 @@ class ConvertToPluginSpecificNode : public testing::WithParamInterface(prc, ov::Shape(nonConstShape)); - const auto constNode = ov::test::utils::make_constant(prc, constShape); + const auto constNode = ov::test::utils::make_constant(prc, constShape, utils::InputGenerateData(1, 9e8, 1, 1)); OutputVector inputs(2); inputs[port] = constNode; inputs[1 - port] = param; From b3c3d3b08b651a0c1a47f8970b16ae77930d32a1 Mon Sep 17 00:00:00 2001 From: Fang Xu Date: Thu, 17 Oct 2024 15:04:56 +0800 Subject: [PATCH 044/112] Throws ov::Cancelled to uplevel instead of ov::Exception (#26962) ### Details: - *fix random capi test failure (ov_infer_request/ov_infer_request_test.cancel/0)* - *CAPI `ov_infer_request_test.cancel` expects a ov::Cancelled exception, but `Graph::ExecuteNodeWithCatch` throws ov::Exception* ### Tickets: - *151441* --- src/plugins/intel_cpu/src/graph.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index dc0f953efe70ab..fa3502468a9cf3 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -44,6 +44,7 @@ #include #include "common/primitive_desc_iface.hpp" +#include "openvino/runtime/exception.hpp" #include "openvino/runtime/threading/cpu_streams_executor.hpp" #include "openvino/core/parallel.hpp" @@ -1330,6 +1331,8 @@ inline void Graph::ExecuteNodeWithCatch(const NodePtr& node, SyncInferRequest* r try { ExecuteNode(node, request, numaId); + } catch (const ov::Cancelled&) { + throw; } catch (const std::exception& exp) 
{ OPENVINO_THROW(*node, exp.what()); } From e4ce1bba8507966e5e17ea13b47718dd670a0d80 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 07:08:36 +0000 Subject: [PATCH 045/112] Bump actions/cache from 4.0.2 to 4.1.1 (#26964) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/cache](https://github.com/actions/cache) from 4.0.2 to 4.1.1.

Release notes

Sourced from actions/cache's releases.

v4.1.1

What's Changed

  • Restore original behavior of cache-hit output - #1467

Full Changelog: https://github.com/actions/cache/compare/v4.1.0...v4.1.1

v4.1.0

What's Changed

  • Ensure cache-hit output is set when a cache is missed - #1404
  • Deprecate save-always input - #1452

Full Changelog: https://github.com/actions/cache/compare/v4.0.2...v4.1.0

Changelog

Sourced from actions/cache's changelog.

Releases

4.1.1

  • Restore original behavior of cache-hit output - #1467

4.1.0

  • Ensure cache-hit output is set when a cache is missed - #1404
  • Deprecate save-always input - #1452

4.0.2

  • Fixed restore fail-on-cache-miss not working.

4.0.1

  • Updated isGhes check

4.0.0

  • Updated minimum runner version support from node 12 -> node 20

3.3.3

  • Updates @actions/cache to v3.2.3 to fix accidental mutated path arguments to getCacheVersion actions/toolkit#1378
  • Additional audit fixes of npm package(s)

3.3.2

  • Fixes bug with Azure SDK causing blob downloads to get stuck.

3.3.1

  • Reduced segment size to 128MB and segment timeout to 10 minutes to fail fast in case the cache download is stuck.

3.3.0

  • Added option to lookup cache without downloading it.

3.2.6

  • Fix zstd not being used after zstd version upgrade to 1.5.4 on hosted runners.

3.2.5

  • Added fix to prevent from setting MYSYS environment variable globally.

3.2.4

  • Added option to fail job on cache miss.

... (truncated)

Commits
  • 3624ceb Restore original behavior of cache-hit output (#1467)
  • 2cdf405 Prepare 4.1.0 release (#1464)
  • a11fb02 restore action's README now references v4 instead of v3 (#1445)
  • cf7a75e Fix typo: depening -> depending (#1462)
  • c74ca40 Deprecate save-always input (#1452)
  • f8a7ab4 Merge pull request #1463 from actions/Jcambass-patch-1
  • 45b7be0 Add workflow file for publishing releases to immutable action package
  • 81382a7 Merge pull request #1311 from todgru/todgru/v4-documentation-update
  • c4ee99a Merge branch 'main' into todgru/v4-documentation-update
  • 57b8e40 Clarify that the restore-keys input is a string in the docs (#1434)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/cache&package-manager=github_actions&previous-version=4.0.2&new-version=4.1.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:

- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build_doc.yml | 2 +- .github/workflows/job_cpu_functional_tests.yml | 4 ++-- .github/workflows/mo.yml | 2 +- .github/workflows/ovc.yml | 2 +- .github/workflows/windows_conditional_compilation.yml | 2 +- .github/workflows/windows_vs2019_release.yml | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index c3c7fc296ade61..53f3eba9a749bf 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -63,7 +63,7 @@ jobs: - name: Cache documentation id: cache_sphinx_docs - uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 with: path: build/docs/_build/.doctrees key: sphinx-docs-cache diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index 24c8542ae80140..6848871df6e81e 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -89,7 +89,7 @@ jobs: run: python3 -m pip install -r ${INSTALL_TEST_DIR}/functional_test_utils/layer_tests_summary/requirements.txt - name: Restore tests execution time - uses: actions/cache/restore@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + uses: actions/cache/restore@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 with: path: ${{ env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-${{ runner.arch }}-tests-functional-cpu-stamp-${{ github.sha }} @@ -109,7 +109,7 @@ jobs: timeout-minutes: 25 - name: Save tests execution time - uses: actions/cache/save@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + uses: actions/cache/save@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 if: github.ref_name == 'master' with: path: ${{ env.PARALLEL_TEST_CACHE }} diff --git a/.github/workflows/mo.yml b/.github/workflows/mo.yml index 75ce4adae9496a..151227f111c9e0 100644 --- a/.github/workflows/mo.yml +++ b/.github/workflows/mo.yml @@ -32,7 +32,7 @@ jobs: python-version: '3.10' - name: Cache pip - uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('tools/mo/requirements*.txt') }} diff --git a/.github/workflows/ovc.yml b/.github/workflows/ovc.yml index 2e6986a96e3f29..ee5f3e58e363e6 100644 --- a/.github/workflows/ovc.yml +++ b/.github/workflows/ovc.yml @@ -27,7 +27,7 @@ jobs: python-version: '3.10' - name: Cache pip - uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('src/bindings/python/requirements*.txt') }} diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index ebe91212142f00..9c026f01e47233 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -387,7 +387,7 @@ jobs: run: python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/layer_tests_summary/requirements.txt - name: Restore tests execution time - uses: actions/cache/restore@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + uses: actions/cache/restore@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 with: path: ${{ 
env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index a0871712285bf6..8cac2b88078d15 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -481,7 +481,7 @@ jobs: run: python3 -m pip install -r ${{ github.workspace }}\install\tests\functional_test_utils\layer_tests_summary\requirements.txt - name: Restore tests execution time - uses: actions/cache/restore@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + uses: actions/cache/restore@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 with: path: ${{ env.PARALLEL_TEST_CACHE }} key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} @@ -495,7 +495,7 @@ jobs: timeout-minutes: 60 - name: Save tests execution time - uses: actions/cache/save@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + uses: actions/cache/save@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 if: github.ref_name == 'master' with: path: ${{ env.PARALLEL_TEST_CACHE }} From 6198275ac80104adf33abffdf4e3eae313465f7a Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Thu, 17 Oct 2024 11:39:08 +0400 Subject: [PATCH 046/112] [GPU] Added tests for LoRA with empty adapters and handling of incorrect fusings (#27093) ### Tickets: - *[152852](https://jira.devtools.intel.com/browse/CVS-152852)* --- .../prepare_primitive_fusing.cpp | 22 +++++++++++++------ .../intel_gpu/src/graph/input_layout.cpp | 6 ++++- .../intel_gpu/src/graph/primitive_inst.cpp | 14 +++++++++++- .../subgraph_tests/lora_pattern.cpp | 21 ++++++++++++++++++ 4 files changed, 54 insertions(+), 9 deletions(-) create mode 100644 src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/lora_pattern.cpp diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 5e8380f35dcb93..c38fa70e86ccef 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -1048,17 +1048,25 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { std::swap(fused_idx, peer_idx); } + auto fused_node = parents[fused_idx].first; + auto peer_node = parents[peer_idx].first; + // Avoid fusing with GEMM from the LoRA pattern, that can be optimized in case of empty adapters - if (parents[fused_idx].first->is_type()) { - if (parents[peer_idx].first->is_type() || - (parents[peer_idx].first->is_type() && - parents[peer_idx].first->get_dependency(0).is_type())) { - std::swap(fused_idx, peer_idx); + if (fused_node->is_type()) { + bool is_fc_lora = peer_node->is_type() || + (peer_node->is_type() && + peer_node->get_dependency(0).is_type()); + + bool is_conv_lora = peer_node->is_type(); + + bool is_gemm_lora = peer_node->is_type() && + fused_node->get_input_pshape().rbegin()->is_dynamic(); + + if (is_fc_lora || is_conv_lora || is_gemm_lora) { + std::swap(peer_node, fused_node); } } - auto fused_node = parents[fused_idx].first; - auto peer_node = parents[peer_idx].first; if (lo.get_optimization_attributes().use_onednn_impls && lo.is_primitive_implemented_for_onednn(*fused_node)) { auto eltw_in_size = peer_node->get_output_layout(); if (eltw_in_size.is_dynamic() diff --git a/src/plugins/intel_gpu/src/graph/input_layout.cpp b/src/plugins/intel_gpu/src/graph/input_layout.cpp index 
69cf2e7f834d2d..042744517a7c3e 100644 --- a/src/plugins/intel_gpu/src/graph/input_layout.cpp +++ b/src/plugins/intel_gpu/src/graph/input_layout.cpp @@ -37,7 +37,11 @@ input_layout_inst::typed_primitive_inst(network& network, input_layout_node cons event::ptr input_layout_inst::set_data(memory::ptr mem) { auto ol = get_node_output_layout(); - check_memory_to_set(*mem, ol); + bool empty_mem = mem->size() == 0 && (ol.is_dynamic() || ol.count() == 0); + if (!empty_mem) { + check_memory_to_set(*mem, ol); + } + event::ptr ev = nullptr; auto& engine = get_network().get_engine(); auto& stream = get_network().get_stream(); diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 095dc5fd45fa52..f90d4e34b08cc2 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1553,8 +1553,13 @@ event::ptr primitive_inst::execute(const std::vector& events) { auto allocated_mem = d.first->output_memory_ptr(); auto actual_input_layout = d.first->get_output_layout(); auto& engine = _network.get_engine(); + cldnn::memory_ptr actual_mem = nullptr; // Need to use actual layout, not the fake aligned memory layout - auto actual_mem = engine.reinterpret_buffer(*allocated_mem, actual_input_layout); + if (actual_input_layout.count() != 0) { + actual_mem = engine.reinterpret_buffer(*allocated_mem, actual_input_layout); + } else { + actual_mem = engine.allocate_memory(actual_input_layout); + } subgraph->set_input_data(d.first->id(), std::move(actual_mem)); } } @@ -2324,6 +2329,13 @@ bool primitive_inst::is_valid_fusion() const { if (fused_eltwise_prims.empty()) return true; + if (_node->is_type() || _node->is_type() || _node->is_type()) { + if (_impl_params->input_layouts[0].count() == 0 || + _impl_params->input_layouts[1].count() == 0) { + return false; + } + } + if (_node->is_type() && _node->get_preferred_impl_type() == impl_types::ocl) { // TODO: Only fc_bf_tiled_kernel & ref kernel are verified for fused eltwise. To support more fc kernels for eltwise fusion if (!_node->get_selected_impl()) diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/lora_pattern.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/lora_pattern.cpp new file mode 100644 index 00000000000000..7bb6fbd610df29 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/subgraph_tests/lora_pattern.cpp @@ -0,0 +1,21 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/lora_pattern.hpp" + +using namespace ov::test; + +namespace { + +INSTANTIATE_TEST_SUITE_P(smoke, + LoraPatternConvolution, + ::testing::Values(ov::test::utils::DEVICE_GPU), + LoraPatternBase::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke, + LoraPatternMatmul, + ::testing::Values(ov::test::utils::DEVICE_GPU), + LoraPatternBase::getTestCaseName); + +} // namespace From 675dc6e6cb30760c31a162f4f2f2ac5e99afa183 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 17 Oct 2024 15:43:10 +0800 Subject: [PATCH 047/112] [CPU]support glm4 rope (#27094) ### Details: - *Support Rope kernel of GLM4* - *the input data order has changed from (**[seq_length, batch, 4608]**) in **ChatGLM3** to (**[batch, seq_length, 4608]**) in **ChatGLM4**. 
Within the RoPE process, the data order changes from (**[seq_length, batch, head_count, head_size]**) to (**[batch, head_count, seq_length, head_size]**) via a permute operation added in **ChatGLM4**.*
- *The RoPE cache data order has changed from (**[seq_length, batch, head_count, 2]**) in **ChatGLM3** to (**[batch, head_count, seq_length, 2]**) in **ChatGLM4**.*
- *Consequently, the output of RoPE has also changed from (**[seq_length, batch, head_count, head_size]**) in **ChatGLM3** to (**[batch, head_count, seq_length, head_size]**) in **ChatGLM4**.*
- *Because of these changes, new RoPE pattern matching has to be added, distinct from the existing ChatGLM pattern matching, and new kernels are needed to support the new layouts.*

### Tickets:
 - *ticket-id*
---
 src/plugins/intel_cpu/src/nodes/rope.cpp | 83 +++++++++++++------
 .../transformation_pipeline.cpp          |  4 +-
 .../subgraph_tests/rotary_pos_emb.cpp    |  6 ++
 3 files changed, 66 insertions(+), 27 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/rope.cpp b/src/plugins/intel_cpu/src/nodes/rope.cpp
index ac95b0f31213de..f089b67a122beb 100644
--- a/src/plugins/intel_cpu/src/nodes/rope.cpp
+++ b/src/plugins/intel_cpu/src/nodes/rope.cpp
@@ -244,34 +244,67 @@ struct RoPE::RoPEExecutorChatGLM : public RoPE::Executor {
         if (m_config.slice_stop - m_config.slice_start > 0) {
             t_src = t_src.slice(2, m_config.slice_start, m_config.slice_stop);
         }
-        auto seq_len = t_src.size(0);
-        auto batch_size = t_src.size(1);
-
-        auto head_cnt = m_config.head_cnt;
-        auto head_size = m_config.head_size;
-
-        auto rotary_dims = m_config.rotary_ndims;
-
-        parallel_for3d(seq_len, batch_size, head_cnt, [&](size_t p, size_t b, size_t h) {
-            auto* src = t_src.ptr(p, b, h * head_size);
-            // [length, batch_size, ndims//2, 2]
-            auto* cos_sin = &t_cos_sin.at({p, b, 0, 0}, true);
-            auto* dst = t_dst.ptr(p, b, h, 0);
+        if (m_config.support_2d_rope) {
+            // src [batch, length, H x S]
+            auto seq_len = t_src.size(1);
+            auto batch_size = t_src.size(0);
+
+            auto head_cnt = m_config.head_cnt;
+            auto head_size = m_config.head_size;
+
+            auto rotary_dims = m_config.rotary_ndims;
+
+            parallel_for3d(batch_size, head_cnt, seq_len, [&](size_t b, size_t h, size_t p) {
+                // src [batch, length, H x S]
+                auto* src = t_src.ptr(b, p, h * head_size);
+                // [batch_size, length, ndims//2, 2]
+                auto* cos_sin = &t_cos_sin.at({b, p, 0, 0}, true);
+                auto* dst = t_dst.ptr(b, h, p, 0);
+
+                if (m_rotaryKernel) {
+                    execJitKernel(m_rotaryKernel, src, dst, cos_sin, nullptr);
+                } else {
+                    size_t i = 0;
+                    for (; i < rotary_dims; i += 2) {
+                        auto cosv = cos_sin[i];
+                        auto sinv = cos_sin[i + 1];
+                        dst[i] = cosv * src[i] - sinv * src[i + 1];
+                        dst[i + 1] = sinv * src[i] + cosv * src[i + 1];
+                    }
+                }
 
-            if (m_rotaryKernel) {
-                execJitKernel(m_rotaryKernel, src, dst, cos_sin, nullptr);
-            } else {
-                size_t i = 0;
-                for (; i < rotary_dims; i += 2) {
-                    auto cosv = cos_sin[i];
-                    auto sinv = cos_sin[i + 1];
-                    dst[i] = cosv * src[i] - sinv * src[i + 1];
-                    dst[i + 1] = sinv * src[i] + cosv * src[i + 1];
+                memcpy(dst + rotary_dims, src + rotary_dims, (head_size - rotary_dims) * sizeof(T));
+            });
+        } else {
+            auto seq_len = t_src.size(0);
+            auto batch_size = t_src.size(1);
+
+            auto head_cnt = m_config.head_cnt;
+            auto head_size = m_config.head_size;
+
+            auto rotary_dims = m_config.rotary_ndims;
+
+            parallel_for3d(seq_len, batch_size, head_cnt, [&](size_t p, size_t b, size_t h) {
+                auto* src = t_src.ptr(p, b, h * head_size);
+                // [length, batch_size, ndims//2, 2]
+                auto* cos_sin = 
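                // (editorial note, not part of the patch) this else-branch is
                // the legacy ChatGLM3 path: the cos/sin cache is indexed
                // seq-major as {p, b, ...}, while the support_2d_rope branch
                // above reads it batch-major as {b, p, ...}; that index swap
                // is exactly the layout change described in this PR: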
&t_cos_sin.at({p, b, 0, 0}, true); + auto* dst = t_dst.ptr(p, b, h, 0); + + if (m_rotaryKernel) { + execJitKernel(m_rotaryKernel, src, dst, cos_sin, nullptr); + } else { + size_t i = 0; + for (; i < rotary_dims; i += 2) { + auto cosv = cos_sin[i]; + auto sinv = cos_sin[i + 1]; + dst[i] = cosv * src[i] - sinv * src[i + 1]; + dst[i + 1] = sinv * src[i] + cosv * src[i + 1]; + } } - } - memcpy(dst + rotary_dims, src + rotary_dims, (head_size - rotary_dims) * sizeof(T)); - }); + memcpy(dst + rotary_dims, src + rotary_dims, (head_size - rotary_dims) * sizeof(T)); + }); + } } }; diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 0e683482a97934..04808baaebec54 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -835,8 +835,8 @@ void Transformations::PostLpt() { // Execute before snippets. Otherwise FQ will be converted to Subgraph CPU_REGISTER_PASS_X64(postLPTPassManager, ConvertFqRnnToQuantizedRnn); - CPU_REGISTER_PASS_X64(postLPTPassManager, ov::pass::RoPEFusion); - CPU_REGISTER_PASS_ARM64(postLPTPassManager, ov::pass::RoPEFusion); + CPU_REGISTER_PASS_X64(postLPTPassManager, ov::pass::RoPEFusion, true); + CPU_REGISTER_PASS_ARM64(postLPTPassManager, ov::pass::RoPEFusion, true); CPU_REGISTER_PASS_X64(postLPTPassManager, CausalMaskPreprocessFusion); // MLP & QKV fusion optimizations is focused on throughput, only enabled on AMX-bf16 & LLM serving use cases. diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp index 7fd916e4300768..8cd8707e047878 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/subgraph_tests/rotary_pos_emb.cpp @@ -50,5 +50,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_RoPETestGPTJSlice, ::testing::Combine(::testing::Values(true, false), ::testing::Values(ov::test::utils::DEVICE_CPU)), RoPETestGPTJSlice::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_RoPETestChatGLM, + RoPETestChatGLM2DRoPEStridedSlice, + ::testing::Values(ov::test::utils::DEVICE_CPU), + RoPETestChatGLM2DRoPEStridedSlice::getTestCaseName); + } // namespace test } // namespace ov From 487b3910f6d611fc84c5cd78a96afb6c0f9bf914 Mon Sep 17 00:00:00 2001 From: Mateusz Mikolajczyk Date: Thu, 17 Oct 2024 12:36:41 +0200 Subject: [PATCH 048/112] [PyOV] Extend Python API with SliceScatter-15 (#27090) ### Details: - *Extend Python API with SliceScatter-15* - *...* ### Tickets: - *CVS-155151* --------- Co-authored-by: Michal Lukaszewski --- .../src/openvino/runtime/opset15/__init__.py | 1 + .../src/openvino/runtime/opset15/ops.py | 29 +++++++++++++++++++ .../python/tests/test_graph/test_create_op.py | 28 ++++++++++++++++++ 3 files changed, 58 insertions(+) diff --git a/src/bindings/python/src/openvino/runtime/opset15/__init__.py b/src/bindings/python/src/openvino/runtime/opset15/__init__.py index 1349508e84b381..96643a7e93d596 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset15/__init__.py @@ -15,3 +15,4 @@ from openvino.runtime.opset15.ops import string_tensor_unpack from openvino.runtime.opset15.ops import bitwise_left_shift from openvino.runtime.opset15.ops 
import bitwise_right_shift +from openvino.runtime.opset15.ops import slice_scatter diff --git a/src/bindings/python/src/openvino/runtime/opset15/ops.py b/src/bindings/python/src/openvino/runtime/opset15/ops.py index 777fc165443f7f..116f63726bfeb6 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset15/ops.py @@ -274,3 +274,32 @@ def bitwise_right_shift( "auto_broadcast": auto_broadcast.upper(), }, ) + + +@nameable_op +def slice_scatter( + data: NodeInput, + updates: NodeInput, + start: NodeInput, + stop: NodeInput, + step: NodeInput, + axes: Optional[NodeInput] = None, + name: Optional[str] = None, +) -> Node: + """Return a node which generates SliceScatter operation. + + :param data: The node providing input data. + :param updates: The node providing updates data. + :param start: The node providing start indices (inclusively). + :param stop: The node providing stop indices (exclusively). + :param step: The node providing step values. + :param axes: The optional node providing axes to slice, default [0, 1, ..., len(start)-1]. + :param name: The optional name for the created output node. + :return: The new node performing SliceScatter operation. + """ + if axes is None: + inputs = as_nodes(data, updates, start, stop, step, name=name) + else: + inputs = as_nodes(data, updates, start, stop, step, axes, name=name) + + return _get_node_factory_opset15().create("SliceScatter", inputs) diff --git a/src/bindings/python/tests/test_graph/test_create_op.py b/src/bindings/python/tests/test_graph/test_create_op.py index dcdb8592390ad4..c5023588f5d55b 100644 --- a/src/bindings/python/tests/test_graph/test_create_op.py +++ b/src/bindings/python/tests/test_graph/test_create_op.py @@ -2458,6 +2458,34 @@ def test_topk_opset11(op_name): assert list(node.get_output_shape(1)) == [1, 3, 3] +def test_slice_scatter(): + data_shape = [10, 7, 2, 13] + data = ov.parameter(data_shape, name="input", dtype=np.float32) + updates = ov.parameter([4, 7, 2, 13], name="updates", dtype=np.float32) + start = ov.constant(np.array([2, 0, 0], dtype=np.int32)) + stop = ov.constant(np.array([9, 7, 2], dtype=np.int32)) + step = ov.constant(np.array([2, 1, 1], dtype=np.int32)) + + node_default_axes = ov_opset15.slice_scatter(data, updates, start, stop, step) + + assert node_default_axes.get_type_name() == "SliceScatter" + assert node_default_axes.get_output_size() == 1 + assert node_default_axes.get_output_element_type(0) == Type.f32 + assert node_default_axes.get_output_shape(0) == data_shape + + start = ov.constant(np.array([0, 2], dtype=np.int32)) + stop = ov.constant(np.array([2, 9], dtype=np.int32)) + step = ov.constant(np.array([1, 2], dtype=np.int32)) + axes = ov.constant(np.array([-2, 0], dtype=np.int32)) + + node = ov_opset15.slice_scatter(data, updates, start, stop, step, axes) + + assert node.get_type_name() == "SliceScatter" + assert node.get_output_size() == 1 + assert node.get_output_element_type(0) == Type.f32 + assert node_default_axes.get_output_shape(0) == data_shape + + def test_parameter_get_attributes(): parameter = ov.parameter([2, 2], dtype=np.float32, name="InputData") parameter_attributes = parameter.get_attributes() From a356945d15013eba28221d2ad5b1b17d52b570a0 Mon Sep 17 00:00:00 2001 From: pravin25 Date: Thu, 17 Oct 2024 07:02:38 -0400 Subject: [PATCH 049/112] Details: (#27008) ### Details: GPU tensorflow_tests/test_tf_Unique.py test was failing, Added in slice_ref_kernel following output datatype which was missing. k. 
EnableOutputDataType(Datatype::UINT8); And updated tensorflow_tests/test_tf_Unique.py, removed skip for GPU. ### Tickets: - https://jira.devtools.intel.com/browse/CVS-105900?focusedId=25293445#comment-25293445 --- .../src/kernel_selector/kernels/slice/slice_kernel_ref.cpp | 1 + tests/layer_tests/tensorflow_tests/test_tf_Unique.py | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp index 34279dd7de148c..4aff7736ff85fe 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/slice/slice_kernel_ref.cpp @@ -90,6 +90,7 @@ ParamsKey SliceKernelRef::GetSupportedKey() const { k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::INT32); k.EnableOutputDataType(Datatype::INT64); + k.EnableOutputDataType(Datatype::UINT8); k.EnableInputLayout(DataLayout::bfyx); k.EnableInputLayout(DataLayout::bfzyx); k.EnableOutputLayout(DataLayout::bfyx); diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Unique.py b/tests/layer_tests/tensorflow_tests/test_tf_Unique.py index 6e18c900328aa3..2a3082abebbc6f 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_Unique.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_Unique.py @@ -42,8 +42,6 @@ def test_unique_basic(self, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): if use_legacy_frontend: pytest.skip("Unique operation is not supported via legacy frontend.") - if ie_device == 'GPU': - pytest.skip("GPU error: Could not find a suitable kernel for slice") self._test(*self.create_unique_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_legacy_frontend=use_legacy_frontend) @@ -59,8 +57,6 @@ def test_unique_other_types(self, params, ie_device, precision, ir_version, temp use_legacy_frontend): if use_legacy_frontend: pytest.skip("Unique operation is not supported via legacy frontend.") - if ie_device == 'GPU': - pytest.skip("GPU error: Could not find a suitable kernel for slice") self._test(*self.create_unique_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_legacy_frontend=use_legacy_frontend) From 6644fc81f5942cdab2e8d863cd6b721620980faa Mon Sep 17 00:00:00 2001 From: Piotr Kowalczyk Date: Thu, 17 Oct 2024 13:09:35 +0200 Subject: [PATCH 050/112] [Spec]: Added initial specification of Searchsorted op (#26887) ### Details: - Based on https://pytorch.org/docs/stable/generated/torch.searchsorted.html ### Tickets: - *CVS-154060* --------- Co-authored-by: Roman Kazantsev Co-authored-by: Przemyslaw Wysocki Co-authored-by: Katarzyna Mitrus --- .../operation-sets/operation-specs.rst | 1 + .../operation-specs/sort/search-sorted-15.rst | 73 +++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst index 2d03cf7cdce069..7ac47116595621 100644 --- a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst +++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs.rst @@ -197,6 +197,7 @@ Operation Specifications ScatterElementsUpdate-12 ScatterNDUpdate-3 ScatterUpdate-3 + 
SearchSorted-15
   Select-1
   Selu-1
   ShapeOf-1

diff --git a/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst
new file mode 100644
index 00000000000000..81c592d3341a35
--- /dev/null
+++ b/docs/articles_en/documentation/openvino-ir-format/operation-sets/operation-specs/sort/search-sorted-15.rst
@@ -0,0 +1,73 @@
+SearchSorted
+===============
+
+
+.. meta::
+  :description: Learn about SearchSorted - a sorting and maximization
+                operation, which requires two input tensors.
+
+
+**Versioned name**: *SearchSorted-15*
+
+**Category**: *Sorting and maximization*
+
+**Short description**: Determines the indices in the innermost dimension of a sorted sequence where elements should be inserted to maintain order.
+
+**Detailed description**: *SearchSorted* is based on the binary search algorithm. It takes two input tensors: the first contains a monotonically increasing sequence on the innermost dimension, and the second contains the search values. It returns a tensor of the same shape as the second input, containing the insertion indices.
+
+**Attributes**
+
+* *right*
+
+  * **Description**: If False, return the index of the first suitable insertion position; if True, return the last such index.
+  * **Range of values**: true or false
+  * **Type**: boolean
+  * **Default value**: false
+  * **Required**: *no*
+
+**Inputs**:
+
+* **1**: ``sorted`` - ND input tensor of type *T* - cannot be a scalar, containing a monotonically increasing sequence on the innermost dimension. **Required.**
+
+* **2**: ``values`` - ND input tensor of type *T*, containing the search values. If the sorted sequence is 1D, the values can have any shape; otherwise their rank should be equal to the rank of the sorted input. **Required.**
+
+**Outputs**:
+
+* **1**: Tensor of type *TOut*, with the same shape as the second input tensor, containing the indices.
+
+**Types**
+
+* *T*: any supported floating-point and integer type.
+
+* *TOut*: int64.
+
+**Example**
+
+.. 
code-block:: xml + :force: + + + + + + 7 + 256 + 200 + 200 + + + 7 + 256 + 200 + 10 + + + + + 7 + 256 + 200 + 10 + + + From 6dbca1f07afb290660efcd0fcb4552c4a4fe3eb3 Mon Sep 17 00:00:00 2001 From: Ekaterina Shiryaeva Date: Thu, 17 Oct 2024 13:43:08 +0200 Subject: [PATCH 051/112] NPUW: Disable AVX2 code with ENABLE_AVX2=OFF (#26890) ### Details: - Disable AVX2 code with ENABLE_AVX2=OFF ### Tickets: - E-141645 --- .../intel_npu/src/plugin/CMakeLists.txt | 11 +- .../intel_npu/src/plugin/npuw/util.cpp | 1402 +--------------- .../intel_npu/src/plugin/npuw/util_xarch.cpp | 1429 +++++++++++++++++ .../intel_npu/src/plugin/npuw/util_xarch.hpp | 88 + 4 files changed, 1548 insertions(+), 1382 deletions(-) create mode 100644 src/plugins/intel_npu/src/plugin/npuw/util_xarch.cpp create mode 100644 src/plugins/intel_npu/src/plugin/npuw/util_xarch.hpp diff --git a/src/plugins/intel_npu/src/plugin/CMakeLists.txt b/src/plugins/intel_npu/src/plugin/CMakeLists.txt index 4b91e6d594cc20..749819b457c82c 100644 --- a/src/plugins/intel_npu/src/plugin/CMakeLists.txt +++ b/src/plugins/intel_npu/src/plugin/CMakeLists.txt @@ -66,9 +66,12 @@ target_include_directories(${TARGET_NAME} $ ) -if(ENABLE_AVX2) - ov_avx2_optimization_flags(avx2_flags) - target_compile_options(${TARGET_NAME} PRIVATE "${avx2_flags}") -endif() +cross_compiled_file(${TARGET_NAME} + ARCH AVX2 ANY + npuw/util_xarch.cpp + API npuw/util_xarch.hpp + NAME unpack_i4i8 unpack_u4i8 unpack_i4f16 unpack_i4f16_scale unpack_i4f16_z unpack_u4f16 unpack_u4f16_scale_zp unpack_u4f16_asymm_zp unpack_u4f16_z unpack_u4f32 unpack_i8f16 unpack_i8f16_scale unpack_u8f16 to_f16 + NAMESPACE ov::npuw::util::XARCH +) ov_add_api_validator_post_build_step(TARGET ${NPU_PLUGIN_TARGET}) diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.cpp b/src/plugins/intel_npu/src/plugin/npuw/util.cpp index 59851b00a5407b..1de8f4de4bdb4f 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.cpp @@ -4,8 +4,6 @@ #include "util.hpp" -#include - #include #include #include @@ -17,10 +15,7 @@ #include "openvino/op/transpose.hpp" #include "openvino/op/util/op_types.hpp" #include "openvino/runtime/make_tensor.hpp" // get_tensor_impl - -#ifdef UNPACK_PROFILING -# include "tbb/concurrent_unordered_map.h" -#endif +#include "util_xarch.hpp" bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt) { if (opt.empty() || opt == "NO") { @@ -39,6 +34,16 @@ bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt) { return false; } +namespace { +inline uint8_t hi4(uint8_t x) { + return x >> 4; +} + +inline uint8_t lo4(uint8_t x) { + return x & 0xF; +} +} // namespace + ov::Tensor ov::npuw::util::tensor_from_const(const std::shared_ptr& node) { NPUW_ASSERT(ov::op::util::is_constant(node)); NPUW_ASSERT(node->outputs().size() == 1); @@ -61,1346 +66,6 @@ std::string ov::npuw::util::fmt(std::size_t number, std::size_t total) { return ss.str(); } -namespace { - -inline int8_t hi4(int8_t x) { - return ((x & (1 << 7)) >> 4) | ((x & (1 << 6)) >> 4) | ((x & (1 << 5)) >> 4) | ((x & (1 << 4)) >> 4); -} - -inline int8_t lo4(int8_t x) { - return (x & (1 << 3)) | (x & (1 << 2)) | (x & (1 << 1)) | (x & (1 << 0)); -} - -inline uint8_t hi4(uint8_t x) { - return x >> 4; -} - -inline uint8_t lo4(uint8_t x) { - return x & 0xF; -} - -inline int8_t upc(int8_t h) { - return h | (-((h & (1 << 3)) >> 3) & (-8)); -} - -// NOTE: This routine implements the NEW ORDER -#define avx2_i4toi8(vinput, vout0, vout1) \ - { \ - __m256i himask = 
_mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, 0xF0)); \ - __m256i lomask = _mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, 0x0F)); \ - __m256i vsgmask = _mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, 1 << 3)); \ - __m256i vzero = _mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, 0)); \ - __m256i vextend = _mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, (-8))); \ - \ - __m256i vht = _mm256_and_si256(vinput, himask); \ - __m256i vhi = _mm256_srli_epi16(vht, 4); \ - __m256i vlo = _mm256_and_si256(vinput, lomask); \ - \ - __m256i vsghi = _mm256_srli_epi16(_mm256_and_si256(vhi, vsgmask), 3); \ - __m256i vsglo = _mm256_srli_epi16(_mm256_and_si256(vlo, vsgmask), 3); \ - __m256i vsubhi = _mm256_sub_epi8(vzero, vsghi); \ - __m256i vsublo = _mm256_sub_epi8(vzero, vsglo); \ - __m256i vhires = _mm256_or_si256(vhi, _mm256_and_si256(vsubhi, vextend)); \ - __m256i vlores = _mm256_or_si256(vlo, _mm256_and_si256(vsublo, vextend)); \ - \ - __m256i vunlo = _mm256_unpacklo_epi8(vlores, vhires); \ - __m256i vunhi = _mm256_unpackhi_epi8(vlores, vhires); \ - *vout0 = _mm256_permute2x128_si256(vunlo, vunhi, 0x20); \ - *vout1 = _mm256_permute2x128_si256(vunlo, vunhi, 0x31); \ - } - -inline __m128i avx2_i8tof16(__m128i vi8) { - __m256i i32vec = _mm256_cvtepi8_epi32(vi8); // extend: 8 x i8 -> 8 x i32 [256b of 256b] - __m256 f32vec = _mm256_cvtepi32_ps(i32vec); // convert: 8 x i32 -> 8 x f32 [256b of 256b] - return _mm256_cvtps_ph(f32vec, _MM_FROUND_TO_NEAREST_INT); // convert: 8 x f32 -> 8 x f16 [128b] -} - -inline __m128i avx2_i8tof16(__m128i vi8, __m256 s) { - __m256i i32vec = _mm256_cvtepi8_epi32(vi8); // extend: 8 x i8 -> 8 x i32 [256b of 256b] - __m256 f32vec = _mm256_cvtepi32_ps(i32vec); // convert: 8 x i32 -> 8 x f32 [256b of 256b] - __m256 f32scl = _mm256_mul_ps(f32vec, s); // scale: 8 x f32 -> 8 x f32 [256b of 256b] - return _mm256_cvtps_ph(f32scl, _MM_FROUND_TO_NEAREST_INT); // convert: 8 x f32 -> 8 x f16 [128b] -} - -inline __m128i avx2_u8tof16_hi(__m128i vu8, __m256 z, __m256 s) { - __m256i u32vec = _mm256_cvtepu8_epi32(vu8); // extend: 8 x u8 -> 8 x i32 [256b of 256b] - __m256 f32vec = _mm256_cvtepi32_ps(u32vec); // convert: 8 x i32 -> 8 x f32 [256b of 256b] - __m256 f32sub = _mm256_sub_ps(f32vec, z); // subtract: 8 x f32 -> 8 x f32 [256b of 256b] - __m256 f32scl = _mm256_mul_ps(f32sub, s); // scale: 8 x f32 -> 8 x f32 [256b of 256b] - return _mm256_cvtps_ph(f32scl, _MM_FROUND_TO_NEAREST_INT); // convert: 8 x f32 -> 8 x f16 [128b] -} - -inline __m128i avx2_u8tof16_lo(__m128i vu8, __m256 z, __m256 s) { - __m128i vu8h = _mm_bsrli_si128(vu8, 8); - return avx2_u8tof16_hi(vu8h, z, s); -} - -inline __m128i avx2_u8tof16(__m128i vi8, __m256 z, __m256 s) { - __m256i i32vec = _mm256_cvtepu8_epi32(vi8); // extend: 8 x i8 -> 8 x i32 [256b of 256b] - __m256 f32vec = _mm256_cvtepi32_ps(i32vec); // convert: 8 x i32 -> 8 x f32 [256b of 256b] - __m256 f32sub = _mm256_sub_ps(f32vec, z); // subtract: 8 x f32 -> 8 x f32 [256b of 256b] - __m256 f32scl = _mm256_mul_ps(f32sub, s); // scale: 8 x f32 -> 8 x f32 [256b of 256b] - return _mm256_cvtps_ph(f32scl, _MM_FROUND_TO_NEAREST_INT); // convert: 8 x f32 -> 8 x f16 [128b] -} - -// NOTE: This routine implements the NEW ORDER -inline void avx2_u4tof16(__m256i vinput, __m128i vout[8], __m256 zvalVec, __m256 svalVec[8]) { - // vinput - 64 x u4 elements - 256 bits - // vout[] - 64 (8x8) x f16 elements - - // NOTE: This is largely a copy of unpack_u4f16() {{ - __m256i himask = _mm256_set1_epi8(static_cast(0xF0)); - __m256i lomask = _mm256_set1_epi8(static_cast(0x0F)); - - // unpacking with 
interleaving - __m256i vht = _mm256_and_si256(vinput, himask); - __m256i xmmUnpackedLo = _mm256_srli_epi16(vht, 4); // 32 x i8 - Extracting High Nibbles - __m256i xmmUnpackedHi = _mm256_and_si256(vinput, lomask); // 32 x i8 - Extracting Low Nibbles - - // need 4 portions of 16 x i8 elements - __m128i unpacked32LoHi = _mm256_castsi256_si128(xmmUnpackedLo); // lower 16 x i8 - Lower 16 of High Nibbles - __m128i unpacked32LoLo = _mm256_extractf128_si256(xmmUnpackedLo, 1); // higher 16 x i8 - Higher 16 of High Nibbles - - __m128i unpacked32HiHi = _mm256_castsi256_si128(xmmUnpackedHi); // lower 16 x i8 - Lower 16 of Low Nibbles - __m128i unpacked32HiLo = _mm256_extractf128_si256(xmmUnpackedHi, 1); // higher 16 x i8 - Higher 16 of Low Nibbles - - // Rearranging of scales - __m256i indices = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7); - // Extracting all 64 scales as per the indices specified above - __m256 scale_v_rearranged[] = {_mm256_permutevar8x32_ps(svalVec[0], indices), - _mm256_permutevar8x32_ps(svalVec[1], indices), - _mm256_permutevar8x32_ps(svalVec[2], indices), - _mm256_permutevar8x32_ps(svalVec[3], indices), - _mm256_permutevar8x32_ps(svalVec[4], indices), - _mm256_permutevar8x32_ps(svalVec[5], indices), - _mm256_permutevar8x32_ps(svalVec[6], indices), - _mm256_permutevar8x32_ps(svalVec[7], indices)}; - - // Scaling should happen like this: - // low_nibble[0]->scale[0], high_nibble[0]->scale[1]...low_nibble[31]->scale[60],high_nibble[31]->scale[61] - - // Extracting all the even-indexed scales for the low nibbles - __m256 scale_v_even[] = { - _mm256_permute2f128_ps(scale_v_rearranged[0], scale_v_rearranged[1], 0x20), - _mm256_permute2f128_ps(scale_v_rearranged[2], scale_v_rearranged[3], 0x20), - _mm256_permute2f128_ps(scale_v_rearranged[4], scale_v_rearranged[5], 0x20), - _mm256_permute2f128_ps(scale_v_rearranged[6], scale_v_rearranged[7], 0x20), - }; - - // Extracting all the odd-indexed scales for the high nibbles - __m256 scale_v_odd[] = { - _mm256_permute2f128_ps(scale_v_rearranged[0], scale_v_rearranged[1], 0x31), - _mm256_permute2f128_ps(scale_v_rearranged[2], scale_v_rearranged[3], 0x31), - _mm256_permute2f128_ps(scale_v_rearranged[4], scale_v_rearranged[5], 0x31), - _mm256_permute2f128_ps(scale_v_rearranged[6], scale_v_rearranged[7], 0x31), - }; - - // converting to 64 x f16 - // Higher 16 of High Nibbles - __m128i f16LoLo[] = {avx2_u8tof16_hi(unpacked32LoLo, zvalVec, scale_v_odd[2]), - avx2_u8tof16_lo(unpacked32LoLo, zvalVec, scale_v_odd[3])}; - // Lower 16 of High Nibbles - __m128i f16LoHi[] = {avx2_u8tof16_hi(unpacked32LoHi, zvalVec, scale_v_odd[0]), - avx2_u8tof16_lo(unpacked32LoHi, zvalVec, scale_v_odd[1])}; - // Higher 16 of Low Nibbles - __m128i f16HiLo[] = {avx2_u8tof16_hi(unpacked32HiLo, zvalVec, scale_v_even[2]), - avx2_u8tof16_lo(unpacked32HiLo, zvalVec, scale_v_even[3])}; - // Lower 16 of Low Nibbles - __m128i f16HiHi[] = {avx2_u8tof16_hi(unpacked32HiHi, zvalVec, scale_v_even[0]), - avx2_u8tof16_lo(unpacked32HiHi, zvalVec, scale_v_even[1])}; - - // interleaving back: - // Interleaving lower 8 of low nibbles with lower 8 of high nibbles and so on - vout[0] = _mm_unpacklo_epi16(f16HiHi[0], f16LoHi[0]); - vout[1] = _mm_unpackhi_epi16(f16HiHi[0], f16LoHi[0]); - vout[2] = _mm_unpacklo_epi16(f16HiHi[1], f16LoHi[1]); - vout[3] = _mm_unpackhi_epi16(f16HiHi[1], f16LoHi[1]); - vout[4] = _mm_unpacklo_epi16(f16HiLo[0], f16LoLo[0]); - vout[5] = _mm_unpackhi_epi16(f16HiLo[0], f16LoLo[0]); - vout[6] = _mm_unpacklo_epi16(f16HiLo[1], f16LoLo[1]); - vout[7] = 
_mm_unpackhi_epi16(f16HiLo[1], f16LoLo[1]); -} - -inline __m256 avx2_load_scale(const int8_t* data, ov::element::Type type) { - if (type == ov::element::f32) { - return _mm256_set1_ps(*reinterpret_cast(data)); - } else { - NPUW_ASSERT(type == ov::element::f16); - float val{}; - _mm_store_ss(&val, _mm_cvtph_ps(_mm_cvtsi32_si128(*reinterpret_cast(data)))); - return _mm256_set1_ps(val); - } -} - -inline float avx2_load_f32(const int8_t* data, ov::element::Type type) { - if (type == ov::element::f32) { - return *reinterpret_cast(data); - } else { - NPUW_ASSERT(type == ov::element::f16); - float val{}; - _mm_store_ss(&val, _mm_cvtph_ps(_mm_cvtsi32_si128(*reinterpret_cast(data)))); - return val; - } -} - -#ifdef UNPACK_PROFILING -class UnpackStat { - tbb::concurrent_unordered_map> inferenceTimes; - -public: - UnpackStat() {} - void addRecord(size_t sz, size_t time) { - inferenceTimes[sz].first++; - inferenceTimes[sz].second += time; - } - ~UnpackStat() { - for (auto&& r : inferenceTimes) { - std::cout << "work: " << r.first //<< ", stride: " << stride - << " overall_time = " << r.second.second / 1000 << " [ms]" - << " avg_atime = " << r.second.second / r.second.first << " [µs]\n"; - } - } -}; - -static UnpackStat ustat; -# define UNPACK_START_TICK() std::chrono::steady_clock::time_point _begin_tick = std::chrono::steady_clock::now(); -# define UNPACK_SAVE_TICK() \ - std::chrono::steady_clock::time_point _end_tick = std::chrono::steady_clock::now(); \ - ustat.addRecord(total, std::chrono::duration_cast(_end_tick - _begin_tick).count()); -#else -# define UNPACK_START_TICK() -# define UNPACK_SAVE_TICK() -#endif - -void unpack_i4i8(const ov::SoPtr& from, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - - // with vectorization above, we: - // - read 256 bits (= 32 bytes, = 64 i4 elements) - // - write 512 bits (= 64 bytes, = 64 i8 elements) - // per every iteration, what translates to (from->size() / 64) iterations - - const std::size_t total = from->get_size(); - int8_t const* pSrc = static_cast(from->data()); // 2 x i4 elements - int8_t* pDst = static_cast(to->data()); // 1 x i8 element - size_t stride = 64; - - auto unpack_body = [pSrc, pDst](size_t index, size_t stride) { - size_t halfStride = stride >> 1; - int8_t const* pSrcLocal = pSrc + halfStride * index; - int8_t* pDstLocal = pDst + stride * index; - - for (size_t j = 0; j < stride; j += 64) { - __m256i inv = _mm256_lddqu_si256(reinterpret_cast(pSrcLocal)); - __m256i* outv0 = reinterpret_cast<__m256i*>(pDstLocal); - __m256i* outv1 = reinterpret_cast<__m256i*>(pDstLocal + 32); - - __m256i vout0, vout1; - avx2_i4toi8(inv, &vout0, &vout1); - - _mm256_storeu_si256(outv0, vout0); - _mm256_storeu_si256(outv1, vout1); - - pSrcLocal += 32; - pDstLocal += 64; - } - }; - // ov work index / 64 - if (unpack_options.nPartitions) { - std::size_t minPartitions; - if (!unpack_options.bStrictPartitioning) { - // some heuristics that every tbb thread workload has to have 2048 elements at least, - // so in terms of stride, it should be 64 * 2048 - minPartitions = total / (64 * 2048); - minPartitions = std::max(1u, minPartitions); - minPartitions = std::min(minPartitions, unpack_options.nPartitions); - } else { - minPartitions = unpack_options.nPartitions; - } - - // calculating stride in elements - this stride give us nPartitions + 1 partitions - stride = static_cast(total / minPartitions); - - // stride 
has to be 64 elements aligned to avoid gaps between workloads - stride = (stride >> 6) << 6; - // if number of partitions to large comparing to workload, min supported stride still have to be clamped to 64 - stride = stride < 64 ? 64 : stride; - } - - UNPACK_START_TICK(); - - if (unpack_options.bUseOvParallelFor) { - ov::parallel_for(total / stride, [unpack_body, stride](size_t index) { - unpack_body(index, stride); - }); - } else { - for (std::size_t index = 0; index < total / stride; index++) { - unpack_body(index, stride); - } - } - // handle tail - size_t tailOffset = (static_cast(total / stride) * stride); - pSrc = static_cast(from->data()) + (tailOffset >> 1); - pDst = static_cast(to->data()) + tailOffset; - - for (std::size_t index = 0; index < ((total % 64) >> 1); index++) { - *(pDst++) = upc(lo4(*(pSrc))); - *(pDst++) = upc(hi4(*(pSrc))); - pSrc++; - } - UNPACK_SAVE_TICK(); -} - -void unpack_u4i8(const ov::SoPtr& from, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - - uint8_t const* pSrc = static_cast(from->data()); // 2 x u4 elements - int8_t* pDst = static_cast(to->data()); // 1 x i8 element - - const std::size_t total = from->get_size(); - for (std::size_t index = 0; index < total; index += 2) { - pDst[0] = static_cast(lo4(*pSrc)); // LSB is [0] -- since OpenVINO 24.0! - pDst[1] = static_cast(hi4(*pSrc)); // MSB is [1] -- since OpenVINO 24.0! - pSrc++; - pDst += 2; - } -} - -void unpack_i4f16(const ov::SoPtr& from, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - - // This conversion combines i4toi8 (above) and i8tof16 (below). 
Here we - // - read 256 bits (= 32 bytes, = 64 i4 elements) - // - write 1024 bits (= 128 bytes, = 64 f16 elements) - // per every iteration, what translates to (from->size() / 64) iterations - - std::size_t total = to->get_size(); - int8_t const* pSrc = static_cast(from->data()); // 2 x i4 elements - int16_t* pDst = static_cast(to->data()); // 1 x f16 element - // bool tailOnly = total < 64; - - auto unpack_body = [pSrc, pDst](size_t index) { - int8_t const* pSrcLocal = pSrc + 32 * index; - int16_t* pDstLocal = pDst + 64 * index; - - __m256i inv = _mm256_lddqu_si256(reinterpret_cast(pSrcLocal)); - __m128i* outv[8] = { - reinterpret_cast<__m128i*>(pDstLocal), - reinterpret_cast<__m128i*>(pDstLocal + 8), - reinterpret_cast<__m128i*>(pDstLocal + 16), - reinterpret_cast<__m128i*>(pDstLocal + 24), - reinterpret_cast<__m128i*>(pDstLocal + 32), - reinterpret_cast<__m128i*>(pDstLocal + 40), - reinterpret_cast<__m128i*>(pDstLocal + 48), - reinterpret_cast<__m128i*>(pDstLocal + 56), - }; - - __m256i vout0, vout1; - avx2_i4toi8(inv, &vout0, &vout1); - - int8_t tmp[64]; // FIXME: Avoid it - __m256i* tmpv0 = reinterpret_cast<__m256i*>(tmp); - __m256i* tmpv1 = reinterpret_cast<__m256i*>(tmp + 32); - _mm256_storeu_si256(tmpv0, vout0); - _mm256_storeu_si256(tmpv1, vout1); - - __m128i i8vecs[8] = { - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 8)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 16)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 24)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 32)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 40)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 48)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 56)), - }; - - __m128i vresults[8] = {avx2_i8tof16(i8vecs[0]), - avx2_i8tof16(i8vecs[1]), - avx2_i8tof16(i8vecs[2]), - avx2_i8tof16(i8vecs[3]), - avx2_i8tof16(i8vecs[4]), - avx2_i8tof16(i8vecs[5]), - avx2_i8tof16(i8vecs[6]), - avx2_i8tof16(i8vecs[7])}; - - _mm_storeu_si128(outv[0], vresults[0]); - _mm_storeu_si128(outv[1], vresults[1]); - _mm_storeu_si128(outv[2], vresults[2]); - _mm_storeu_si128(outv[3], vresults[3]); - _mm_storeu_si128(outv[4], vresults[4]); - _mm_storeu_si128(outv[5], vresults[5]); - _mm_storeu_si128(outv[6], vresults[6]); - _mm_storeu_si128(outv[7], vresults[7]); - }; - - if (unpack_options.bUseOvParallelFor) { - ov::parallel_for(total / 64, [&unpack_body](size_t index) { - unpack_body(index); - }); - } else { - for (std::size_t index = 0; index < total / 64; index++) { - unpack_body(index); - } - } - - // handle tail that is < 64 elements - size_t tailOffset = ((total >> 6) << 6); - pSrc = static_cast(from->data()) + (tailOffset >> 1); - pDst = static_cast(to->data()) + tailOffset; - - constexpr std::size_t VECSIZE = 8; - - total = ((total % 64) >> 1); - int8_t unpackedToI8[VECSIZE] = {0}; - size_t unpackedIdx = 0; - for (std::size_t index = 0; index < total; index++) { - unpackedToI8[unpackedIdx++] = upc(lo4(*(pSrc))); - unpackedToI8[unpackedIdx++] = upc(hi4(*(pSrc))); - if (unpackedIdx == VECSIZE) { - __m128i i8vec = _mm_loadl_epi64(reinterpret_cast<__m128i*>(unpackedToI8)); - __m128i f16vec = avx2_i8tof16(i8vec); - _mm_storeu_si128(reinterpret_cast<__m128i*>(pDst), f16vec); - pDst += VECSIZE; - unpackedIdx = 0; - } - pSrc += 1; - } - - // handle tail that is < 8 - if (unpackedIdx != 0) { - int16_t tmp[VECSIZE]; - __m128i i8vec = _mm_loadl_epi64(reinterpret_cast<__m128i*>(unpackedToI8)); - __m128i f16vec = avx2_i8tof16(i8vec); - 
_mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), f16vec); - for (size_t i = 0; i != unpackedIdx; i++) { - pDst[i] = tmp[i]; - } - } -} - -void unpack_i4f16(const ov::SoPtr& from, - const ov::SoPtr& scale, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(scale->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - - const auto& from_shape = from->get_shape(); - NPUW_ASSERT(from_shape.back() % 64 == 0); - - // 2-channel (Symmetric) and 3-channel (group-wise) - // scale factors are supported. The scale/value loop - // iteration is based on stotal, so should work for - // both cases. - const auto& scale_shape = scale->get_shape(); - NPUW_ASSERT(scale_shape.size() == 3 || scale_shape.size() == 2); - if (scale_shape.size() == 3) { - NPUW_ASSERT(scale_shape[0] == from_shape[0]); - NPUW_ASSERT(scale_shape[1] == from_shape[1]); - NPUW_ASSERT(scale_shape[2] == 1); - } else { - NPUW_ASSERT(scale_shape[0] == from_shape[0]); - NPUW_ASSERT(scale_shape[1] == 1); - } - - const auto scale_elem_type = scale->get_element_type(); - NPUW_ASSERT(scale_elem_type == ov::element::f32 || scale_elem_type == ov::element::f16); - - // This conversion combines i4toi8 (above) and i8tof16 (below). Here we - // - read 256 bits (= 32 bytes, = 64 i4 elements) - // - write 1024 bits (= 128 bytes, = 64 f16 elements) - // per every iteration, what translates to (from->size() / 64) iterations - - const std::size_t total = to->get_size(); - const std::size_t stotal = scale->get_size(); - const std::size_t elementsPerScale = total / stotal; - - // TODO: handle tails - NPUW_ASSERT(elementsPerScale % 64 == 0); - - const int8_t* const pSrc = static_cast(from->data()); // 2 x i4 elements - const int8_t* const pScl = static_cast(scale->data()); // either f16 or f32 - const int16_t* pDst = static_cast(to->data()); // 1 x f16 element - - auto unpack_body = [pSrc, pDst, pScl, elementsPerScale, scale_elem_type, stotal](std::size_t sindex, - std::size_t stride) { - // number of vectorized operations per scale - size_t elementsPerScaleVectorized = elementsPerScale / 64; - - int8_t const* pSrcLocal = pSrc + 32 * elementsPerScaleVectorized * sindex * stride; - int8_t const* pSclLocal = pScl + scale_elem_type.size() * sindex * stride; - int16_t* pDstLocal = const_cast(pDst) + 64 * elementsPerScaleVectorized * sindex * stride; - - // if it is last iteration current stride can be smaller - lets check that - sindex *= stride; - const auto jobFinish = std::min(sindex + stride, stotal); - - for (; sindex != jobFinish; sindex++) { - __m256 svec = avx2_load_scale(pSclLocal, scale_elem_type); - for (std::size_t index = 0; index < elementsPerScale; index += 64) { - __m256i inv = _mm256_lddqu_si256(reinterpret_cast(pSrcLocal)); - __m128i* outv[8] = { - reinterpret_cast<__m128i*>(pDstLocal), - reinterpret_cast<__m128i*>(pDstLocal + 8), - reinterpret_cast<__m128i*>(pDstLocal + 16), - reinterpret_cast<__m128i*>(pDstLocal + 24), - reinterpret_cast<__m128i*>(pDstLocal + 32), - reinterpret_cast<__m128i*>(pDstLocal + 40), - reinterpret_cast<__m128i*>(pDstLocal + 48), - reinterpret_cast<__m128i*>(pDstLocal + 56), - }; - - __m256i vout0, vout1; - avx2_i4toi8(inv, &vout0, &vout1); - - int8_t tmp[64]; // FIXME: Avoid it - __m256i* tmpv0 = reinterpret_cast<__m256i*>(tmp); - __m256i* tmpv1 = reinterpret_cast<__m256i*>(tmp + 32); - _mm256_storeu_si256(tmpv0, vout0); - _mm256_storeu_si256(tmpv1, vout1); - - __m128i i8vecs[8] = { - 
_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 8)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 16)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 24)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 32)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 40)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 48)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 56)), - }; - - __m128i vresults[8] = {avx2_i8tof16(i8vecs[0], svec), - avx2_i8tof16(i8vecs[1], svec), - avx2_i8tof16(i8vecs[2], svec), - avx2_i8tof16(i8vecs[3], svec), - avx2_i8tof16(i8vecs[4], svec), - avx2_i8tof16(i8vecs[5], svec), - avx2_i8tof16(i8vecs[6], svec), - avx2_i8tof16(i8vecs[7], svec)}; - - _mm_storeu_si128(outv[0], vresults[0]); - _mm_storeu_si128(outv[1], vresults[1]); - _mm_storeu_si128(outv[2], vresults[2]); - _mm_storeu_si128(outv[3], vresults[3]); - _mm_storeu_si128(outv[4], vresults[4]); - _mm_storeu_si128(outv[5], vresults[5]); - _mm_storeu_si128(outv[6], vresults[6]); - _mm_storeu_si128(outv[7], vresults[7]); - - pSrcLocal += 32; // shift pSrc only by 32 since it is 64 x i4 - pDstLocal += 64; // note pDst is int16_t - } - pSclLocal += scale_elem_type.size(); - } - }; - size_t stride{1}; - - // since scaling is always 64 elements aligned operations, lets partition only in scale shape - if (unpack_options.nPartitions) { - std::size_t minPartitions; - if (!unpack_options.bStrictPartitioning) { - // some heuristics that every tbb thread workload has to have 2048 x intrinsics operations at least, - // so in terms of stride, it should be nElementsPerscale/64 * 2048 - const auto nIntrinsicsPerScale = elementsPerScale / 64u; - auto minScaleStride = 2048u / nIntrinsicsPerScale; - minScaleStride = std::max(1u, minScaleStride); - minPartitions = stotal / minScaleStride; - minPartitions = std::max(1u, minPartitions); - minPartitions = std::min(minPartitions, unpack_options.nPartitions); - } else { - minPartitions = unpack_options.nPartitions; - } - - // calculating stride in scale elements space - stride = static_cast(stotal / minPartitions); - } - - const size_t numWork = (stotal + stride - 1) / stride; - - if (unpack_options.bUseOvParallelFor) { - ov::parallel_for(numWork, [unpack_body, stride](size_t index) { - unpack_body(index, stride); - }); - } else { - for (std::size_t index = 0; index < numWork; index++) { - unpack_body(index, stride); - } - } -} - -void unpack_i4f16_z(const ov::SoPtr& from, - const ov::SoPtr& scale, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(scale->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - - const auto& from_shape = from->get_shape(); - NPUW_ASSERT(from_shape.back() % 64 == 0); - - const auto& scale_shape = scale->get_shape(); - NPUW_ASSERT(scale_shape.size() == 3); - NPUW_ASSERT(scale_shape[0] == from_shape[0]); - NPUW_ASSERT(scale_shape[2] == from_shape[2]); - NPUW_ASSERT(scale_shape[1] == 1); - - const auto scale_elem_type = scale->get_element_type(); - NPUW_ASSERT(scale_elem_type == ov::element::f32); - - // This conversion combines i4tof32 and f32tof16. 
Here we - // - read 256 bits (= 32 bytes, = 64 u4 elements) - // - write 1024 bits (= 128 bytes, = 64 f16 elements) - // per every iteration, what translates to (from->size() / 64) iterations - - const size_t C = from_shape[from_shape.size() - 3]; - const size_t H = from_shape[from_shape.size() - 2]; - const size_t W = from_shape[from_shape.size() - 1]; - - const int8_t* const pSrc = static_cast(from->data()); // 2 x i4 elements - const float* const pScl = static_cast(scale->data()); // 1 x f32 element - int16_t* pDst = static_cast(to->data()); // 1 x f16 element - - auto unpack_body = [&](size_t job_index, size_t stride) { - size_t start_c = job_index * stride; - size_t end_c = std::min(C, start_c + stride); - - for (size_t c = start_c; c < end_c; ++c) { - for (size_t h = 0; h < H; ++h) { - for (size_t w = 0; w < W; w += 64) { - const int8_t* pSrc_iter = pSrc + (w + W * h + W * H * c) / 2; - __m256i vinput = _mm256_lddqu_si256(reinterpret_cast(pSrc_iter)); - __m256i vout0, vout1; - avx2_i4toi8(vinput, &vout0, &vout1); - int8_t tmp[64]; // FIXME: Avoid it - __m256i* tmpv0 = reinterpret_cast<__m256i*>(tmp); - __m256i* tmpv1 = reinterpret_cast<__m256i*>(tmp + 32); - _mm256_storeu_si256(tmpv0, vout0); - _mm256_storeu_si256(tmpv1, vout1); - __m128i i8vecs[8] = { - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 8)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 16)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 24)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 32)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 40)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 48)), - _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 56)), - }; - - const float* pScl_iter = pScl + w + W * c; - __m256 svalVec[8]; - for (int i = 0; i < 8; ++i) { - svalVec[i] = _mm256_loadu_ps(pScl_iter + i * 8); - } - - __m128i vresults[8] = {avx2_i8tof16(i8vecs[0], svalVec[0]), - avx2_i8tof16(i8vecs[1], svalVec[1]), - avx2_i8tof16(i8vecs[2], svalVec[2]), - avx2_i8tof16(i8vecs[3], svalVec[3]), - avx2_i8tof16(i8vecs[4], svalVec[4]), - avx2_i8tof16(i8vecs[5], svalVec[5]), - avx2_i8tof16(i8vecs[6], svalVec[6]), - avx2_i8tof16(i8vecs[7], svalVec[7])}; - - int16_t* pDst_iter = pDst + w + W * h + W * H * c; - for (int i = 0; i < 8; ++i) { - _mm_storeu_si128(reinterpret_cast<__m128i*>(pDst_iter + i * 8), vresults[i]); - } - } - } - } - }; - - size_t stride = C; - size_t num_jobs = 1; - - if (unpack_options.nPartitions) { - if (unpack_options.bStrictPartitioning) { - stride = (C + unpack_options.nPartitions - 1) / unpack_options.nPartitions; - num_jobs = unpack_options.nPartitions; - } else { - stride = std::max(1, C / unpack_options.nPartitions); - num_jobs = (C + stride - 1) / stride; - } - } - - if (unpack_options.bUseOvParallelFor) { - ov::parallel_for(num_jobs, [&](size_t job_index) { - unpack_body(job_index, stride); - }); - } else { - for (size_t job_index = 0; job_index < num_jobs; ++job_index) { - unpack_body(job_index, stride); - } - } -} - -void unpack_u4f16(const ov::SoPtr& from, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - NPUW_ASSERT(from->get_size() % 64 == 0); - - // This conversion combines u4i8 and i8tof16 unpacks. 
Here we - // - read 256 bits (= 32 bytes, = 64 i4 elements) - // - write 1024 bits (= 128 bytes, = 64 f16 elements) - // per every iteration, what translates to (from->size() / 64) iterations - - const std::size_t total = to->get_size(); - int8_t const* pSrc = static_cast(from->data()); // 2 x i4 elements - int16_t* pDst = static_cast(to->data()); // 1 x f16 element - - for (std::size_t index = 0; index < total; index += 64) { - __m128i* outv[8] = { - reinterpret_cast<__m128i*>(pDst), - reinterpret_cast<__m128i*>(pDst + 8), - reinterpret_cast<__m128i*>(pDst + 16), - reinterpret_cast<__m128i*>(pDst + 24), - reinterpret_cast<__m128i*>(pDst + 32), - reinterpret_cast<__m128i*>(pDst + 40), - reinterpret_cast<__m128i*>(pDst + 48), - reinterpret_cast<__m128i*>(pDst + 56), - }; - - int8_t tmp[64]; // FIXME: Avoid it - for (std::size_t ii = 0; ii < 32; ii++) { - tmp[ii * 2] = static_cast(lo4(pSrc[ii])); // LSB is [0] -- since OpenVINO 24.0! - tmp[ii * 2 + 1] = static_cast(hi4(pSrc[ii])); // MSB is [1] -- since OpenVINO 24.0! - } - - __m128i vresults[8] = { - avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp))), - avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 8))), - avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 16))), - avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 24))), - avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 32))), - avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 40))), - avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 48))), - avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 56))), - }; - - _mm_storeu_si128(outv[0], vresults[0]); - _mm_storeu_si128(outv[1], vresults[1]); - _mm_storeu_si128(outv[2], vresults[2]); - _mm_storeu_si128(outv[3], vresults[3]); - _mm_storeu_si128(outv[4], vresults[4]); - _mm_storeu_si128(outv[5], vresults[5]); - _mm_storeu_si128(outv[6], vresults[6]); - _mm_storeu_si128(outv[7], vresults[7]); - - pSrc += 32; // shift pSrc only by 32 since it is 64 x i4 - pDst += 64; // note pDst is int16_t - } -} - -void unpack_u4f16(const ov::SoPtr& from, - const ov::SoPtr& zerop, - const ov::SoPtr& scale, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(zerop->is_continuous()); - NPUW_ASSERT(scale->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - - // Only single-size ZP is supported - NPUW_ASSERT(zerop->get_size() == 1); - - const auto& from_shape = from->get_shape(); - NPUW_ASSERT(from_shape.back() % 64 == 0); - - // 2-channel (Symmetric) and 3-channel (group-wise) - // scale factors are supported. The scale/value loop - // iteration is based on stotal, so should work for - // both cases. - const auto& scale_shape = scale->get_shape(); - NPUW_ASSERT(scale_shape.size() == 3 || scale_shape.size() == 2); - if (scale_shape.size() == 3) { - NPUW_ASSERT(scale_shape[0] == from_shape[0]); - NPUW_ASSERT(scale_shape[1] == from_shape[1]); - NPUW_ASSERT(scale_shape[2] == 1); - } else { - NPUW_ASSERT(scale_shape[0] == from_shape[0]); - NPUW_ASSERT(scale_shape[1] == 1); - } - - const auto zerop_elem_type = zerop->get_element_type(); - const auto scale_elem_type = scale->get_element_type(); - NPUW_ASSERT(zerop_elem_type == ov::element::u4); - NPUW_ASSERT(scale_elem_type == ov::element::f16); - - // This conversion combines u4tof32 and f32tof16. 
Here we - // - read 256 bits (= 32 bytes, = 64 u4 elements) - // - write 1024 bits (= 128 bytes, = 64 f16 elements) - // per every iteration, what translates to (from->size() / 64) iterations - - const std::size_t total = to->get_size(); - const std::size_t stotal = scale->get_size(); - const std::size_t elementsPerScale = total / stotal; - - const uint8_t* const pSrc = static_cast(from->data()); // 2 x u4 elements - const uint8_t* const pZer = static_cast(zerop->data()); // 1 x u4 element - const int8_t* const pScl = static_cast(scale->data()); // 1 x f16 element - const int16_t* pDst = static_cast(to->data()); // 1 x f16 element - - const float zval = static_cast(lo4(*pZer)); // MSB - since OpenVINO 24.0! - - __m256 zvalVec = _mm256_set1_ps(zval); - - auto unpack_body = [pSrc, pDst, pScl, zvalVec, elementsPerScale, scale_elem_type, stotal](std::size_t sindex, - std::size_t stride) { - // number of vectorized operations per scale - size_t elementsPerScaleVectorized = elementsPerScale / 64; - - uint8_t const* pSrcLocal = pSrc + 32 * elementsPerScaleVectorized * sindex * stride; - int8_t const* pSclLocal = pScl + scale_elem_type.size() * sindex * stride; - int16_t* pDstLocal = const_cast(pDst) + 64 * elementsPerScaleVectorized * sindex * stride; - - // if it is last iteration current stride can be smaller - lets check that - sindex *= stride; - const auto jobFinish = std::min(sindex + stride, stotal); - - for (; sindex < jobFinish; sindex++) { - __m256 svalVec = avx2_load_scale(pSclLocal, scale_elem_type); - - for (std::size_t index = 0; index < elementsPerScale; index += 64) { - __m128i* outv[] = { - reinterpret_cast<__m128i*>(pDstLocal), - reinterpret_cast<__m128i*>(pDstLocal + 8), - reinterpret_cast<__m128i*>(pDstLocal + 16), - reinterpret_cast<__m128i*>(pDstLocal + 24), - reinterpret_cast<__m128i*>(pDstLocal + 32), - reinterpret_cast<__m128i*>(pDstLocal + 40), - reinterpret_cast<__m128i*>(pDstLocal + 48), - reinterpret_cast<__m128i*>(pDstLocal + 56), - }; - __m256i himask = _mm256_set1_epi8(static_cast(0xF0)); - __m256i lomask = _mm256_set1_epi8(static_cast(0x0F)); - - // loading 256 bit u4 into unalligned memory , so 64 elements - // cannot use aligned version here like _mm256_load_si256 - segfault even on unit tests - __m256i xmmData = _mm256_lddqu_si256(reinterpret_cast<__m256i const*>(pSrcLocal)); - - // unpacking with interleaving - __m256i vht = _mm256_and_si256(xmmData, himask); - __m256i xmmUnpackedLo = _mm256_srli_epi16(vht, 4); // 32 x i8 - __m256i xmmUnpackedHi = _mm256_and_si256(xmmData, lomask); // 32 x i8 - - // need 4 portions of 8 x i8 elements - __m128i unpacked32LoHi = _mm256_castsi256_si128(xmmUnpackedLo); // lower 16 x i8 - __m128i unpacked32LoLo = _mm256_extractf128_si256(xmmUnpackedLo, 1); // higher 16 x i8 - - __m128i unpacked32HiHi = _mm256_castsi256_si128(xmmUnpackedHi); // lower 16 x i8 - __m128i unpacked32HiLo = _mm256_extractf128_si256(xmmUnpackedHi, 1); // higher 16 x i8 - - // converting to 32 x f16 - __m128i f16LoLo[] = {avx2_u8tof16_hi(unpacked32LoLo, zvalVec, svalVec), - avx2_u8tof16_lo(unpacked32LoLo, zvalVec, svalVec)}; - - __m128i f16LoHi[] = { - avx2_u8tof16_hi(unpacked32LoHi, zvalVec, svalVec), - avx2_u8tof16_lo(unpacked32LoHi, zvalVec, svalVec), - }; - - __m128i f16HiLo[] = {avx2_u8tof16_hi(unpacked32HiLo, zvalVec, svalVec), - avx2_u8tof16_lo(unpacked32HiLo, zvalVec, svalVec)}; - __m128i f16HiHi[] = {avx2_u8tof16_hi(unpacked32HiHi, zvalVec, svalVec), - avx2_u8tof16_lo(unpacked32HiHi, zvalVec, svalVec)}; - - // interleaving back - __m128i 
interleaved[] = {_mm_unpacklo_epi16(f16HiHi[0], f16LoHi[0]), - _mm_unpackhi_epi16(f16HiHi[0], f16LoHi[0]), - _mm_unpacklo_epi16(f16HiHi[1], f16LoHi[1]), - _mm_unpackhi_epi16(f16HiHi[1], f16LoHi[1]), - _mm_unpacklo_epi16(f16HiLo[0], f16LoLo[0]), - _mm_unpackhi_epi16(f16HiLo[0], f16LoLo[0]), - _mm_unpacklo_epi16(f16HiLo[1], f16LoLo[1]), - _mm_unpackhi_epi16(f16HiLo[1], f16LoLo[1])}; - - // store the results - _mm_storeu_si128(outv[0], interleaved[0]); - _mm_storeu_si128(outv[1], interleaved[1]); - _mm_storeu_si128(outv[2], interleaved[2]); - _mm_storeu_si128(outv[3], interleaved[3]); - _mm_storeu_si128(outv[4], interleaved[4]); - _mm_storeu_si128(outv[5], interleaved[5]); - _mm_storeu_si128(outv[6], interleaved[6]); - _mm_storeu_si128(outv[7], interleaved[7]); - - pSrcLocal += 32; // shift pSrc only by 32 since it is 64 x u4 - pDstLocal += 64; // note pDst is int16_t, so 64 x f16 -> 64 elements - } // for(index) - pSclLocal += scale_elem_type.size(); - } // for(sindex) - }; - - size_t stride{1}; - - // since scaling is always 64 elements aligned operations, lets partition only in scale shape - if (unpack_options.nPartitions) { - std::size_t minPartitions; - if (!unpack_options.bStrictPartitioning) { - // some heuristics that every tbb thread workload has to have 2048 x intrinsics operations at least, - // so in terms of stride, it should be nElementsPerscale/64 * 2048 - const auto nIntrinsicsPerScale = elementsPerScale / 64u; - auto minScaleStride = 2048u / nIntrinsicsPerScale; - minScaleStride = std::max(1u, minScaleStride); - minPartitions = stotal / minScaleStride; - minPartitions = std::max(1u, minPartitions); - minPartitions = std::min(minPartitions, unpack_options.nPartitions); - } else { - minPartitions = unpack_options.nPartitions; - } - - // calculating stride in scale elements space - stride = static_cast(stotal / minPartitions); - } - - const size_t numWork = (stotal + stride - 1) / stride; - - if (unpack_options.bUseOvParallelFor) { - ov::parallel_for(numWork, [unpack_body, stride](size_t index) { - unpack_body(index, stride); - }); - } else { - for (std::size_t index = 0; index < numWork; index++) { - unpack_body(index, stride); - } - } -} - -void unpack_u4f16_asymm_zp(const ov::SoPtr& from, - const ov::SoPtr& zerop, - const ov::SoPtr& scale, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(zerop->is_continuous()); - NPUW_ASSERT(scale->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - - const auto& from_shape = from->get_shape(); - NPUW_ASSERT(from_shape.back() % 64 == 0); - - // 3-channel (group-wise) scale factors are - // supported. - - const auto& scale_shape = scale->get_shape(); - NPUW_ASSERT(scale_shape.size() == 3); - if (scale_shape.size() == 3) { - NPUW_ASSERT(scale_shape[0] == from_shape[0]); - NPUW_ASSERT(scale_shape[1] == from_shape[1]); - NPUW_ASSERT(scale_shape[2] == 1); - } - - const auto& zerop_shape = zerop->get_shape(); - NPUW_ASSERT(zerop_shape.size() == 3); - if (zerop_shape.size() == 3) { - NPUW_ASSERT(zerop_shape[0] == from_shape[0]); - NPUW_ASSERT(zerop_shape[1] == from_shape[1]); - NPUW_ASSERT(zerop_shape[2] == 1); - } - - const auto zerop_elem_type = zerop->get_element_type(); - const auto scale_elem_type = scale->get_element_type(); - NPUW_ASSERT(zerop_elem_type == ov::element::u4); - NPUW_ASSERT(scale_elem_type == ov::element::f16); - - // This conversion combines u4tof32 and f32tof16. 
Here we - // - read 256 bits (= 32 bytes, = 64 u4 elements) - // - write 1024 bits (= 128 bytes, = 64 f16 elements) - // per every iteration, what translates to (from->size() / 64) iterations - - const std::size_t total = to->get_size(); - const std::size_t stotal = scale->get_size(); - const std::size_t elementsPerScale = total / stotal; - - const uint8_t* const pSrc = static_cast(from->data()); // 2 x u4 elements - const uint8_t* const pZer = static_cast(zerop->data()); // 2 x u4 element - const int8_t* const pScl = static_cast(scale->data()); // 1 x f16 element - const int16_t* pDst = static_cast(to->data()); // 1 x f16 element - - auto unpack_body = [pSrc, pDst, pScl, pZer, elementsPerScale, scale_elem_type, zerop_elem_type, stotal]( - std::size_t sindex, - std::size_t stride) { - // number of vectorized operations per scale - size_t elementsPerScaleVectorized = elementsPerScale / 64; - - uint8_t const* pSrcLocal = pSrc + 32 * elementsPerScaleVectorized * sindex * stride; - int8_t const* pSclLocal = pScl + scale_elem_type.size() * sindex * stride; - uint8_t const* pZerLocal = pZer + zerop_elem_type.size() * sindex * stride / 2; - int16_t* pDstLocal = const_cast(pDst) + 64 * elementsPerScaleVectorized * sindex * stride; - - // if it is last iteration current stride can be smaller - lets check that - sindex *= stride; - const auto jobFinish = std::min(sindex + stride, stotal); - - for (; sindex < jobFinish; sindex++) { - __m256 svalVec = avx2_load_scale(pSclLocal, scale_elem_type); - __m256 zvalVec = _mm256_set1_ps(static_cast((sindex % 2 == 0) ? lo4(*pZerLocal) : hi4(*pZerLocal))); - - for (std::size_t index = 0; index < elementsPerScale; index += 64) { - __m128i* outv[] = { - reinterpret_cast<__m128i*>(pDstLocal), - reinterpret_cast<__m128i*>(pDstLocal + 8), - reinterpret_cast<__m128i*>(pDstLocal + 16), - reinterpret_cast<__m128i*>(pDstLocal + 24), - reinterpret_cast<__m128i*>(pDstLocal + 32), - reinterpret_cast<__m128i*>(pDstLocal + 40), - reinterpret_cast<__m128i*>(pDstLocal + 48), - reinterpret_cast<__m128i*>(pDstLocal + 56), - }; - __m256i himask = _mm256_set1_epi8(static_cast(0xF0)); - __m256i lomask = _mm256_set1_epi8(static_cast(0x0F)); - - // loading 256 bit u4 into unalligned memory , so 64 elements - // cannot use aligned version here like _mm256_load_si256 - segfault even on unit tests - __m256i xmmData = _mm256_lddqu_si256(reinterpret_cast<__m256i const*>(pSrcLocal)); - - // unpacking with interleaving - __m256i vht = _mm256_and_si256(xmmData, himask); - __m256i xmmUnpackedLo = _mm256_srli_epi16(vht, 4); // 32 x i8 - __m256i xmmUnpackedHi = _mm256_and_si256(xmmData, lomask); // 32 x i8 - - // need 4 portions of 8 x i8 elements - __m128i unpacked32LoHi = _mm256_castsi256_si128(xmmUnpackedLo); // lower 16 x i8 - __m128i unpacked32LoLo = _mm256_extractf128_si256(xmmUnpackedLo, 1); // higher 16 x i8 - - __m128i unpacked32HiHi = _mm256_castsi256_si128(xmmUnpackedHi); // lower 16 x i8 - __m128i unpacked32HiLo = _mm256_extractf128_si256(xmmUnpackedHi, 1); // higher 16 x i8 - - // converting to 32 x f16 - __m128i f16LoLo[] = {avx2_u8tof16_hi(unpacked32LoLo, zvalVec, svalVec), - avx2_u8tof16_lo(unpacked32LoLo, zvalVec, svalVec)}; - - __m128i f16LoHi[] = { - avx2_u8tof16_hi(unpacked32LoHi, zvalVec, svalVec), - avx2_u8tof16_lo(unpacked32LoHi, zvalVec, svalVec), - }; - - __m128i f16HiLo[] = {avx2_u8tof16_hi(unpacked32HiLo, zvalVec, svalVec), - avx2_u8tof16_lo(unpacked32HiLo, zvalVec, svalVec)}; - __m128i f16HiHi[] = {avx2_u8tof16_hi(unpacked32HiHi, zvalVec, svalVec), - 
avx2_u8tof16_lo(unpacked32HiHi, zvalVec, svalVec)}; - - // interleaving back - __m128i interleaved[] = {_mm_unpacklo_epi16(f16HiHi[0], f16LoHi[0]), - _mm_unpackhi_epi16(f16HiHi[0], f16LoHi[0]), - _mm_unpacklo_epi16(f16HiHi[1], f16LoHi[1]), - _mm_unpackhi_epi16(f16HiHi[1], f16LoHi[1]), - _mm_unpacklo_epi16(f16HiLo[0], f16LoLo[0]), - _mm_unpackhi_epi16(f16HiLo[0], f16LoLo[0]), - _mm_unpacklo_epi16(f16HiLo[1], f16LoLo[1]), - _mm_unpackhi_epi16(f16HiLo[1], f16LoLo[1])}; - - // store the results - _mm_storeu_si128(outv[0], interleaved[0]); - _mm_storeu_si128(outv[1], interleaved[1]); - _mm_storeu_si128(outv[2], interleaved[2]); - _mm_storeu_si128(outv[3], interleaved[3]); - _mm_storeu_si128(outv[4], interleaved[4]); - _mm_storeu_si128(outv[5], interleaved[5]); - _mm_storeu_si128(outv[6], interleaved[6]); - _mm_storeu_si128(outv[7], interleaved[7]); - - pSrcLocal += 32; // shift pSrc only by 32 since it is 64 x u4 - pDstLocal += 64; // note pDst is int16_t, so 64 x f16 -> 64 elements - } // for(index) - pSclLocal += scale_elem_type.size(); - if (sindex % 2 == 1) { - pZerLocal += zerop_elem_type.size(); - } - } // for(sindex) - }; - - size_t stride{1}; - - // since scaling is always 64 elements aligned operations, lets partition only in scale shape - if (unpack_options.nPartitions) { - std::size_t minPartitions; - if (!unpack_options.bStrictPartitioning) { - // some heuristics that every tbb thread workload has to have 2048 x intrinsics operations at least, - // so in terms of stride, it should be nElementsPerscale/64 * 2048 - const auto nIntrinsicsPerScale = elementsPerScale / 64u; - auto minScaleStride = 2048u / nIntrinsicsPerScale; - minScaleStride = std::max(1u, minScaleStride); - minPartitions = stotal / minScaleStride; - minPartitions = std::max(1u, minPartitions); - minPartitions = std::min(minPartitions, unpack_options.nPartitions); - } else { - minPartitions = unpack_options.nPartitions; - } - - // calculating stride in scale elements space - stride = static_cast(stotal / minPartitions); - } - - const size_t numWork = (stotal + stride - 1) / stride; - - if (unpack_options.bUseOvParallelFor) { - ov::parallel_for(numWork, [unpack_body, stride](size_t index) { - unpack_body(index, stride); - }); - } else { - for (std::size_t index = 0; index < numWork; index++) { - unpack_body(index, stride); - } - } -} - -void unpack_u4f16_z(const ov::SoPtr& from, - const ov::SoPtr& zerop, - const ov::SoPtr& scale, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(zerop->is_continuous()); - NPUW_ASSERT(scale->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - - // Only single-size ZP is supported - NPUW_ASSERT(zerop->get_size() == 1); - - const auto& from_shape = from->get_shape(); - NPUW_ASSERT(from_shape.back() % 64 == 0); - - const auto& scale_shape = scale->get_shape(); - NPUW_ASSERT(scale_shape.size() == 3); - NPUW_ASSERT(scale_shape[0] == from_shape[0]); - NPUW_ASSERT(scale_shape[2] == from_shape[2]); - NPUW_ASSERT(scale_shape[1] == 1); - - const auto zerop_elem_type = zerop->get_element_type(); - const auto scale_elem_type = scale->get_element_type(); - NPUW_ASSERT(zerop_elem_type == ov::element::f32); - NPUW_ASSERT(scale_elem_type == ov::element::f32); - - // This conversion combines u4tof32 and f32tof16. 
Here we - // - read 256 bits (= 32 bytes, = 64 u4 elements) - // - write 1024 bits (= 128 bytes, = 64 f16 elements) - // per every iteration, what translates to (from->size() / 64) iterations - - const size_t C = from_shape[from_shape.size() - 3]; - const size_t H = from_shape[from_shape.size() - 2]; - const size_t W = from_shape[from_shape.size() - 1]; - - const uint8_t* const pSrc = static_cast(from->data()); // 2 x u4 elements - const float* const pScl = static_cast(scale->data()); // 1 x f32 element - int16_t* pDst = static_cast(to->data()); // 1 x f16 element - - const float zval = avx2_load_f32(reinterpret_cast(zerop->data()), zerop_elem_type); - __m256 zvalVec = _mm256_set1_ps(zval); - - auto unpack_body = [&](size_t job_index, size_t stride) { - size_t start_c = job_index * stride; - size_t end_c = std::min(C, start_c + stride); - - for (size_t c = start_c; c < end_c; ++c) { - for (size_t h = 0; h < H; ++h) { - for (size_t w = 0; w < W; w += 64) { - const uint8_t* pSrc_iter = pSrc + (w + W * h + W * H * c) / 2; - __m256i vinput = _mm256_lddqu_si256(reinterpret_cast(pSrc_iter)); - const float* pScl_iter = pScl + w + W * c; - int16_t* pDst_iter = pDst + w + W * h + W * H * c; - - __m256 svalVec[8]; - for (int i = 0; i < 8; ++i) { - svalVec[i] = _mm256_loadu_ps(pScl_iter + i * 8); - } - - // vectorized unpack u4 to f16 - __m128i htmp[8]; // 64 x f16 - avx2_u4tof16(vinput, htmp, zvalVec, svalVec); - - for (int i = 0; i < 8; ++i) { - _mm_storeu_si128(reinterpret_cast<__m128i*>(pDst_iter + i * 8), htmp[i]); - } - } - } - } - }; - - size_t stride = C; - size_t num_jobs = 1; - - if (unpack_options.nPartitions) { - if (unpack_options.bStrictPartitioning) { - stride = (C + unpack_options.nPartitions - 1) / unpack_options.nPartitions; - num_jobs = unpack_options.nPartitions; - } else { - stride = std::max(1, C / unpack_options.nPartitions); - num_jobs = (C + stride - 1) / stride; - } - } - - if (unpack_options.bUseOvParallelFor) { - ov::parallel_for(num_jobs, [&](size_t job_index) { - unpack_body(job_index, stride); - }); - } else { - for (size_t job_index = 0; job_index < num_jobs; ++job_index) { - unpack_body(job_index, stride); - } - } -} - -void unpack_u4f32(const ov::SoPtr& from, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - - uint8_t const* pSrc = static_cast(from->data()); // 2 x u4 elements - float* pDst = static_cast(to->data()); // 1 x f32 element - - const std::size_t total = from->get_size(); - for (std::size_t index = 0; index < total; index += 2) { - pDst[0] = static_cast(lo4(*pSrc)); // LSB is [0] - since OpenVINO 2024.0! - pDst[1] = static_cast(hi4(*pSrc)); // MSB is [1] - since OpenVINO 2024.0! 
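For reference, the low-nibble-first convention these comments describe can be checked with a minimal standalone sketch (illustrative only, not part of this change):

    #include <cassert>
    #include <cstdint>

    int main() {
        // Pack logical elements [0]=3 and [1]=12 into one byte, low nibble first.
        const uint8_t packed = static_cast<uint8_t>(3u | (12u << 4));  // == 0xC3
        assert((packed & 0x0F) == 3);   // element [0] comes from the LSB nibble
        assert((packed >> 4) == 12);    // element [1] comes from the MSB nibble
        return 0;
    }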
- pSrc++; - pDst += 2; - } -} - -void unpack_i8f16(const ov::SoPtr& from, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - NPUW_ASSERT(from->get_size() % 8 == 0); - - constexpr std::size_t VECSIZE = 8; - - const std::size_t total = from->get_size(); - int8_t const* pSrc = from->data(); - int16_t* pDst = static_cast(to->data()); - - for (std::size_t index = 0; index < total; index += VECSIZE) { - const __m128i* pSrcV = reinterpret_cast(pSrc); - __m128i* pDstV = reinterpret_cast<__m128i*>(pDst); - __m128i i8vec = _mm_loadl_epi64(pSrcV); // load: 8 x i8 [ 64b of 128b] - __m128i f16vec = avx2_i8tof16(i8vec); - _mm_store_si128(pDstV, f16vec); // store: 8 x f16 [128b] - pSrc += 8; - pDst += 8; - } -} - -void unpack_i8f16(const ov::SoPtr& from, - const ov::SoPtr& scale, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& unpack_options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(scale->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - NPUW_ASSERT(from->get_size() % 8 == 0); - NPUW_ASSERT(scale->get_shape()[0] == from->get_shape()[0]); - NPUW_ASSERT(scale->get_shape()[1] == 1); - - const auto scale_elem_type = scale->get_element_type(); - NPUW_ASSERT(scale_elem_type == ov::element::f32 || scale_elem_type == ov::element::f16); - - constexpr std::size_t VECSIZE = 8; - - const std::size_t total = from->get_size(); - const std::size_t stotal = scale->get_size(); - int8_t const* pSrc = from->data(); - int8_t const* pScl = static_cast(scale->data()); - int16_t* pDst = static_cast(to->data()); - - for (std::size_t sindex = 0u; sindex < stotal; sindex++) { - __m256 svec = avx2_load_scale(pScl, scale_elem_type); - for (std::size_t index = 0u; index < (total / stotal); index += VECSIZE) { - __m128i const* pSrcV = reinterpret_cast(pSrc); - __m128i* pDstV = reinterpret_cast<__m128i*>(pDst); - __m128i i8vec = _mm_loadl_epi64(pSrcV); // load: 8 x i8 [ 64b of 128b] - __m128i f16vec = avx2_i8tof16(i8vec, svec); // convert & scale - _mm_store_si128(pDstV, f16vec); // store: 8 x f16 [128b] - pSrc += 8; - pDst += 8; - } // index - pScl += scale_elem_type.size(); - } // sindex -} - -void unpack_u8f16(const ov::SoPtr& from, - const ov::SoPtr& zerop, - const ov::SoPtr& scale, - const ov::SoPtr& to, - const ov::npuw::util::UnpackOptions& _options) { - NPUW_ASSERT(from->is_continuous()); - NPUW_ASSERT(zerop->is_continuous()); - NPUW_ASSERT(scale->is_continuous()); - NPUW_ASSERT(to->is_continuous()); - NPUW_ASSERT(from->get_size() == to->get_size()); - NPUW_ASSERT(from->get_size() % 8 == 0); - NPUW_ASSERT(scale->get_shape()[0] == from->get_shape()[0]); - NPUW_ASSERT(scale->get_shape()[1] == 1); - NPUW_ASSERT(zerop->get_shape()[0] == from->get_shape()[0]); - NPUW_ASSERT(zerop->get_shape()[1] == 1); - - const auto scale_elem_type = scale->get_element_type(); - NPUW_ASSERT(scale_elem_type == ov::element::f32 || scale_elem_type == ov::element::f16); - - const auto zerop_elem_type = zerop->get_element_type(); - NPUW_ASSERT(zerop_elem_type == ov::element::u8); - - constexpr std::size_t VECSIZE = 8; - - const std::size_t total = from->get_size(); - const std::size_t stotal = scale->get_size(); - uint8_t const* pSrc = from->data(); - uint8_t const* pZrp = zerop->data(); - int8_t const* pScl = static_cast(scale->data()); - int16_t* pDst = static_cast(to->data()); - - for (std::size_t sindex = 0u; 
sindex < stotal; sindex++) { - __m256 svec = avx2_load_scale(pScl, scale_elem_type); - __m128i u8zp = _mm_set1_epi8(*pZrp); // bcast: 8 x u8 - __m256i u32zp = _mm256_cvtepu8_epi32(u8zp); // i32 zero point - __m256 f32zp = _mm256_cvtepi32_ps(u32zp); // f32 zero point - for (std::size_t index = 0u; index < (total / stotal); index += VECSIZE) { - __m128i const* pSrcV = reinterpret_cast(pSrc); - __m128i* pDstV = reinterpret_cast<__m128i*>(pDst); - __m128i u8in = _mm_loadl_epi64(pSrcV); // load: 8 x u8 - __m128i f16vec = avx2_u8tof16(u8in, f32zp, svec); // convert & scale - _mm_store_si128(pDstV, f16vec); // store: 8 x f16 - pSrc += VECSIZE; - pDst += VECSIZE; - } // index - pScl += scale_elem_type.size(); - pZrp++; - } // sindex -} - -} // namespace - void ov::npuw::util::unpack(const ov::SoPtr& from, const ov::SoPtr& to, const UnpackOptions& unpack_options) { @@ -1411,9 +76,9 @@ void ov::npuw::util::unpack(const ov::SoPtr& from, namespace ove = ov::element; #define CAST(x) static_cast((x).operator ove::Type_t()) #define PAIR(f, t) (CAST(f) << 16 | CAST(t)) -#define HNDL(f, t) \ - case PAIR(ove::f, ove::t): \ - unpack_##f##t(from, to, unpack_options); \ +#define HNDL(f, t) \ + case PAIR(ove::f, ove::t): \ + ov::npuw::util::XARCH::unpack_##f##t(from, to, unpack_options); \ break; switch (PAIR(type_from, type_to)) { HNDL(i4, i8); @@ -1445,16 +110,16 @@ void ov::npuw::util::unpack(const ov::SoPtr& from, if (type_from == ov::element::i4) { if (from_shape.size() == 3) { if (scale_shape[2] == from_shape[2]) { - unpack_i4f16_z(from, scale, to, unpack_options); + ov::npuw::util::XARCH::unpack_i4f16_z(from, scale, to, unpack_options); } else { - unpack_i4f16(from, scale, to, unpack_options); + ov::npuw::util::XARCH::unpack_i4f16_scale(from, scale, to, unpack_options); } } else { NPUW_ASSERT(from_shape.size() == 2); - unpack_i4f16(from, scale, to, unpack_options); + ov::npuw::util::XARCH::unpack_i4f16_scale(from, scale, to, unpack_options); } } else if (type_from == ov::element::i8) { - unpack_i8f16(from, scale, to, unpack_options); + ov::npuw::util::XARCH::unpack_i8f16_scale(from, scale, to, unpack_options); } else { NPUW_ASSERT(false && "Unsupported combination"); } @@ -1507,23 +172,23 @@ void ov::npuw::util::unpack(const ov::SoPtr& from, if (type_from == ov::element::u4) { if (scale_shape.size() == 3 && scale_shape[0] == from_shape[0] && scale_shape[1] == 1 && scale_shape[2] == from_shape[2]) { - unpack_u4f16_z(from, zerop, scale, to, unpack_options); + ov::npuw::util::XARCH::unpack_u4f16_z(from, zerop, scale, to, unpack_options); } else if (scale_shape.size() == 3 && scale_shape[0] == from_shape[0] && scale_shape[1] == from_shape[1] && scale_shape[2] == 1) { if (zerop->get_size() == 1) { - unpack_u4f16(from, zerop, scale, to, unpack_options); + ov::npuw::util::XARCH::unpack_u4f16_scale_zp(from, zerop, scale, to, unpack_options); } else { - unpack_u4f16_asymm_zp(from, zerop, scale, to, unpack_options); + ov::npuw::util::XARCH::unpack_u4f16_asymm_zp(from, zerop, scale, to, unpack_options); } } else if (scale_shape.size() == 2 && scale_shape[0] == from_shape[0] && scale_shape[1] == 1) { - unpack_u4f16(from, zerop, scale, to, unpack_options); + ov::npuw::util::XARCH::unpack_u4f16_scale_zp(from, zerop, scale, to, unpack_options); } else { NPUW_ASSERT(false); } } else if (type_from == ov::element::u8) { // Only support CW for now if (scale_shape.size() == 2 && scale_shape[0] == from_shape[0] && scale_shape[1] == 1) { - unpack_u8f16(from, zerop, scale, to, unpack_options); + 
ov::npuw::util::XARCH::unpack_u8f16(from, zerop, scale, to, unpack_options); } else { NPUW_ASSERT(false); } @@ -1667,26 +332,7 @@ void ov::npuw::util::to_f32(const ov::Tensor& in, ov::Tensor& out) { } ov::Tensor ov::npuw::util::to_f16(const ov::Tensor& t) { - ov::Shape shape = t.get_shape(); - NPUW_ASSERT(t.get_element_type() == ov::element::f32); - NPUW_ASSERT(t.get_size() % 8 == 0); - NPUW_ASSERT(t.is_continuous()); - - ov::Tensor tnew(ov::element::f16, shape); - - const float* psrc = t.data(); - uint8_t* pdst = static_cast(tnew.data()); - - for (std::size_t i = 0; i < t.get_size() / 8; i++) { - __m256 vsrc = _mm256_loadu_ps(psrc); - __m128i vout = _mm256_cvtps_ph(vsrc, _MM_FROUND_TO_NEAREST_INT); - __m128i* pout = reinterpret_cast<__m128i*>(pdst); - _mm_storeu_si128(pout, vout); - psrc += 8; // offset in sizeof(float) - pdst += (8 * 2); // offset in bytes - } - - return tnew; + return ov::npuw::util::XARCH::to_f16(t); } inline uint8_t tread_4b(const ov::Tensor& t, std::size_t r, std::size_t c, std::size_t COLS) { diff --git a/src/plugins/intel_npu/src/plugin/npuw/util_xarch.cpp b/src/plugins/intel_npu/src/plugin/npuw/util_xarch.cpp new file mode 100644 index 00000000000000..37c4770b9d9fa3 --- /dev/null +++ b/src/plugins/intel_npu/src/plugin/npuw/util_xarch.cpp @@ -0,0 +1,1429 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#if defined(HAVE_AVX2) +# include +#endif + +#include + +#include "util.hpp" +#include "util_xarch.hpp" + +#ifdef UNPACK_PROFILING +# include "tbb/concurrent_unordered_map.h" +#endif + +namespace { +#if defined(HAVE_AVX2) +inline int8_t hi4(int8_t x) { + return ((x & (1 << 7)) >> 4) | ((x & (1 << 6)) >> 4) | ((x & (1 << 5)) >> 4) | ((x & (1 << 4)) >> 4); +} + +inline int8_t lo4(int8_t x) { + return (x & (1 << 3)) | (x & (1 << 2)) | (x & (1 << 1)) | (x & (1 << 0)); +} +#endif + +inline uint8_t hi4(uint8_t x) { + return x >> 4; +} + +inline uint8_t lo4(uint8_t x) { + return x & 0xF; +} + +#if defined(HAVE_AVX2) +inline int8_t upc(int8_t h) { + return h | (-((h & (1 << 3)) >> 3) & (-8)); +} + +// NOTE: This routine implements the NEW ORDER +# define avx2_i4toi8(vinput, vout0, vout1) \ + { \ + __m256i himask = _mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, 0xF0)); \ + __m256i lomask = _mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, 0x0F)); \ + __m256i vsgmask = _mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, 1 << 3)); \ + __m256i vzero = _mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, 0)); \ + __m256i vextend = _mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, (-8))); \ + \ + __m256i vht = _mm256_and_si256(vinput, himask); \ + __m256i vhi = _mm256_srli_epi16(vht, 4); \ + __m256i vlo = _mm256_and_si256(vinput, lomask); \ + \ + __m256i vsghi = _mm256_srli_epi16(_mm256_and_si256(vhi, vsgmask), 3); \ + __m256i vsglo = _mm256_srli_epi16(_mm256_and_si256(vlo, vsgmask), 3); \ + __m256i vsubhi = _mm256_sub_epi8(vzero, vsghi); \ + __m256i vsublo = _mm256_sub_epi8(vzero, vsglo); \ + __m256i vhires = _mm256_or_si256(vhi, _mm256_and_si256(vsubhi, vextend)); \ + __m256i vlores = _mm256_or_si256(vlo, _mm256_and_si256(vsublo, vextend)); \ + \ + __m256i vunlo = _mm256_unpacklo_epi8(vlores, vhires); \ + __m256i vunhi = _mm256_unpackhi_epi8(vlores, vhires); \ + *vout0 = _mm256_permute2x128_si256(vunlo, vunhi, 0x20); \ + *vout1 = _mm256_permute2x128_si256(vunlo, vunhi, 0x31); \ + } + +inline __m128i avx2_i8tof16(__m128i vi8) { + __m256i i32vec = _mm256_cvtepi8_epi32(vi8); // extend: 8 x i8 -> 8 x i32 [256b of 256b] + __m256 f32vec = 
_mm256_cvtepi32_ps(i32vec); // convert: 8 x i32 -> 8 x f32 [256b of 256b] + return _mm256_cvtps_ph(f32vec, _MM_FROUND_TO_NEAREST_INT); // convert: 8 x f32 -> 8 x f16 [128b] +} + +inline __m128i avx2_i8tof16(__m128i vi8, __m256 s) { + __m256i i32vec = _mm256_cvtepi8_epi32(vi8); // extend: 8 x i8 -> 8 x i32 [256b of 256b] + __m256 f32vec = _mm256_cvtepi32_ps(i32vec); // convert: 8 x i32 -> 8 x f32 [256b of 256b] + __m256 f32scl = _mm256_mul_ps(f32vec, s); // scale: 8 x f32 -> 8 x f32 [256b of 256b] + return _mm256_cvtps_ph(f32scl, _MM_FROUND_TO_NEAREST_INT); // convert: 8 x f32 -> 8 x f16 [128b] +} + +inline __m128i avx2_u8tof16_hi(__m128i vu8, __m256 z, __m256 s) { + __m256i u32vec = _mm256_cvtepu8_epi32(vu8); // extend: 8 x u8 -> 8 x i32 [256b of 256b] + __m256 f32vec = _mm256_cvtepi32_ps(u32vec); // convert: 8 x i32 -> 8 x f32 [256b of 256b] + __m256 f32sub = _mm256_sub_ps(f32vec, z); // subtract: 8 x f32 -> 8 x f32 [256b of 256b] + __m256 f32scl = _mm256_mul_ps(f32sub, s); // scale: 8 x f32 -> 8 x f32 [256b of 256b] + return _mm256_cvtps_ph(f32scl, _MM_FROUND_TO_NEAREST_INT); // convert: 8 x f32 -> 8 x f16 [128b] +} + +inline __m128i avx2_u8tof16_lo(__m128i vu8, __m256 z, __m256 s) { + __m128i vu8h = _mm_bsrli_si128(vu8, 8); + return avx2_u8tof16_hi(vu8h, z, s); +} + +inline __m128i avx2_u8tof16(__m128i vi8, __m256 z, __m256 s) { + __m256i i32vec = _mm256_cvtepu8_epi32(vi8); // extend: 8 x i8 -> 8 x i32 [256b of 256b] + __m256 f32vec = _mm256_cvtepi32_ps(i32vec); // convert: 8 x i32 -> 8 x f32 [256b of 256b] + __m256 f32sub = _mm256_sub_ps(f32vec, z); // subtract: 8 x f32 -> 8 x f32 [256b of 256b] + __m256 f32scl = _mm256_mul_ps(f32sub, s); // scale: 8 x f32 -> 8 x f32 [256b of 256b] + return _mm256_cvtps_ph(f32scl, _MM_FROUND_TO_NEAREST_INT); // convert: 8 x f32 -> 8 x f16 [128b] +} + +// NOTE: This routine implements the NEW ORDER +inline void avx2_u4tof16(__m256i vinput, __m128i vout[8], __m256 zvalVec, __m256 svalVec[8]) { + // vinput - 64 x u4 elements - 256 bits + // vout[] - 64 (8x8) x f16 elements + + // NOTE: This is largely a copy of unpack_u4f16() {{ + __m256i himask = _mm256_set1_epi8(static_cast(0xF0)); + __m256i lomask = _mm256_set1_epi8(static_cast(0x0F)); + + // unpacking with interleaving + __m256i vht = _mm256_and_si256(vinput, himask); + __m256i xmmUnpackedLo = _mm256_srli_epi16(vht, 4); // 32 x i8 - Extracting High Nibbles + __m256i xmmUnpackedHi = _mm256_and_si256(vinput, lomask); // 32 x i8 - Extracting Low Nibbles + + // need 4 portions of 16 x i8 elements + __m128i unpacked32LoHi = _mm256_castsi256_si128(xmmUnpackedLo); // lower 16 x i8 - Lower 16 of High Nibbles + __m128i unpacked32LoLo = _mm256_extractf128_si256(xmmUnpackedLo, 1); // higher 16 x i8 - Higher 16 of High Nibbles + + __m128i unpacked32HiHi = _mm256_castsi256_si128(xmmUnpackedHi); // lower 16 x i8 - Lower 16 of Low Nibbles + __m128i unpacked32HiLo = _mm256_extractf128_si256(xmmUnpackedHi, 1); // higher 16 x i8 - Higher 16 of Low Nibbles + + // Rearranging of scales + __m256i indices = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7); + // Extracting all 64 scales as per the indices specified above + __m256 scale_v_rearranged[] = {_mm256_permutevar8x32_ps(svalVec[0], indices), + _mm256_permutevar8x32_ps(svalVec[1], indices), + _mm256_permutevar8x32_ps(svalVec[2], indices), + _mm256_permutevar8x32_ps(svalVec[3], indices), + _mm256_permutevar8x32_ps(svalVec[4], indices), + _mm256_permutevar8x32_ps(svalVec[5], indices), + _mm256_permutevar8x32_ps(svalVec[6], indices), + _mm256_permutevar8x32_ps(svalVec[7], 
indices)};
+
+    // Scaling should happen like this:
+    // low_nibble[0]->scale[0], high_nibble[0]->scale[1]...low_nibble[31]->scale[60],high_nibble[31]->scale[61]
+
+    // Extracting all the even-indexed scales for the low nibbles
+    __m256 scale_v_even[] = {
+        _mm256_permute2f128_ps(scale_v_rearranged[0], scale_v_rearranged[1], 0x20),
+        _mm256_permute2f128_ps(scale_v_rearranged[2], scale_v_rearranged[3], 0x20),
+        _mm256_permute2f128_ps(scale_v_rearranged[4], scale_v_rearranged[5], 0x20),
+        _mm256_permute2f128_ps(scale_v_rearranged[6], scale_v_rearranged[7], 0x20),
+    };
+
+    // Extracting all the odd-indexed scales for the high nibbles
+    __m256 scale_v_odd[] = {
+        _mm256_permute2f128_ps(scale_v_rearranged[0], scale_v_rearranged[1], 0x31),
+        _mm256_permute2f128_ps(scale_v_rearranged[2], scale_v_rearranged[3], 0x31),
+        _mm256_permute2f128_ps(scale_v_rearranged[4], scale_v_rearranged[5], 0x31),
+        _mm256_permute2f128_ps(scale_v_rearranged[6], scale_v_rearranged[7], 0x31),
+    };
+
+    // converting to 64 x f16
+    // Higher 16 of High Nibbles
+    __m128i f16LoLo[] = {avx2_u8tof16_hi(unpacked32LoLo, zvalVec, scale_v_odd[2]),
+                         avx2_u8tof16_lo(unpacked32LoLo, zvalVec, scale_v_odd[3])};
+    // Lower 16 of High Nibbles
+    __m128i f16LoHi[] = {avx2_u8tof16_hi(unpacked32LoHi, zvalVec, scale_v_odd[0]),
+                         avx2_u8tof16_lo(unpacked32LoHi, zvalVec, scale_v_odd[1])};
+    // Higher 16 of Low Nibbles
+    __m128i f16HiLo[] = {avx2_u8tof16_hi(unpacked32HiLo, zvalVec, scale_v_even[2]),
+                         avx2_u8tof16_lo(unpacked32HiLo, zvalVec, scale_v_even[3])};
+    // Lower 16 of Low Nibbles
+    __m128i f16HiHi[] = {avx2_u8tof16_hi(unpacked32HiHi, zvalVec, scale_v_even[0]),
+                         avx2_u8tof16_lo(unpacked32HiHi, zvalVec, scale_v_even[1])};
+
+    // interleaving back:
+    // Interleaving lower 8 of low nibbles with lower 8 of high nibbles and so on
+    vout[0] = _mm_unpacklo_epi16(f16HiHi[0], f16LoHi[0]);
+    vout[1] = _mm_unpackhi_epi16(f16HiHi[0], f16LoHi[0]);
+    vout[2] = _mm_unpacklo_epi16(f16HiHi[1], f16LoHi[1]);
+    vout[3] = _mm_unpackhi_epi16(f16HiHi[1], f16LoHi[1]);
+    vout[4] = _mm_unpacklo_epi16(f16HiLo[0], f16LoLo[0]);
+    vout[5] = _mm_unpackhi_epi16(f16HiLo[0], f16LoLo[0]);
+    vout[6] = _mm_unpacklo_epi16(f16HiLo[1], f16LoLo[1]);
+    vout[7] = _mm_unpackhi_epi16(f16HiLo[1], f16LoLo[1]);
+}
+
+inline __m256 avx2_load_scale(const int8_t* data, ov::element::Type type) {
+    if (type == ov::element::f32) {
+        return _mm256_set1_ps(*reinterpret_cast<const float*>(data));
+    } else {
+        NPUW_ASSERT(type == ov::element::f16);
+        float val{};
+        _mm_store_ss(&val, _mm_cvtph_ps(_mm_cvtsi32_si128(*reinterpret_cast<const uint16_t*>(data))));
+        return _mm256_set1_ps(val);
+    }
+}
+
+inline float avx2_load_f32(const int8_t* data, ov::element::Type type) {
+    if (type == ov::element::f32) {
+        return *reinterpret_cast<const float*>(data);
+    } else {
+        NPUW_ASSERT(type == ov::element::f16);
+        float val{};
+        _mm_store_ss(&val, _mm_cvtph_ps(_mm_cvtsi32_si128(*reinterpret_cast<const uint16_t*>(data))));
+        return val;
+    }
+}
+#endif
+
+#ifdef UNPACK_PROFILING
+class UnpackStat {
+    tbb::concurrent_unordered_map<size_t, std::pair<size_t, size_t>> inferenceTimes;
+
+public:
+    UnpackStat() {}
+    void addRecord(size_t sz, size_t time) {
+        inferenceTimes[sz].first++;
+        inferenceTimes[sz].second += time;
+    }
+    ~UnpackStat() {
+        for (auto&& r : inferenceTimes) {
+            std::cout << "work: " << r.first  //<< ", stride: " << stride
+                      << " overall_time = " << r.second.second / 1000 << " [ms]"
+                      << " avg_time = " << r.second.second / r.second.first << " [µs]\n";
+        }
+    }
+};
+
+static UnpackStat ustat;
+# define UNPACK_START_TICK() std::chrono::steady_clock::time_point _begin_tick = std::chrono::steady_clock::now();
+# define UNPACK_SAVE_TICK()                                                              \
+    std::chrono::steady_clock::time_point _end_tick = std::chrono::steady_clock::now(); \
+    ustat.addRecord(total, std::chrono::duration_cast<std::chrono::microseconds>(_end_tick - _begin_tick).count());
+#else
+# define UNPACK_START_TICK()
+# define UNPACK_SAVE_TICK()
+#endif
+}  // namespace
+
+void ov::npuw::util::XARCH::unpack_i4i8(const ov::SoPtr<ov::ITensor>& from,
+                                        const ov::SoPtr<ov::ITensor>& to,
+                                        const ov::npuw::util::UnpackOptions& unpack_options) {
+    NPUW_ASSERT(from->is_continuous());
+    NPUW_ASSERT(to->is_continuous());
+    NPUW_ASSERT(from->get_size() == to->get_size());
+
+#if defined(HAVE_AVX2)
+    // with the vectorization above, we:
+    // - read 256 bits (= 32 bytes, = 64 i4 elements)
+    // - write 512 bits (= 64 bytes, = 64 i8 elements)
+    // per iteration, which translates to (from->size() / 64) iterations
+
+    const std::size_t total = from->get_size();
+    int8_t const* pSrc = static_cast<int8_t const*>(from->data());  // 2 x i4 elements
+    int8_t* pDst = static_cast<int8_t*>(to->data());                // 1 x i8 element
+    size_t stride = 64;
+
+    auto unpack_body = [pSrc, pDst](size_t index, size_t stride) {
+        size_t halfStride = stride >> 1;
+        int8_t const* pSrcLocal = pSrc + halfStride * index;
+        int8_t* pDstLocal = pDst + stride * index;
+
+        for (size_t j = 0; j < stride; j += 64) {
+            __m256i inv = _mm256_lddqu_si256(reinterpret_cast<__m256i const*>(pSrcLocal));
+            __m256i* outv0 = reinterpret_cast<__m256i*>(pDstLocal);
+            __m256i* outv1 = reinterpret_cast<__m256i*>(pDstLocal + 32);
+
+            __m256i vout0, vout1;
+            avx2_i4toi8(inv, &vout0, &vout1);
+
+            _mm256_storeu_si256(outv0, vout0);
+            _mm256_storeu_si256(outv1, vout1);
+
+            pSrcLocal += 32;
+            pDstLocal += 64;
+        }
+    };
+
+    // ov work index / 64
+    if (unpack_options.nPartitions) {
+        std::size_t minPartitions;
+        if (!unpack_options.bStrictPartitioning) {
+            // heuristic: every TBB thread's workload should cover at least 2048 vector
+            // iterations, so in terms of stride that is 64 * 2048 elements
+            minPartitions = total / (64 * 2048);
+            minPartitions = std::max(1u, minPartitions);
+            minPartitions = std::min(minPartitions, unpack_options.nPartitions);
+        } else {
+            minPartitions = unpack_options.nPartitions;
+        }
+
+        // calculating stride in elements - this stride gives us nPartitions + 1 partitions
+        stride = static_cast<size_t>(total / minPartitions);
+
+        // stride has to be 64-element aligned to avoid gaps between workloads
+        stride = (stride >> 6) << 6;
+        // if the number of partitions is too large compared to the workload, the
+        // minimum supported stride still has to be clamped to 64
+        stride = stride < 64 ? 64 : stride;
+    }
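+    // Worked example of the sizing above (illustrative numbers, not part of the
+    // original code): with total = 1u << 24 i4 elements and nPartitions = 8,
+    // total / (64 * 2048) = 128, so minPartitions is capped at 8 and
+    // stride = total / 8 = 1u << 21 elements (already 64-aligned); every TBB
+    // task then still executes at least 2048 AVX2 iterations of unpack_body.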
+
+    UNPACK_START_TICK();
+
+    if (unpack_options.bUseOvParallelFor) {
+        ov::parallel_for(total / stride, [unpack_body, stride](size_t index) {
+            unpack_body(index, stride);
+        });
+    } else {
+        for (std::size_t index = 0; index < total / stride; index++) {
+            unpack_body(index, stride);
+        }
+    }
+    // handle tail
+    size_t tailOffset = (static_cast<size_t>(total / stride) * stride);
+    pSrc = static_cast<int8_t const*>(from->data()) + (tailOffset >> 1);
+    pDst = static_cast<int8_t*>(to->data()) + tailOffset;
+
+    for (std::size_t index = 0; index < ((total % 64) >> 1); index++) {
+        *(pDst++) = upc(lo4(*(pSrc)));
+        *(pDst++) = upc(hi4(*(pSrc)));
+        pSrc++;
+    }
+    UNPACK_SAVE_TICK();
+#else
+    OPENVINO_THROW("AVX2 support is necessary but it's not enabled!");
+#endif
+}
+
+void ov::npuw::util::XARCH::unpack_u4i8(const ov::SoPtr<ov::ITensor>& from,
+                                        const ov::SoPtr<ov::ITensor>& to,
+                                        const ov::npuw::util::UnpackOptions& unpack_options) {
+    NPUW_ASSERT(from->is_continuous());
+    NPUW_ASSERT(to->is_continuous());
+    NPUW_ASSERT(from->get_size() == to->get_size());
+
+    uint8_t const* pSrc = static_cast<uint8_t const*>(from->data());  // 2 x u4 elements
+    int8_t* pDst = static_cast<int8_t*>(to->data());                  // 1 x i8 element
+
+    const std::size_t total = from->get_size();
+    for (std::size_t index = 0; index < total; index += 2) {
+        pDst[0] = static_cast<int8_t>(lo4(*pSrc));  // LSB is [0] -- since OpenVINO 24.0!
+        pDst[1] = static_cast<int8_t>(hi4(*pSrc));  // MSB is [1] -- since OpenVINO 24.0!
+        pSrc++;
+        pDst += 2;
+    }
+}
+
+void ov::npuw::util::XARCH::unpack_i4f16(const ov::SoPtr<ov::ITensor>& from,
+                                         const ov::SoPtr<ov::ITensor>& to,
+                                         const ov::npuw::util::UnpackOptions& unpack_options) {
+    NPUW_ASSERT(from->is_continuous());
+    NPUW_ASSERT(to->is_continuous());
+    NPUW_ASSERT(from->get_size() == to->get_size());
+
+#if defined(HAVE_AVX2)
+    // This conversion combines i4toi8 (above) and i8tof16 (below).
Here we + // - read 256 bits (= 32 bytes, = 64 i4 elements) + // - write 1024 bits (= 128 bytes, = 64 f16 elements) + // per every iteration, what translates to (from->size() / 64) iterations + + std::size_t total = to->get_size(); + int8_t const* pSrc = static_cast(from->data()); // 2 x i4 elements + int16_t* pDst = static_cast(to->data()); // 1 x f16 element + // bool tailOnly = total < 64; + + auto unpack_body = [pSrc, pDst](size_t index) { + int8_t const* pSrcLocal = pSrc + 32 * index; + int16_t* pDstLocal = pDst + 64 * index; + + __m256i inv = _mm256_lddqu_si256(reinterpret_cast(pSrcLocal)); + __m128i* outv[8] = { + reinterpret_cast<__m128i*>(pDstLocal), + reinterpret_cast<__m128i*>(pDstLocal + 8), + reinterpret_cast<__m128i*>(pDstLocal + 16), + reinterpret_cast<__m128i*>(pDstLocal + 24), + reinterpret_cast<__m128i*>(pDstLocal + 32), + reinterpret_cast<__m128i*>(pDstLocal + 40), + reinterpret_cast<__m128i*>(pDstLocal + 48), + reinterpret_cast<__m128i*>(pDstLocal + 56), + }; + + __m256i vout0, vout1; + avx2_i4toi8(inv, &vout0, &vout1); + + int8_t tmp[64]; // FIXME: Avoid it + __m256i* tmpv0 = reinterpret_cast<__m256i*>(tmp); + __m256i* tmpv1 = reinterpret_cast<__m256i*>(tmp + 32); + _mm256_storeu_si256(tmpv0, vout0); + _mm256_storeu_si256(tmpv1, vout1); + + __m128i i8vecs[8] = { + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 8)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 16)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 24)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 32)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 40)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 48)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 56)), + }; + + __m128i vresults[8] = {avx2_i8tof16(i8vecs[0]), + avx2_i8tof16(i8vecs[1]), + avx2_i8tof16(i8vecs[2]), + avx2_i8tof16(i8vecs[3]), + avx2_i8tof16(i8vecs[4]), + avx2_i8tof16(i8vecs[5]), + avx2_i8tof16(i8vecs[6]), + avx2_i8tof16(i8vecs[7])}; + + _mm_storeu_si128(outv[0], vresults[0]); + _mm_storeu_si128(outv[1], vresults[1]); + _mm_storeu_si128(outv[2], vresults[2]); + _mm_storeu_si128(outv[3], vresults[3]); + _mm_storeu_si128(outv[4], vresults[4]); + _mm_storeu_si128(outv[5], vresults[5]); + _mm_storeu_si128(outv[6], vresults[6]); + _mm_storeu_si128(outv[7], vresults[7]); + }; + + if (unpack_options.bUseOvParallelFor) { + ov::parallel_for(total / 64, [&unpack_body](size_t index) { + unpack_body(index); + }); + } else { + for (std::size_t index = 0; index < total / 64; index++) { + unpack_body(index); + } + } + + // handle tail that is < 64 elements + size_t tailOffset = ((total >> 6) << 6); + pSrc = static_cast(from->data()) + (tailOffset >> 1); + pDst = static_cast(to->data()) + tailOffset; + + constexpr std::size_t VECSIZE = 8; + + total = ((total % 64) >> 1); + int8_t unpackedToI8[VECSIZE] = {0}; + size_t unpackedIdx = 0; + for (std::size_t index = 0; index < total; index++) { + unpackedToI8[unpackedIdx++] = upc(lo4(*(pSrc))); + unpackedToI8[unpackedIdx++] = upc(hi4(*(pSrc))); + if (unpackedIdx == VECSIZE) { + __m128i i8vec = _mm_loadl_epi64(reinterpret_cast<__m128i*>(unpackedToI8)); + __m128i f16vec = avx2_i8tof16(i8vec); + _mm_storeu_si128(reinterpret_cast<__m128i*>(pDst), f16vec); + pDst += VECSIZE; + unpackedIdx = 0; + } + pSrc += 1; + } + + // handle tail that is < 8 + if (unpackedIdx != 0) { + int16_t tmp[VECSIZE]; + __m128i i8vec = _mm_loadl_epi64(reinterpret_cast<__m128i*>(unpackedToI8)); + __m128i f16vec = avx2_i8tof16(i8vec); + 
_mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), f16vec); + for (size_t i = 0; i != unpackedIdx; i++) { + pDst[i] = tmp[i]; + } + } +#else + OPENVINO_THROW("AVX2 support is neccessary but it's not enabled!"); +#endif +} + +void ov::npuw::util::XARCH::unpack_i4f16_scale(const ov::SoPtr& from, + const ov::SoPtr& scale, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options) { + NPUW_ASSERT(from->is_continuous()); + NPUW_ASSERT(scale->is_continuous()); + NPUW_ASSERT(to->is_continuous()); + NPUW_ASSERT(from->get_size() == to->get_size()); + + const auto& from_shape = from->get_shape(); + NPUW_ASSERT(from_shape.back() % 64 == 0); + + // 2-channel (Symmetric) and 3-channel (group-wise) + // scale factors are supported. The scale/value loop + // iteration is based on stotal, so should work for + // both cases. + const auto& scale_shape = scale->get_shape(); + NPUW_ASSERT(scale_shape.size() == 3 || scale_shape.size() == 2); + if (scale_shape.size() == 3) { + NPUW_ASSERT(scale_shape[0] == from_shape[0]); + NPUW_ASSERT(scale_shape[1] == from_shape[1]); + NPUW_ASSERT(scale_shape[2] == 1); + } else { + NPUW_ASSERT(scale_shape[0] == from_shape[0]); + NPUW_ASSERT(scale_shape[1] == 1); + } + + const auto scale_elem_type = scale->get_element_type(); + NPUW_ASSERT(scale_elem_type == ov::element::f32 || scale_elem_type == ov::element::f16); + +#if defined(HAVE_AVX2) + // This conversion combines i4toi8 (above) and i8tof16 (below). Here we + // - read 256 bits (= 32 bytes, = 64 i4 elements) + // - write 1024 bits (= 128 bytes, = 64 f16 elements) + // per every iteration, what translates to (from->size() / 64) iterations + + const std::size_t total = to->get_size(); + const std::size_t stotal = scale->get_size(); + const std::size_t elementsPerScale = total / stotal; + + // TODO: handle tails + NPUW_ASSERT(elementsPerScale % 64 == 0); + + const int8_t* const pSrc = static_cast(from->data()); // 2 x i4 elements + const int8_t* const pScl = static_cast(scale->data()); // either f16 or f32 + const int16_t* pDst = static_cast(to->data()); // 1 x f16 element + + auto unpack_body = [pSrc, pDst, pScl, elementsPerScale, scale_elem_type, stotal](std::size_t sindex, + std::size_t stride) { + // number of vectorized operations per scale + size_t elementsPerScaleVectorized = elementsPerScale / 64; + + int8_t const* pSrcLocal = pSrc + 32 * elementsPerScaleVectorized * sindex * stride; + int8_t const* pSclLocal = pScl + scale_elem_type.size() * sindex * stride; + int16_t* pDstLocal = const_cast(pDst) + 64 * elementsPerScaleVectorized * sindex * stride; + + // if it is last iteration current stride can be smaller - lets check that + sindex *= stride; + const auto jobFinish = std::min(sindex + stride, stotal); + + for (; sindex != jobFinish; sindex++) { + __m256 svec = avx2_load_scale(pSclLocal, scale_elem_type); + for (std::size_t index = 0; index < elementsPerScale; index += 64) { + __m256i inv = _mm256_lddqu_si256(reinterpret_cast(pSrcLocal)); + __m128i* outv[8] = { + reinterpret_cast<__m128i*>(pDstLocal), + reinterpret_cast<__m128i*>(pDstLocal + 8), + reinterpret_cast<__m128i*>(pDstLocal + 16), + reinterpret_cast<__m128i*>(pDstLocal + 24), + reinterpret_cast<__m128i*>(pDstLocal + 32), + reinterpret_cast<__m128i*>(pDstLocal + 40), + reinterpret_cast<__m128i*>(pDstLocal + 48), + reinterpret_cast<__m128i*>(pDstLocal + 56), + }; + + __m256i vout0, vout1; + avx2_i4toi8(inv, &vout0, &vout1); + + int8_t tmp[64]; // FIXME: Avoid it + __m256i* tmpv0 = reinterpret_cast<__m256i*>(tmp); + __m256i* tmpv1 = 
reinterpret_cast<__m256i*>(tmp + 32); + _mm256_storeu_si256(tmpv0, vout0); + _mm256_storeu_si256(tmpv1, vout1); + + __m128i i8vecs[8] = { + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 8)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 16)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 24)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 32)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 40)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 48)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 56)), + }; + + __m128i vresults[8] = {avx2_i8tof16(i8vecs[0], svec), + avx2_i8tof16(i8vecs[1], svec), + avx2_i8tof16(i8vecs[2], svec), + avx2_i8tof16(i8vecs[3], svec), + avx2_i8tof16(i8vecs[4], svec), + avx2_i8tof16(i8vecs[5], svec), + avx2_i8tof16(i8vecs[6], svec), + avx2_i8tof16(i8vecs[7], svec)}; + + _mm_storeu_si128(outv[0], vresults[0]); + _mm_storeu_si128(outv[1], vresults[1]); + _mm_storeu_si128(outv[2], vresults[2]); + _mm_storeu_si128(outv[3], vresults[3]); + _mm_storeu_si128(outv[4], vresults[4]); + _mm_storeu_si128(outv[5], vresults[5]); + _mm_storeu_si128(outv[6], vresults[6]); + _mm_storeu_si128(outv[7], vresults[7]); + + pSrcLocal += 32; // shift pSrc only by 32 since it is 64 x i4 + pDstLocal += 64; // note pDst is int16_t + } + pSclLocal += scale_elem_type.size(); + } + }; + size_t stride{1}; + + // since scaling is always 64 elements aligned operations, lets partition only in scale shape + if (unpack_options.nPartitions) { + std::size_t minPartitions; + if (!unpack_options.bStrictPartitioning) { + // some heuristics that every tbb thread workload has to have 2048 x intrinsics operations at least, + // so in terms of stride, it should be nElementsPerscale/64 * 2048 + const auto nIntrinsicsPerScale = elementsPerScale / 64u; + auto minScaleStride = 2048u / nIntrinsicsPerScale; + minScaleStride = std::max(1u, minScaleStride); + minPartitions = stotal / minScaleStride; + minPartitions = std::max(1u, minPartitions); + minPartitions = std::min(minPartitions, unpack_options.nPartitions); + } else { + minPartitions = unpack_options.nPartitions; + } + + // calculating stride in scale elements space + stride = static_cast(stotal / minPartitions); + } + + const size_t numWork = (stotal + stride - 1) / stride; + + if (unpack_options.bUseOvParallelFor) { + ov::parallel_for(numWork, [unpack_body, stride](size_t index) { + unpack_body(index, stride); + }); + } else { + for (std::size_t index = 0; index < numWork; index++) { + unpack_body(index, stride); + } + } +#else + OPENVINO_THROW("AVX2 support is neccessary but it's not enabled!"); +#endif +} + +void ov::npuw::util::XARCH::unpack_i4f16_z(const ov::SoPtr& from, + const ov::SoPtr& scale, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options) { + NPUW_ASSERT(from->is_continuous()); + NPUW_ASSERT(scale->is_continuous()); + NPUW_ASSERT(to->is_continuous()); + NPUW_ASSERT(from->get_size() == to->get_size()); + + const auto& from_shape = from->get_shape(); + NPUW_ASSERT(from_shape.back() % 64 == 0); + + const auto& scale_shape = scale->get_shape(); + NPUW_ASSERT(scale_shape.size() == 3); + NPUW_ASSERT(scale_shape[0] == from_shape[0]); + NPUW_ASSERT(scale_shape[2] == from_shape[2]); + NPUW_ASSERT(scale_shape[1] == 1); + + const auto scale_elem_type = scale->get_element_type(); + NPUW_ASSERT(scale_elem_type == ov::element::f32); + +#if defined(HAVE_AVX2) + // This conversion combines i4tof32 and f32tof16. 
Here we + // - read 256 bits (= 32 bytes, = 64 u4 elements) + // - write 1024 bits (= 128 bytes, = 64 f16 elements) + // per every iteration, what translates to (from->size() / 64) iterations + + const size_t C = from_shape[from_shape.size() - 3]; + const size_t H = from_shape[from_shape.size() - 2]; + const size_t W = from_shape[from_shape.size() - 1]; + + const int8_t* const pSrc = static_cast(from->data()); // 2 x i4 elements + const float* const pScl = static_cast(scale->data()); // 1 x f32 element + int16_t* pDst = static_cast(to->data()); // 1 x f16 element + + auto unpack_body = [&](size_t job_index, size_t stride) { + size_t start_c = job_index * stride; + size_t end_c = std::min(C, start_c + stride); + + for (size_t c = start_c; c < end_c; ++c) { + for (size_t h = 0; h < H; ++h) { + for (size_t w = 0; w < W; w += 64) { + const int8_t* pSrc_iter = pSrc + (w + W * h + W * H * c) / 2; + __m256i vinput = _mm256_lddqu_si256(reinterpret_cast(pSrc_iter)); + __m256i vout0, vout1; + avx2_i4toi8(vinput, &vout0, &vout1); + int8_t tmp[64]; // FIXME: Avoid it + __m256i* tmpv0 = reinterpret_cast<__m256i*>(tmp); + __m256i* tmpv1 = reinterpret_cast<__m256i*>(tmp + 32); + _mm256_storeu_si256(tmpv0, vout0); + _mm256_storeu_si256(tmpv1, vout1); + __m128i i8vecs[8] = { + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 8)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 16)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 24)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 32)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 40)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 48)), + _mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 56)), + }; + + const float* pScl_iter = pScl + w + W * c; + __m256 svalVec[8]; + for (int i = 0; i < 8; ++i) { + svalVec[i] = _mm256_loadu_ps(pScl_iter + i * 8); + } + + __m128i vresults[8] = {avx2_i8tof16(i8vecs[0], svalVec[0]), + avx2_i8tof16(i8vecs[1], svalVec[1]), + avx2_i8tof16(i8vecs[2], svalVec[2]), + avx2_i8tof16(i8vecs[3], svalVec[3]), + avx2_i8tof16(i8vecs[4], svalVec[4]), + avx2_i8tof16(i8vecs[5], svalVec[5]), + avx2_i8tof16(i8vecs[6], svalVec[6]), + avx2_i8tof16(i8vecs[7], svalVec[7])}; + + int16_t* pDst_iter = pDst + w + W * h + W * H * c; + for (int i = 0; i < 8; ++i) { + _mm_storeu_si128(reinterpret_cast<__m128i*>(pDst_iter + i * 8), vresults[i]); + } + } + } + } + }; + + size_t stride = C; + size_t num_jobs = 1; + + if (unpack_options.nPartitions) { + if (unpack_options.bStrictPartitioning) { + stride = (C + unpack_options.nPartitions - 1) / unpack_options.nPartitions; + num_jobs = unpack_options.nPartitions; + } else { + stride = std::max(1, C / unpack_options.nPartitions); + num_jobs = (C + stride - 1) / stride; + } + } + + if (unpack_options.bUseOvParallelFor) { + ov::parallel_for(num_jobs, [&](size_t job_index) { + unpack_body(job_index, stride); + }); + } else { + for (size_t job_index = 0; job_index < num_jobs; ++job_index) { + unpack_body(job_index, stride); + } + } +#else + OPENVINO_THROW("AVX2 support is neccessary but it's not enabled!"); +#endif +} + +void ov::npuw::util::XARCH::unpack_u4f16(const ov::SoPtr& from, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options) { + NPUW_ASSERT(from->is_continuous()); + NPUW_ASSERT(to->is_continuous()); + NPUW_ASSERT(from->get_size() == to->get_size()); + NPUW_ASSERT(from->get_size() % 64 == 0); + +#if defined(HAVE_AVX2) + // This conversion combines u4i8 and i8tof16 unpacks. 
Here we
+    // - read 256 bits (= 32 bytes, = 64 u4 elements)
+    // - write 1024 bits (= 128 bytes, = 64 f16 elements)
+    // per iteration, which translates to (from->size() / 64) iterations
+
+    const std::size_t total = to->get_size();
+    int8_t const* pSrc = static_cast<int8_t const*>(from->data());  // 2 x u4 elements
+    int16_t* pDst = static_cast<int16_t*>(to->data());              // 1 x f16 element
+
+    for (std::size_t index = 0; index < total; index += 64) {
+        __m128i* outv[8] = {
+            reinterpret_cast<__m128i*>(pDst),
+            reinterpret_cast<__m128i*>(pDst + 8),
+            reinterpret_cast<__m128i*>(pDst + 16),
+            reinterpret_cast<__m128i*>(pDst + 24),
+            reinterpret_cast<__m128i*>(pDst + 32),
+            reinterpret_cast<__m128i*>(pDst + 40),
+            reinterpret_cast<__m128i*>(pDst + 48),
+            reinterpret_cast<__m128i*>(pDst + 56),
+        };
+
+        int8_t tmp[64];  // FIXME: Avoid it
+        for (std::size_t ii = 0; ii < 32; ii++) {
+            tmp[ii * 2] = static_cast<int8_t>(lo4(pSrc[ii]));      // LSB is [0] -- since OpenVINO 24.0!
+            tmp[ii * 2 + 1] = static_cast<int8_t>(hi4(pSrc[ii]));  // MSB is [1] -- since OpenVINO 24.0!
+        }
+
+        __m128i vresults[8] = {
+            avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp))),
+            avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 8))),
+            avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 16))),
+            avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 24))),
+            avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 32))),
+            avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 40))),
+            avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 48))),
+            avx2_i8tof16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(tmp + 56))),
+        };
+
+        _mm_storeu_si128(outv[0], vresults[0]);
+        _mm_storeu_si128(outv[1], vresults[1]);
+        _mm_storeu_si128(outv[2], vresults[2]);
+        _mm_storeu_si128(outv[3], vresults[3]);
+        _mm_storeu_si128(outv[4], vresults[4]);
+        _mm_storeu_si128(outv[5], vresults[5]);
+        _mm_storeu_si128(outv[6], vresults[6]);
+        _mm_storeu_si128(outv[7], vresults[7]);
+
+        pSrc += 32;  // shift pSrc only by 32 since it is 64 x u4
+        pDst += 64;  // note pDst is int16_t
+    }
+#else
+    OPENVINO_THROW("AVX2 support is necessary but it's not enabled!");
+#endif
+}
+
+void ov::npuw::util::XARCH::unpack_u4f16_scale_zp(const ov::SoPtr<ov::ITensor>& from,
+                                                  const ov::SoPtr<ov::ITensor>& zerop,
+                                                  const ov::SoPtr<ov::ITensor>& scale,
+                                                  const ov::SoPtr<ov::ITensor>& to,
+                                                  const ov::npuw::util::UnpackOptions& unpack_options) {
+    NPUW_ASSERT(from->is_continuous());
+    NPUW_ASSERT(zerop->is_continuous());
+    NPUW_ASSERT(scale->is_continuous());
+    NPUW_ASSERT(to->is_continuous());
+    NPUW_ASSERT(from->get_size() == to->get_size());
+
+    // Only single-size ZP is supported
+    NPUW_ASSERT(zerop->get_size() == 1);
+
+    const auto& from_shape = from->get_shape();
+    NPUW_ASSERT(from_shape.back() % 64 == 0);
+
+    // 2-channel (Symmetric) and 3-channel (group-wise)
+    // scale factors are supported. The scale/value loop
+    // iteration is based on stotal, so should work for
+    // both cases.
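+    // Shape sanity check (hypothetical shapes, for illustration): group-wise,
+    // from = {64, 8, 128} with scale = {64, 8, 1} gives total / stotal = 128,
+    // one scale per 128-element group; channel-wise symmetric, from = {64, 1024}
+    // with scale = {64, 1} gives total / stotal = 1024, one scale per row. In
+    // both cases the sindex loop below walks stotal groups of elementsPerScale
+    // contiguous elements.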
+    const auto& scale_shape = scale->get_shape();
+    NPUW_ASSERT(scale_shape.size() == 3 || scale_shape.size() == 2);
+    if (scale_shape.size() == 3) {
+        NPUW_ASSERT(scale_shape[0] == from_shape[0]);
+        NPUW_ASSERT(scale_shape[1] == from_shape[1]);
+        NPUW_ASSERT(scale_shape[2] == 1);
+    } else {
+        NPUW_ASSERT(scale_shape[0] == from_shape[0]);
+        NPUW_ASSERT(scale_shape[1] == 1);
+    }
+
+    const auto zerop_elem_type = zerop->get_element_type();
+    const auto scale_elem_type = scale->get_element_type();
+    NPUW_ASSERT(zerop_elem_type == ov::element::u4);
+    NPUW_ASSERT(scale_elem_type == ov::element::f16);
+
+#if defined(HAVE_AVX2)
+    // This conversion combines u4tof32 and f32tof16. Here we
+    // - read 256 bits (= 32 bytes, = 64 u4 elements)
+    // - write 1024 bits (= 128 bytes, = 64 f16 elements)
+    // per iteration, which translates to (from->size() / 64) iterations
+
+    const std::size_t total = to->get_size();
+    const std::size_t stotal = scale->get_size();
+    const std::size_t elementsPerScale = total / stotal;
+
+    const uint8_t* const pSrc = static_cast<const uint8_t*>(from->data());  // 2 x u4 elements
+    const uint8_t* const pZer = static_cast<const uint8_t*>(zerop->data()); // 1 x u4 element
+    const int8_t* const pScl = static_cast<const int8_t*>(scale->data());   // 1 x f16 element
+    const int16_t* pDst = static_cast<const int16_t*>(to->data());          // 1 x f16 element
+
+    const float zval = static_cast<float>(lo4(*pZer));  // LSB - since OpenVINO 24.0!
+
+    __m256 zvalVec = _mm256_set1_ps(zval);
+
+    auto unpack_body = [pSrc, pDst, pScl, zvalVec, elementsPerScale, scale_elem_type, stotal](std::size_t sindex,
+                                                                                              std::size_t stride) {
+        // number of vectorized operations per scale
+        size_t elementsPerScaleVectorized = elementsPerScale / 64;
+
+        uint8_t const* pSrcLocal = pSrc + 32 * elementsPerScaleVectorized * sindex * stride;
+        int8_t const* pSclLocal = pScl + scale_elem_type.size() * sindex * stride;
+        int16_t* pDstLocal = const_cast<int16_t*>(pDst) + 64 * elementsPerScaleVectorized * sindex * stride;
+
+        // on the last iteration the current stride can be smaller - let's check for that
+        sindex *= stride;
+        const auto jobFinish = std::min(sindex + stride, stotal);
+
+        for (; sindex < jobFinish; sindex++) {
+            __m256 svalVec = avx2_load_scale(pSclLocal, scale_elem_type);
+
+            for (std::size_t index = 0; index < elementsPerScale; index += 64) {
+                __m128i* outv[] = {
+                    reinterpret_cast<__m128i*>(pDstLocal),
+                    reinterpret_cast<__m128i*>(pDstLocal + 8),
+                    reinterpret_cast<__m128i*>(pDstLocal + 16),
+                    reinterpret_cast<__m128i*>(pDstLocal + 24),
+                    reinterpret_cast<__m128i*>(pDstLocal + 32),
+                    reinterpret_cast<__m128i*>(pDstLocal + 40),
+                    reinterpret_cast<__m128i*>(pDstLocal + 48),
+                    reinterpret_cast<__m128i*>(pDstLocal + 56),
+                };
+                __m256i himask = _mm256_set1_epi8(static_cast<int8_t>(0xF0));
+                __m256i lomask = _mm256_set1_epi8(static_cast<int8_t>(0x0F));
+
+                // loading 256 bits of u4 (64 elements) from unaligned memory;
+                // cannot use an aligned version like _mm256_load_si256 here - it segfaults even in unit tests
+                __m256i xmmData = _mm256_lddqu_si256(reinterpret_cast<__m256i const*>(pSrcLocal));
+
+                // unpacking with interleaving
+                __m256i vht = _mm256_and_si256(xmmData, himask);
+                __m256i xmmUnpackedLo = _mm256_srli_epi16(vht, 4);          // 32 x i8
+                __m256i xmmUnpackedHi = _mm256_and_si256(xmmData, lomask);  // 32 x i8
+
+                // need 4 portions of 8 x i8 elements
+                __m128i unpacked32LoHi = _mm256_castsi256_si128(xmmUnpackedLo);       // lower 16 x i8
+                __m128i unpacked32LoLo = _mm256_extractf128_si256(xmmUnpackedLo, 1);  // higher 16 x i8
+
+                __m128i unpacked32HiHi = _mm256_castsi256_si128(xmmUnpackedHi);       // lower 16 x i8
+                __m128i unpacked32HiLo = _mm256_extractf128_si256(xmmUnpackedHi, 1);  // higher 16 x i8
+
+                // converting to 32 x f16
+                __m128i f16LoLo[] = {avx2_u8tof16_hi(unpacked32LoLo, zvalVec, svalVec),
+                                     avx2_u8tof16_lo(unpacked32LoLo, zvalVec, svalVec)};
+
+                __m128i f16LoHi[] = {
+                    avx2_u8tof16_hi(unpacked32LoHi, zvalVec, svalVec),
+                    avx2_u8tof16_lo(unpacked32LoHi, zvalVec, svalVec),
+                };
+
+                __m128i f16HiLo[] = {avx2_u8tof16_hi(unpacked32HiLo, zvalVec, svalVec),
+                                     avx2_u8tof16_lo(unpacked32HiLo, zvalVec, svalVec)};
+                __m128i f16HiHi[] = {avx2_u8tof16_hi(unpacked32HiHi, zvalVec, svalVec),
+                                     avx2_u8tof16_lo(unpacked32HiHi, zvalVec, svalVec)};
+
+                // interleaving back
+                __m128i interleaved[] = {_mm_unpacklo_epi16(f16HiHi[0], f16LoHi[0]),
+                                         _mm_unpackhi_epi16(f16HiHi[0], f16LoHi[0]),
+                                         _mm_unpacklo_epi16(f16HiHi[1], f16LoHi[1]),
+                                         _mm_unpackhi_epi16(f16HiHi[1], f16LoHi[1]),
+                                         _mm_unpacklo_epi16(f16HiLo[0], f16LoLo[0]),
+                                         _mm_unpackhi_epi16(f16HiLo[0], f16LoLo[0]),
+                                         _mm_unpacklo_epi16(f16HiLo[1], f16LoLo[1]),
+                                         _mm_unpackhi_epi16(f16HiLo[1], f16LoLo[1])};
+
+                // store the results
+                _mm_storeu_si128(outv[0], interleaved[0]);
+                _mm_storeu_si128(outv[1], interleaved[1]);
+                _mm_storeu_si128(outv[2], interleaved[2]);
+                _mm_storeu_si128(outv[3], interleaved[3]);
+                _mm_storeu_si128(outv[4], interleaved[4]);
+                _mm_storeu_si128(outv[5], interleaved[5]);
+                _mm_storeu_si128(outv[6], interleaved[6]);
+                _mm_storeu_si128(outv[7], interleaved[7]);
+
+                pSrcLocal += 32;  // shift pSrc only by 32 since it is 64 x u4
+                pDstLocal += 64;  // note pDst is int16_t, so 64 x f16 -> 64 elements
+            }  // for(index)
+            pSclLocal += scale_elem_type.size();
+        }  // for(sindex)
+    };
+
+    size_t stride{1};
+
+    // since scaling always works on 64-element-aligned chunks, partition only over the scale shape
+    if (unpack_options.nPartitions) {
+        std::size_t minPartitions;
+        if (!unpack_options.bStrictPartitioning) {
+            // heuristic: every TBB thread should get at least 2048 intrinsic operations of work,
+            // so the minimal stride in scale elements is 2048 / (elementsPerScale / 64)
+            const auto nIntrinsicsPerScale = elementsPerScale / 64u;
+            auto minScaleStride = 2048u / nIntrinsicsPerScale;
+            minScaleStride = std::max<std::size_t>(1u, minScaleStride);
+            minPartitions = stotal / minScaleStride;
+            minPartitions = std::max<std::size_t>(1u, minPartitions);
+            minPartitions = std::min<std::size_t>(minPartitions, unpack_options.nPartitions);
+        } else {
+            minPartitions = unpack_options.nPartitions;
+        }
+
+        // calculating stride in the scale elements space
+        stride = static_cast<size_t>(stotal / minPartitions);
+    }
+
+    const size_t numWork = (stotal + stride - 1) / stride;
+
+    if (unpack_options.bUseOvParallelFor) {
+        ov::parallel_for(numWork, [unpack_body, stride](size_t index) {
+            unpack_body(index, stride);
+        });
+    } else {
+        for (std::size_t index = 0; index < numWork; index++) {
+            unpack_body(index, stride);
+        }
+    }
+#else
+    OPENVINO_THROW("AVX2 support is necessary but it's not enabled!");
+#endif
+}
+
+void ov::npuw::util::XARCH::unpack_u4f16_asymm_zp(const ov::SoPtr<ov::ITensor>& from,
+                                                  const ov::SoPtr<ov::ITensor>& zerop,
+                                                  const ov::SoPtr<ov::ITensor>& scale,
+                                                  const ov::SoPtr<ov::ITensor>& to,
+                                                  const ov::npuw::util::UnpackOptions& unpack_options) {
+    NPUW_ASSERT(from->is_continuous());
+    NPUW_ASSERT(zerop->is_continuous());
+    NPUW_ASSERT(scale->is_continuous());
+    NPUW_ASSERT(to->is_continuous());
+    NPUW_ASSERT(from->get_size() == to->get_size());
+
+    const auto& from_shape = from->get_shape();
+    NPUW_ASSERT(from_shape.back() % 64 == 0);
+
+    // 3-channel (group-wise) scale factors are
+    // supported.
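+    //
+    // Unlike unpack_u4f16_scale_zp above, each scale group here carries its
+    // own u4 zero point, and two zero points share one byte. An illustrative
+    // scalar view (pseudocode, not an existing API): for scale group g,
+    //   zp(g) = (g % 2 == 0) ? lo4(zp_bytes[g / 2]) : hi4(zp_bytes[g / 2])
+    //   dst[i] = f16((float(u4(i)) - float(zp(g))) * float(scale[g]))
+    // which is what the sindex % 2 selection and the every-other-iteration
+    // pZerLocal advance below implement.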
+ + const auto& scale_shape = scale->get_shape(); + NPUW_ASSERT(scale_shape.size() == 3); + if (scale_shape.size() == 3) { + NPUW_ASSERT(scale_shape[0] == from_shape[0]); + NPUW_ASSERT(scale_shape[1] == from_shape[1]); + NPUW_ASSERT(scale_shape[2] == 1); + } + + const auto& zerop_shape = zerop->get_shape(); + NPUW_ASSERT(zerop_shape.size() == 3); + if (zerop_shape.size() == 3) { + NPUW_ASSERT(zerop_shape[0] == from_shape[0]); + NPUW_ASSERT(zerop_shape[1] == from_shape[1]); + NPUW_ASSERT(zerop_shape[2] == 1); + } + + const auto zerop_elem_type = zerop->get_element_type(); + const auto scale_elem_type = scale->get_element_type(); + NPUW_ASSERT(zerop_elem_type == ov::element::u4); + NPUW_ASSERT(scale_elem_type == ov::element::f16); + +#if defined(HAVE_AVX2) + // This conversion combines u4tof32 and f32tof16. Here we + // - read 256 bits (= 32 bytes, = 64 u4 elements) + // - write 1024 bits (= 128 bytes, = 64 f16 elements) + // per every iteration, what translates to (from->size() / 64) iterations + + const std::size_t total = to->get_size(); + const std::size_t stotal = scale->get_size(); + const std::size_t elementsPerScale = total / stotal; + + const uint8_t* const pSrc = static_cast(from->data()); // 2 x u4 elements + const uint8_t* const pZer = static_cast(zerop->data()); // 2 x u4 element + const int8_t* const pScl = static_cast(scale->data()); // 1 x f16 element + const int16_t* pDst = static_cast(to->data()); // 1 x f16 element + + auto unpack_body = [pSrc, pDst, pScl, pZer, elementsPerScale, scale_elem_type, zerop_elem_type, stotal]( + std::size_t sindex, + std::size_t stride) { + // number of vectorized operations per scale + size_t elementsPerScaleVectorized = elementsPerScale / 64; + + uint8_t const* pSrcLocal = pSrc + 32 * elementsPerScaleVectorized * sindex * stride; + int8_t const* pSclLocal = pScl + scale_elem_type.size() * sindex * stride; + uint8_t const* pZerLocal = pZer + zerop_elem_type.size() * sindex * stride / 2; + int16_t* pDstLocal = const_cast(pDst) + 64 * elementsPerScaleVectorized * sindex * stride; + + // if it is last iteration current stride can be smaller - lets check that + sindex *= stride; + const auto jobFinish = std::min(sindex + stride, stotal); + + for (; sindex < jobFinish; sindex++) { + __m256 svalVec = avx2_load_scale(pSclLocal, scale_elem_type); + __m256 zvalVec = _mm256_set1_ps(static_cast((sindex % 2 == 0) ? 
lo4(*pZerLocal) : hi4(*pZerLocal))); + + for (std::size_t index = 0; index < elementsPerScale; index += 64) { + __m128i* outv[] = { + reinterpret_cast<__m128i*>(pDstLocal), + reinterpret_cast<__m128i*>(pDstLocal + 8), + reinterpret_cast<__m128i*>(pDstLocal + 16), + reinterpret_cast<__m128i*>(pDstLocal + 24), + reinterpret_cast<__m128i*>(pDstLocal + 32), + reinterpret_cast<__m128i*>(pDstLocal + 40), + reinterpret_cast<__m128i*>(pDstLocal + 48), + reinterpret_cast<__m128i*>(pDstLocal + 56), + }; + __m256i himask = _mm256_set1_epi8(static_cast(0xF0)); + __m256i lomask = _mm256_set1_epi8(static_cast(0x0F)); + + // loading 256 bit u4 into unalligned memory , so 64 elements + // cannot use aligned version here like _mm256_load_si256 - segfault even on unit tests + __m256i xmmData = _mm256_lddqu_si256(reinterpret_cast<__m256i const*>(pSrcLocal)); + + // unpacking with interleaving + __m256i vht = _mm256_and_si256(xmmData, himask); + __m256i xmmUnpackedLo = _mm256_srli_epi16(vht, 4); // 32 x i8 + __m256i xmmUnpackedHi = _mm256_and_si256(xmmData, lomask); // 32 x i8 + + // need 4 portions of 8 x i8 elements + __m128i unpacked32LoHi = _mm256_castsi256_si128(xmmUnpackedLo); // lower 16 x i8 + __m128i unpacked32LoLo = _mm256_extractf128_si256(xmmUnpackedLo, 1); // higher 16 x i8 + + __m128i unpacked32HiHi = _mm256_castsi256_si128(xmmUnpackedHi); // lower 16 x i8 + __m128i unpacked32HiLo = _mm256_extractf128_si256(xmmUnpackedHi, 1); // higher 16 x i8 + + // converting to 32 x f16 + __m128i f16LoLo[] = {avx2_u8tof16_hi(unpacked32LoLo, zvalVec, svalVec), + avx2_u8tof16_lo(unpacked32LoLo, zvalVec, svalVec)}; + + __m128i f16LoHi[] = { + avx2_u8tof16_hi(unpacked32LoHi, zvalVec, svalVec), + avx2_u8tof16_lo(unpacked32LoHi, zvalVec, svalVec), + }; + + __m128i f16HiLo[] = {avx2_u8tof16_hi(unpacked32HiLo, zvalVec, svalVec), + avx2_u8tof16_lo(unpacked32HiLo, zvalVec, svalVec)}; + __m128i f16HiHi[] = {avx2_u8tof16_hi(unpacked32HiHi, zvalVec, svalVec), + avx2_u8tof16_lo(unpacked32HiHi, zvalVec, svalVec)}; + + // interleaving back + __m128i interleaved[] = {_mm_unpacklo_epi16(f16HiHi[0], f16LoHi[0]), + _mm_unpackhi_epi16(f16HiHi[0], f16LoHi[0]), + _mm_unpacklo_epi16(f16HiHi[1], f16LoHi[1]), + _mm_unpackhi_epi16(f16HiHi[1], f16LoHi[1]), + _mm_unpacklo_epi16(f16HiLo[0], f16LoLo[0]), + _mm_unpackhi_epi16(f16HiLo[0], f16LoLo[0]), + _mm_unpacklo_epi16(f16HiLo[1], f16LoLo[1]), + _mm_unpackhi_epi16(f16HiLo[1], f16LoLo[1])}; + + // store the results + _mm_storeu_si128(outv[0], interleaved[0]); + _mm_storeu_si128(outv[1], interleaved[1]); + _mm_storeu_si128(outv[2], interleaved[2]); + _mm_storeu_si128(outv[3], interleaved[3]); + _mm_storeu_si128(outv[4], interleaved[4]); + _mm_storeu_si128(outv[5], interleaved[5]); + _mm_storeu_si128(outv[6], interleaved[6]); + _mm_storeu_si128(outv[7], interleaved[7]); + + pSrcLocal += 32; // shift pSrc only by 32 since it is 64 x u4 + pDstLocal += 64; // note pDst is int16_t, so 64 x f16 -> 64 elements + } // for(index) + pSclLocal += scale_elem_type.size(); + if (sindex % 2 == 1) { + pZerLocal += zerop_elem_type.size(); + } + } // for(sindex) + }; + + size_t stride{1}; + + // since scaling is always 64 elements aligned operations, lets partition only in scale shape + if (unpack_options.nPartitions) { + std::size_t minPartitions; + if (!unpack_options.bStrictPartitioning) { + // some heuristics that every tbb thread workload has to have 2048 x intrinsics operations at least, + // so in terms of stride, it should be nElementsPerscale/64 * 2048 + const auto nIntrinsicsPerScale = 
elementsPerScale / 64u; + auto minScaleStride = 2048u / nIntrinsicsPerScale; + minScaleStride = std::max(1u, minScaleStride); + minPartitions = stotal / minScaleStride; + minPartitions = std::max(1u, minPartitions); + minPartitions = std::min(minPartitions, unpack_options.nPartitions); + } else { + minPartitions = unpack_options.nPartitions; + } + + // calculating stride in scale elements space + stride = static_cast(stotal / minPartitions); + } + + const size_t numWork = (stotal + stride - 1) / stride; + + if (unpack_options.bUseOvParallelFor) { + ov::parallel_for(numWork, [unpack_body, stride](size_t index) { + unpack_body(index, stride); + }); + } else { + for (std::size_t index = 0; index < numWork; index++) { + unpack_body(index, stride); + } + } +#else + OPENVINO_THROW("AVX2 support is neccessary but it's not enabled!"); +#endif +} + +void ov::npuw::util::XARCH::unpack_u4f16_z(const ov::SoPtr& from, + const ov::SoPtr& zerop, + const ov::SoPtr& scale, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options) { + NPUW_ASSERT(from->is_continuous()); + NPUW_ASSERT(zerop->is_continuous()); + NPUW_ASSERT(scale->is_continuous()); + NPUW_ASSERT(to->is_continuous()); + NPUW_ASSERT(from->get_size() == to->get_size()); + + // Only single-size ZP is supported + NPUW_ASSERT(zerop->get_size() == 1); + + const auto& from_shape = from->get_shape(); + NPUW_ASSERT(from_shape.back() % 64 == 0); + + const auto& scale_shape = scale->get_shape(); + NPUW_ASSERT(scale_shape.size() == 3); + NPUW_ASSERT(scale_shape[0] == from_shape[0]); + NPUW_ASSERT(scale_shape[2] == from_shape[2]); + NPUW_ASSERT(scale_shape[1] == 1); + + const auto zerop_elem_type = zerop->get_element_type(); + const auto scale_elem_type = scale->get_element_type(); + NPUW_ASSERT(zerop_elem_type == ov::element::f32); + NPUW_ASSERT(scale_elem_type == ov::element::f32); + +#if defined(HAVE_AVX2) + // This conversion combines u4tof32 and f32tof16. 
Here we + // - read 256 bits (= 32 bytes, = 64 u4 elements) + // - write 1024 bits (= 128 bytes, = 64 f16 elements) + // per every iteration, what translates to (from->size() / 64) iterations + + const size_t C = from_shape[from_shape.size() - 3]; + const size_t H = from_shape[from_shape.size() - 2]; + const size_t W = from_shape[from_shape.size() - 1]; + + const uint8_t* const pSrc = static_cast(from->data()); // 2 x u4 elements + const float* const pScl = static_cast(scale->data()); // 1 x f32 element + int16_t* pDst = static_cast(to->data()); // 1 x f16 element + + const float zval = avx2_load_f32(reinterpret_cast(zerop->data()), zerop_elem_type); + __m256 zvalVec = _mm256_set1_ps(zval); + + auto unpack_body = [&](size_t job_index, size_t stride) { + size_t start_c = job_index * stride; + size_t end_c = std::min(C, start_c + stride); + + for (size_t c = start_c; c < end_c; ++c) { + for (size_t h = 0; h < H; ++h) { + for (size_t w = 0; w < W; w += 64) { + const uint8_t* pSrc_iter = pSrc + (w + W * h + W * H * c) / 2; + __m256i vinput = _mm256_lddqu_si256(reinterpret_cast(pSrc_iter)); + const float* pScl_iter = pScl + w + W * c; + int16_t* pDst_iter = pDst + w + W * h + W * H * c; + + __m256 svalVec[8]; + for (int i = 0; i < 8; ++i) { + svalVec[i] = _mm256_loadu_ps(pScl_iter + i * 8); + } + + // vectorized unpack u4 to f16 + __m128i htmp[8]; // 64 x f16 + avx2_u4tof16(vinput, htmp, zvalVec, svalVec); + + for (int i = 0; i < 8; ++i) { + _mm_storeu_si128(reinterpret_cast<__m128i*>(pDst_iter + i * 8), htmp[i]); + } + } + } + } + }; + + size_t stride = C; + size_t num_jobs = 1; + + if (unpack_options.nPartitions) { + if (unpack_options.bStrictPartitioning) { + stride = (C + unpack_options.nPartitions - 1) / unpack_options.nPartitions; + num_jobs = unpack_options.nPartitions; + } else { + stride = std::max(1, C / unpack_options.nPartitions); + num_jobs = (C + stride - 1) / stride; + } + } + + if (unpack_options.bUseOvParallelFor) { + ov::parallel_for(num_jobs, [&](size_t job_index) { + unpack_body(job_index, stride); + }); + } else { + for (size_t job_index = 0; job_index < num_jobs; ++job_index) { + unpack_body(job_index, stride); + } + } +#else + OPENVINO_THROW("AVX2 support is neccessary but it's not enabled!"); +#endif +} + +void ov::npuw::util::XARCH::unpack_u4f32(const ov::SoPtr& from, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options) { + NPUW_ASSERT(from->is_continuous()); + NPUW_ASSERT(to->is_continuous()); + NPUW_ASSERT(from->get_size() == to->get_size()); + + uint8_t const* pSrc = static_cast(from->data()); // 2 x u4 elements + float* pDst = static_cast(to->data()); // 1 x f32 element + + const std::size_t total = from->get_size(); + for (std::size_t index = 0; index < total; index += 2) { + pDst[0] = static_cast(lo4(*pSrc)); // LSB is [0] - since OpenVINO 2024.0! + pDst[1] = static_cast(hi4(*pSrc)); // MSB is [1] - since OpenVINO 2024.0! 
+ pSrc++; + pDst += 2; + } +} + +void ov::npuw::util::XARCH::unpack_i8f16(const ov::SoPtr& from, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options) { + NPUW_ASSERT(from->is_continuous()); + NPUW_ASSERT(to->is_continuous()); + NPUW_ASSERT(from->get_size() == to->get_size()); + NPUW_ASSERT(from->get_size() % 8 == 0); + +#if defined(HAVE_AVX2) + constexpr std::size_t VECSIZE = 8; + + const std::size_t total = from->get_size(); + int8_t const* pSrc = from->data(); + int16_t* pDst = static_cast(to->data()); + + for (std::size_t index = 0; index < total; index += VECSIZE) { + const __m128i* pSrcV = reinterpret_cast(pSrc); + __m128i* pDstV = reinterpret_cast<__m128i*>(pDst); + __m128i i8vec = _mm_loadl_epi64(pSrcV); // load: 8 x i8 [ 64b of 128b] + __m128i f16vec = avx2_i8tof16(i8vec); + _mm_store_si128(pDstV, f16vec); // store: 8 x f16 [128b] + pSrc += 8; + pDst += 8; + } +#else + OPENVINO_THROW("AVX2 support is neccessary but it's not enabled!"); +#endif +} + +void ov::npuw::util::XARCH::unpack_i8f16_scale(const ov::SoPtr& from, + const ov::SoPtr& scale, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options) { + NPUW_ASSERT(from->is_continuous()); + NPUW_ASSERT(scale->is_continuous()); + NPUW_ASSERT(to->is_continuous()); + NPUW_ASSERT(from->get_size() == to->get_size()); + NPUW_ASSERT(from->get_size() % 8 == 0); + NPUW_ASSERT(scale->get_shape()[0] == from->get_shape()[0]); + NPUW_ASSERT(scale->get_shape()[1] == 1); + + const auto scale_elem_type = scale->get_element_type(); + NPUW_ASSERT(scale_elem_type == ov::element::f32 || scale_elem_type == ov::element::f16); + +#if defined(HAVE_AVX2) + constexpr std::size_t VECSIZE = 8; + + const std::size_t total = from->get_size(); + const std::size_t stotal = scale->get_size(); + int8_t const* pSrc = from->data(); + int8_t const* pScl = static_cast(scale->data()); + int16_t* pDst = static_cast(to->data()); + + for (std::size_t sindex = 0u; sindex < stotal; sindex++) { + __m256 svec = avx2_load_scale(pScl, scale_elem_type); + for (std::size_t index = 0u; index < (total / stotal); index += VECSIZE) { + __m128i const* pSrcV = reinterpret_cast(pSrc); + __m128i* pDstV = reinterpret_cast<__m128i*>(pDst); + __m128i i8vec = _mm_loadl_epi64(pSrcV); // load: 8 x i8 [ 64b of 128b] + __m128i f16vec = avx2_i8tof16(i8vec, svec); // convert & scale + _mm_store_si128(pDstV, f16vec); // store: 8 x f16 [128b] + pSrc += 8; + pDst += 8; + } // index + pScl += scale_elem_type.size(); + } // sindex +#else + OPENVINO_THROW("AVX2 support is neccessary but it's not enabled!"); +#endif +} + +void ov::npuw::util::XARCH::unpack_u8f16(const ov::SoPtr& from, + const ov::SoPtr& zerop, + const ov::SoPtr& scale, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& _options) { + NPUW_ASSERT(from->is_continuous()); + NPUW_ASSERT(zerop->is_continuous()); + NPUW_ASSERT(scale->is_continuous()); + NPUW_ASSERT(to->is_continuous()); + NPUW_ASSERT(from->get_size() == to->get_size()); + NPUW_ASSERT(from->get_size() % 8 == 0); + NPUW_ASSERT(scale->get_shape()[0] == from->get_shape()[0]); + NPUW_ASSERT(scale->get_shape()[1] == 1); + NPUW_ASSERT(zerop->get_shape()[0] == from->get_shape()[0]); + NPUW_ASSERT(zerop->get_shape()[1] == 1); + + const auto scale_elem_type = scale->get_element_type(); + NPUW_ASSERT(scale_elem_type == ov::element::f32 || scale_elem_type == ov::element::f16); + + const auto zerop_elem_type = zerop->get_element_type(); + NPUW_ASSERT(zerop_elem_type == ov::element::u8); + +#if defined(HAVE_AVX2) + constexpr std::size_t 
VECSIZE = 8; + + const std::size_t total = from->get_size(); + const std::size_t stotal = scale->get_size(); + uint8_t const* pSrc = from->data(); + uint8_t const* pZrp = zerop->data(); + int8_t const* pScl = static_cast(scale->data()); + int16_t* pDst = static_cast(to->data()); + + for (std::size_t sindex = 0u; sindex < stotal; sindex++) { + __m256 svec = avx2_load_scale(pScl, scale_elem_type); + __m128i u8zp = _mm_set1_epi8(*pZrp); // bcast: 8 x u8 + __m256i u32zp = _mm256_cvtepu8_epi32(u8zp); // i32 zero point + __m256 f32zp = _mm256_cvtepi32_ps(u32zp); // f32 zero point + for (std::size_t index = 0u; index < (total / stotal); index += VECSIZE) { + __m128i const* pSrcV = reinterpret_cast(pSrc); + __m128i* pDstV = reinterpret_cast<__m128i*>(pDst); + __m128i u8in = _mm_loadl_epi64(pSrcV); // load: 8 x u8 + __m128i f16vec = avx2_u8tof16(u8in, f32zp, svec); // convert & scale + _mm_store_si128(pDstV, f16vec); // store: 8 x f16 + pSrc += VECSIZE; + pDst += VECSIZE; + } // index + pScl += scale_elem_type.size(); + pZrp++; + } // sindex +#else + OPENVINO_THROW("AVX2 support is neccessary but it's not enabled!"); +#endif +} + +ov::Tensor ov::npuw::util::XARCH::to_f16(const ov::Tensor& t) { + ov::Shape shape = t.get_shape(); + NPUW_ASSERT(t.get_element_type() == ov::element::f32); + NPUW_ASSERT(t.get_size() % 8 == 0); + NPUW_ASSERT(t.is_continuous()); + + ov::Tensor tnew(ov::element::f16, shape); + +#if defined(HAVE_AVX2) + const float* psrc = t.data(); + uint8_t* pdst = static_cast(tnew.data()); + + for (std::size_t i = 0; i < t.get_size() / 8; i++) { + __m256 vsrc = _mm256_loadu_ps(psrc); + __m128i vout = _mm256_cvtps_ph(vsrc, _MM_FROUND_TO_NEAREST_INT); + __m128i* pout = reinterpret_cast<__m128i*>(pdst); + _mm_storeu_si128(pout, vout); + psrc += 8; // offset in sizeof(float) + pdst += (8 * 2); // offset in bytes + } +#else + OPENVINO_THROW("AVX2 support is neccessary but it's not enabled!"); +#endif + return tnew; +} diff --git a/src/plugins/intel_npu/src/plugin/npuw/util_xarch.hpp b/src/plugins/intel_npu/src/plugin/npuw/util_xarch.hpp new file mode 100644 index 00000000000000..0f0d9912f3b221 --- /dev/null +++ b/src/plugins/intel_npu/src/plugin/npuw/util_xarch.hpp @@ -0,0 +1,88 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "logging.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/runtime/itensor.hpp" +#include "openvino/runtime/so_ptr.hpp" +#include "util.hpp" + +namespace ov { +namespace npuw { +namespace util { +namespace XARCH { + +void unpack_i4i8(const ov::SoPtr& from, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options); + +void unpack_u4i8(const ov::SoPtr& from, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options); + +void unpack_i4f16(const ov::SoPtr& from, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options); + +void unpack_i4f16_scale(const ov::SoPtr& from, + const ov::SoPtr& scale, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options); + +void unpack_i4f16_z(const ov::SoPtr& from, + const ov::SoPtr& scale, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options); + +void unpack_u4f16(const ov::SoPtr& from, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options); + +void unpack_u4f16_scale_zp(const ov::SoPtr& from, + const ov::SoPtr& zerop, + const ov::SoPtr& scale, + const ov::SoPtr& to, + const ov::npuw::util::UnpackOptions& unpack_options); + 
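+// A minimal scalar sketch of the u4 unpack semantics shared by the
+// unpack_u4f16* functions above, handy as a reference when verifying the AVX2
+// paths. It assumes the OpenVINO 24.0+ nibble order (element [0] in the low
+// nibble) and that <cstdint>/<cstddef> are available through the existing
+// includes; the name ref_unpack_u4 is illustrative, not an existing API, and
+// it returns f32 (the vector kernels additionally round to f16).
+inline float ref_unpack_u4(const uint8_t* packed, std::size_t i, float scale, float zp) {
+    const uint8_t byte = packed[i / 2];
+    const uint8_t nibble = (i % 2 == 0) ? static_cast<uint8_t>(byte & 0x0F)  // LSB is [0]
+                                        : static_cast<uint8_t>(byte >> 4);   // MSB is [1]
+    return (static_cast<float>(nibble) - zp) * scale;
+}
+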
+void unpack_u4f16_asymm_zp(const ov::SoPtr<ov::ITensor>& from,
+                           const ov::SoPtr<ov::ITensor>& zerop,
+                           const ov::SoPtr<ov::ITensor>& scale,
+                           const ov::SoPtr<ov::ITensor>& to,
+                           const ov::npuw::util::UnpackOptions& unpack_options);
+
+void unpack_u4f16_z(const ov::SoPtr<ov::ITensor>& from,
+                    const ov::SoPtr<ov::ITensor>& zerop,
+                    const ov::SoPtr<ov::ITensor>& scale,
+                    const ov::SoPtr<ov::ITensor>& to,
+                    const ov::npuw::util::UnpackOptions& unpack_options);
+
+void unpack_u4f32(const ov::SoPtr<ov::ITensor>& from,
+                  const ov::SoPtr<ov::ITensor>& to,
+                  const ov::npuw::util::UnpackOptions& unpack_options);
+
+void unpack_i8f16(const ov::SoPtr<ov::ITensor>& from,
+                  const ov::SoPtr<ov::ITensor>& to,
+                  const ov::npuw::util::UnpackOptions& unpack_options);
+
+void unpack_i8f16_scale(const ov::SoPtr<ov::ITensor>& from,
+                        const ov::SoPtr<ov::ITensor>& scale,
+                        const ov::SoPtr<ov::ITensor>& to,
+                        const ov::npuw::util::UnpackOptions& unpack_options);
+
+void unpack_u8f16(const ov::SoPtr<ov::ITensor>& from,
+                  const ov::SoPtr<ov::ITensor>& zerop,
+                  const ov::SoPtr<ov::ITensor>& scale,
+                  const ov::SoPtr<ov::ITensor>& to,
+                  const ov::npuw::util::UnpackOptions& _options);
+
+ov::Tensor to_f16(const ov::Tensor& t);
+
+}  // namespace XARCH
+}  // namespace util
+}  // namespace npuw
+}  // namespace ov

From 54db50b893b72990a10381710edb2bf4b506f78e Mon Sep 17 00:00:00 2001
From: Wilson Seok
Date: Thu, 17 Oct 2024 04:43:10 -0700
Subject: [PATCH 052/112] [GPU] Fix weight reorder src format to avoid inconsistent ndims of src/dst in weight reorder (#27051)

### Details:
- Fix weight reorder src format to avoid inconsistent ndims of src/dst in weight reorder

### Tickets:
- 154614
---
 src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp
index b8ff112cead147..19ea02c7c66d28 100644
--- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp
@@ -596,6 +596,14 @@ bool keep_weights_reorder_shape_consistent(cldnn::layout& layout, const dnnl::me
         // Check whether they have the same values and order.
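        // For example (illustrative dims): target dims {1, 16, 1, 32} and desc
        // dims {16, 32} both filter down to {16, 32}, so they are considered
        // consistent and only the rank of the layout needs adjusting below.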
        if (filtered_target_dims == filtered_desc_dims) {
            layout.set_partial_shape(desc_dims);
+            if (layout.get_rank() != desc_dims.size()) {
+                if (cldnn::format::is_default_format(layout.format)) {
+                    layout.format = cldnn::format::get_default_format(desc_dims.size());
+                } else {
+                    // TODO: handle the case where the weight format is not a default format
+                    return false;
+                }
+            }
            return true;
        } else {
            return false;

From 2f62be0a7ba139e6ed1580d2b48cef6d3d986127 Mon Sep 17 00:00:00 2001
From: Luo Cheng
Date: Thu, 17 Oct 2024 21:17:08 +0800
Subject: [PATCH 053/112] [CPU] Support different Key/Value head sizes, and head sizes that are not multiples of 16, for SDPA/PA (#26945)

### Details:
- *Support different kv head sizes for SDPA and PagedAttention*
- *Support kv head sizes that are not multiples of 16 for PagedAttention*

### Tickets:
- *[152445](https://jira.devtools.intel.com/browse/CVS-152445)*
- *[145986](https://jira.devtools.intel.com/browse/CVS-145986)*
---
 .../state_management_pattern.cpp              |   8 +-
 src/core/src/op/paged_attention.cpp           |  28 ++-
 .../nodes/kernels/scaled_attn/attn_memcpy.cpp |  20 +-
 .../nodes/kernels/scaled_attn/attn_quant.cpp  |   9 +-
 .../nodes/kernels/scaled_attn/executor_pa.cpp | 108 +++++++----
 .../kernels/scaled_attn/mha_single_token.cpp  |  21 +-
 .../intel_cpu/src/nodes/paged_attn.cpp        |  22 ++-
 .../intel_cpu/src/nodes/scaled_attn.cpp       | 181 ++++++++++--------
 .../shape_inference/custom/scaled_attn.cpp    |  32 +++-
 .../cpu_opset/common/op/sdpa.cpp              |  37 +++-
 .../subgraph_tests/src/arm/concat_sdp.cpp     |   1 +
 .../subgraph_tests/src/classes/concat_sdp.cpp |  49 +++--
 .../subgraph_tests/src/classes/concat_sdp.hpp |   6 +-
 .../subgraph_tests/src/common/concat_sdp.cpp  |   1 +
 .../subgraph_tests/src/x64/concat_sdp.cpp     |   1 +
 15 files changed, 336 insertions(+), 188 deletions(-)

diff --git a/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp b/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp
index c259e9387d9dd0..28e7cd90019b34 100644
--- a/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp
+++ b/src/common/transformations/src/transformations/sdpa_to_paged_attention/state_management_pattern.cpp
@@ -383,12 +383,18 @@ ov::pass::StateManagementPattern::StateManagementPattern(ParameterVector& kv_par

     auto paged_attention = std::make_shared<ov::op::PagedAttentionExtension>(pa_arguments);

+    // The output shape of PagedAttention will be converted to [batch, 1, head_num, head_size_v];
+    // head_size_v may differ from head_size_q/head_size_k. head_size_v can be obtained from the
+    // shape of the value input.
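+    // Worked example with illustrative numbers: num_heads = 32, head_size = 128,
+    // num_kv_heads = 8, v_head_size = 64 gives q_dim = 4096, k_dim = 1024,
+    // v_dim = 512, and q_dim * v_dim / k_dim = 2048 = 32 * 64 = num_heads * v_head_size.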
+    auto hidden_dim_v = std::make_shared<v8::Gather>(std::make_shared<v3::ShapeOf>(v_target_layout),
+                                                     v0::Constant::create(element::i64, Shape{}, {-1}),
+                                                     v0::Constant::create(element::i64, Shape{}, {0}));
+
     auto pa_shape = std::make_shared<v0::Concat>(
         OutputVector{
             v0::Constant::create(element::i64, Shape{1}, {0}),
             v0::Constant::create(element::i64, Shape{1}, {1}),
             v0::Constant::create(element::i64, Shape{1}, {-1}),
-            std::make_shared<v0::Unsqueeze>(hidden_dim, v0::Constant::create(element::i64, Shape{}, {0})),
+            std::make_shared<v0::Unsqueeze>(hidden_dim_v, v0::Constant::create(element::i64, Shape{}, {0})),
         },
         0);
     auto pa_reshape = std::make_shared<v1::Reshape>(paged_attention->output(0), pa_shape, true);

diff --git a/src/core/src/op/paged_attention.cpp b/src/core/src/op/paged_attention.cpp
index 261b0ce1c47605..cdcb66e86ee33e 100644
--- a/src/core/src/op/paged_attention.cpp
+++ b/src/core/src/op/paged_attention.cpp
@@ -4,6 +4,7 @@

 #include "openvino/op/paged_attention.hpp"

+#include "dimension_util.hpp"
 #include "itt.hpp"
 #include "openvino/op/op.hpp"

@@ -146,10 +147,33 @@ void PagedAttentionExtension::validate_and_infer_types() {
                           get_input_element_type(12),
                           ".");

+    // the value head_size may differ from the key head_size
+    auto out_ps = get_input_partial_shape(0);
+    const auto& key_ps = get_input_partial_shape(1);
+    const auto& value_ps = get_input_partial_shape(2);
+    if (out_ps.rank().is_static()) {
+        if (key_ps.rank().is_static() && value_ps.rank().is_static() && key_ps[1].is_static()) {
+            // out_ps[1] should be `num_heads * v_head_size`, which can be derived as follows:
+            //   q: query_ps[1] = num_heads * head_size
+            //   k: key_ps[1] = num_kv_heads * head_size
+            //   v: value_ps[1] = num_kv_heads * v_head_size
+            // therefore:
+            //   q * v / k = (num_heads * head_size) * (num_kv_heads * v_head_size) /
+            //               (num_kv_heads * head_size) = num_heads * v_head_size
+            out_ps[1] = out_ps[1] * value_ps[1] / key_ps[1].get_length();
+            NODE_VALIDATION_CHECK(this,
+                                  !ov::util::dim::is_empty(out_ps[1]),
+                                  "The last dimension of the output should not be empty.");
+        } else {
+            out_ps[1] = Dimension::dynamic();
+        }
+    }
+
     if (m_output_type[0] == ov::element::undefined) {
-        set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
+        set_output_type(0, get_input_element_type(0), out_ps);
     } else {
-        set_output_type(0, m_output_type[0], get_input_partial_shape(0));
+        set_output_type(0, m_output_type[0], out_ps);
     }

     if (m_output_type[1] == ov::element::undefined) {

diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp
index 21d8fbbe6e298f..755330bd850c4d 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_memcpy.cpp
@@ -51,16 +51,14 @@ void attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input,
                         const ov::intel_cpu::PlainTensor& past_k_output,
                         const ov::intel_cpu::PlainTensor& past_v_output) {
     // For compatibility, all input_kvs are permuted to BHLS
-    size_t B = k_input.m_dims[0], H = k_input.m_dims[1], L1 = k_input.m_dims[2], S = k_input.m_dims[3];
-    // Internal LBHS layout has strides[L] > strides[B]
-    assert(past_k_output.m_strides[2] >= past_k_output.m_strides[0]);
+    size_t B = k_input.m_dims[0], H = k_input.m_dims[1], L1 = k_input.m_dims[2], S = k_input.m_dims[3], SV = v_input.m_dims[3];
     parallel_for3d(L1, B, H, [&](size_t m, size_t b, size_t h) {
         attn_copy(past_k_output.ptr<T2>(b, h, m, 0),
                   k_input.ptr<T>(b, h, m, 0),
                   S);
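        // note: the K copy above moves S elements per row, while the V copy
        // below moves SV elements - the two may differ now that the value
        // head size is decoupled from the key head size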
attn_copy(past_v_output.ptr(b, h, m, 0), v_input.ptr(b, h, m, 0), - S); + SV); }); } @@ -69,16 +67,14 @@ static void attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input, const ov::intel_cpu::PlainTensor& past_k_output, const ov::intel_cpu::PlainTensor& past_v_output) { // For compatibility, all input_kvs are permuted to BHLS - size_t B = k_input.m_dims[0], H = k_input.m_dims[1], L1 = k_input.m_dims[2], S = k_input.m_dims[3]; - // Internal LBHS layout has strides[L] > strides[B] - assert(past_k_output.m_strides[2] >= past_k_output.m_strides[0]); + size_t B = k_input.m_dims[0], H = k_input.m_dims[1], L1 = k_input.m_dims[2], S = k_input.m_dims[3], SV = v_input.m_dims[3]; parallel_for3d(L1, B, H, [&](size_t m, size_t b, size_t h) { std::memcpy(past_k_output.ptr_v(b, h, m, 0), k_input.ptr_v(b, h, m, 0), S * k_input.m_element_size); std::memcpy(past_v_output.ptr_v(b, h, m, 0), v_input.ptr_v(b, h, m, 0), - S * v_input.m_element_size); + SV * v_input.m_element_size); }); } @@ -88,7 +84,7 @@ static void paged_attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input, const ov::intel_cpu::PlainTensor& past_k_output, const ov::intel_cpu::PlainTensor& past_v_output, const ov::intel_cpu::PlainTensor& slot_mapping) { - size_t B = k_input.m_dims[0], H = k_input.m_dims[1], L1 = k_input.m_dims[2], S = k_input.m_dims[3]; + size_t B = k_input.m_dims[0], H = k_input.m_dims[1], L1 = k_input.m_dims[2], S = k_input.m_dims[3], SV = v_input.m_dims[3]; size_t block_size = past_k_output.m_dims[2]; parallel_for3d(B, L1, H, [&](size_t b, size_t m, size_t h) { auto slot = slot_mapping.ptr(b)[m]; @@ -100,7 +96,7 @@ static void paged_attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input, S); attn_copy(past_v_output.ptr(block_number, h, block_offset, 0), v_input.ptr(b, h, m, 0), - S); + SV); }); } @@ -109,7 +105,7 @@ static void paged_attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input, const ov::intel_cpu::PlainTensor& past_k_output, const ov::intel_cpu::PlainTensor& past_v_output, const ov::intel_cpu::PlainTensor& slot_mapping) { - size_t B = k_input.m_dims[0], H = k_input.m_dims[1], L1 = k_input.m_dims[2], S = k_input.m_dims[3]; + size_t B = k_input.m_dims[0], H = k_input.m_dims[1], L1 = k_input.m_dims[2], S = k_input.m_dims[3], SV = v_input.m_dims[3]; size_t block_size = past_k_output.m_dims[2]; parallel_for3d(B, L1, H, [&](size_t b, size_t m, size_t h) { auto slot = slot_mapping.ptr(b)[m]; @@ -121,7 +117,7 @@ static void paged_attn_memcpy_kernel(const ov::intel_cpu::PlainTensor& k_input, S * k_input.m_element_size); std::memcpy(past_v_output.ptr_v(block_number, h, block_offset, 0), v_input.ptr_v(b, h, m, 0), - S * v_input.m_element_size); + SV * v_input.m_element_size); }); } diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp index d95f973fa9f2f0..66772bda03db51 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/attn_quant.cpp @@ -178,8 +178,7 @@ static void attn_quant_mt(const ov::intel_cpu::PlainTensor& k_src, const ov::intel_cpu::PlainTensor& k_scale_zp, const ov::intel_cpu::PlainTensor& v_scale_zp) { // For compatibility, all input_kvs are permuted to BHLS - size_t B = k_src.m_dims[0], H = k_src.m_dims[1], L1 = k_src.m_dims[2], S = k_src.m_dims[3]; - // Internal LBHS layout has strides[L] > strides[B] + size_t B = k_src.m_dims[0], H = k_src.m_dims[1], L1 = k_src.m_dims[2], S = k_src.m_dims[3], SV = 
v_src.m_dims[3]; parallel_for3d(L1, B, H, [&](size_t m, size_t b, size_t h) { auto p_k = k_scale_zp.ptr(m, b, h); auto p_v = v_scale_zp.ptr(m, b, h); @@ -190,7 +189,7 @@ static void attn_quant_mt(const ov::intel_cpu::PlainTensor& k_src, p_k[1]); quant_u8(v_src.ptr(b, h, m), v_dst.ptr(b, h, m), - S, + SV, p_v[0], p_v[1]); }); @@ -202,7 +201,7 @@ static void paged_attn_quant_mt(const ov::intel_cpu::PlainTensor& k_src, const ov::intel_cpu::PlainTensor& k_dst, const ov::intel_cpu::PlainTensor& v_dst, const ov::intel_cpu::PlainTensor& slot_mapping) { - size_t B = k_src.m_dims[0], H = k_src.m_dims[1], L1 = k_src.m_dims[2], S = k_src.m_dims[3]; + size_t B = k_src.m_dims[0], H = k_src.m_dims[1], L1 = k_src.m_dims[2], S = k_src.m_dims[3], SV = v_src.m_dims[3]; size_t block_size = k_dst.m_dims[2]; parallel_for3d(B, L1, H, [&](size_t b, size_t m, size_t h) { auto slot = slot_mapping.ptr(b)[m]; @@ -221,7 +220,7 @@ static void paged_attn_quant_mt(const ov::intel_cpu::PlainTensor& k_src, p_k[1]); quant_u8(v_src.ptr(b, h, m), v_dst.ptr(block_number, h, block_offset) + sizeof(float) + sizeof(float), - S, + SV, p_v[0], p_v[1]); }); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp index 971aa6bb58c994..bef34881ca41bc 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp @@ -708,14 +708,38 @@ static void pack_32x16_kernel(T* dst, T* src, size_t dst_stride, size_t src_stri } template::value || std::is_same::value), bool>::type> -static void pack_32Nx16K(T* dst, T* src, T* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { +static void pack_32xK_kernel(T* dst, T* src, size_t dst_stride, size_t src_stride, size_t K) { + static const uint64_t idx[8] = {0, 4, 1, 5, 2, 6, 3, 7}; + auto midx = _mm512_loadu_si512(idx); + __mmask16 mask = (1 << K) - 1; + for (size_t i = 0; i < K; i++) { + auto x = _mm256_maskz_loadu_epi16(mask, src); // [a1 a2 a3 a4] total 256-bits in 4 64bits unit + auto y = _mm256_maskz_loadu_epi16(mask, src + src_stride); // [b1 b2 b3 b4] total 256-bits + auto a = _mm512_castsi256_si512(x); + auto b = _mm512_castsi256_si512(y); + a = _mm512_permutexvar_epi64(midx, a); // [a1 x | a2 x | a3 x | a4 x] + b = _mm512_permutexvar_epi64(midx, b); // [b1 x | b2 x | b3 x | b4 x] + auto B0 = _mm512_unpacklo_epi16(a, b); + _mm512_mask_storeu_epi32(dst, mask, B0); + src += 2 * src_stride; + dst += 2 * dst_stride; + } +} + +template::value || std::is_same::value), bool>::type> +static void pack_32NxK(T* dst, T* src, T* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { for (size_t n = 0; n < N; n += 32) { size_t k = 0; for (; k + 32 <= K; k += 32) { pack_32x32_kernel(dst + k * 2, src + k, dst_stride, src_stride); } - if (k < K) + if (k + 16 <= K) { pack_32x16_kernel(dst + k * 2, src + k, dst_stride, src_stride); + k += 16; + } + if (k < K) { + pack_32xK_kernel(dst + k * 2, src + k, dst_stride, src_stride, K - k); + } dst += 32 * dst_stride; src += 32 * src_stride; @@ -723,7 +747,7 @@ static void pack_32Nx16K(T* dst, T* src, T* tmp, size_t N, size_t K, size_t dst_ } template::value || std::is_same::value), bool>::type> -static void pack_32Nx16K(T* dst, uint8_t* src, T* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { +static void pack_32NxK(T* dst, uint8_t* src, T* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { // The layout for per token per head: // 
|scale(f32)|zeropoint(f32)|quantized feature(u8,idx_1)|quantized feature(u8,idx_2)|...|quantized feature(u8,idx_S)| // The quantized feature will start from 8bytes=sizeof(float)+sizeof(float) @@ -735,14 +759,14 @@ static void pack_32Nx16K(T* dst, uint8_t* src, T* tmp, size_t N, size_t K, size_ s += src_stride + 2 * sizeof(float); t += src_stride; } - pack_32Nx16K(dst, tmp, reinterpret_cast(0), N, K, dst_stride, src_stride); + pack_32NxK(dst, tmp, reinterpret_cast(0), N, K, dst_stride, src_stride); } #endif template -static void pack_32Nx16K(float* dst, T* src, float* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { +static void pack_32NxK(float* dst, T* src, float* tmp, size_t N, size_t K, size_t dst_stride, size_t src_stride) { // never called - OPENVINO_THROW("pack_32Nx16K: should not be called."); + OPENVINO_THROW("pack_32NxK: should not be called."); } template @@ -750,6 +774,7 @@ struct MHAHelper { // initialize once size_t _H; size_t _S; + size_t _SV; size_t _Hk; size_t _h_each_group_len; size_t _block_size; @@ -785,7 +810,7 @@ struct MHAHelper { _weight.resize({size_t{1}, size_t{1}, size_t{1}, size_t{1}}); } - void init(size_t H, size_t S, size_t Hk, size_t h_each_group_len, size_t block_size, size_t sliding_window, + void init(size_t H, size_t S, size_t SV, size_t Hk, size_t h_each_group_len, size_t block_size, size_t sliding_window, float d_scale, size_t kv_len, bool init_alibi_lookup) { // query shape: [B, H, L, S] // present_key shape: [block, H, 32, S] @@ -799,6 +824,7 @@ struct MHAHelper { auto in_type = precision_of::value; _H = H; _S = S; + _SV = SV; _Hk = Hk; _h_each_group_len = h_each_group_len; _block_size = block_size; @@ -811,7 +837,7 @@ struct MHAHelper { auto new_score_stride = std::max(prev_score_stride, want_score_stride); // resize temporary buffers, weight.size(3) will be aligned to block_size _weight.resize({static_cast(_nthr), H, _block_size, new_score_stride}); - _output.resize({static_cast(_nthr), _block_size, H, S}); + _output.resize({static_cast(_nthr), _block_size, H, SV}); // TODO: kernel supports stride if (_qk_gemm.empty() || prev_score_stride < new_score_stride) { @@ -828,20 +854,20 @@ struct MHAHelper { false, in_type); _wv_gemm[i] = std::make_shared(i + 1, - _S, + _SV, _block_size, // if it's bf16, the stride needs double due to reuse float buffer (in_type == ov::element::Type_t::f32 ? 1 : 2) * _weight.stride(2), - _S, + _SV, _output.stride(1), false, in_type); _wv_gemm_acc[i] = std::make_shared(i + 1, - _S, + _SV, _block_size, // if it's bf16, the stride needs double due to reuse float buffer (in_type == ov::element::Type_t::f32 ? 1 : 2) * _weight.stride(2), - _S, + _SV, _output.stride(1), false, in_type, @@ -881,7 +907,7 @@ struct MHAHelper { void init_reorder_buffers(size_t batch, size_t kv_len_in_blocks) { _qk_scratch_b.resize({batch, kv_len_in_blocks, _Hk, _block_size * _S}); - _wv_scratch_b.resize({batch, kv_len_in_blocks, _Hk, _block_size * rnd_up(_S, _block_size)}); + _wv_scratch_b.resize({batch, kv_len_in_blocks, _Hk, _block_size * rnd_up(_SV, _block_size)}); } void init_score_buffers(const PlainTensor& past_lens, const PlainTensor& subsequence_begins) { @@ -992,7 +1018,7 @@ struct MHAHelper { // reuse float buffer, need to use float to compute offset auto* w_ptr = reinterpret_cast(_weight.ptr(ithr, h, 0, 0)); - float* fp32_out_ptr = q_is_xf16 ? _output.ptr(ithr, 0, h, 0) : output_emb.ptr(q_start, h * _S); + float* fp32_out_ptr = q_is_xf16 ? 
_output.ptr(ithr, 0, h, 0) : output_emb.ptr(q_start, h * _SV); // for each weight block, loop through all value block for (size_t v_blk = 0; v_blk < cur_kv_len_blocks; v_blk++) { @@ -1020,12 +1046,12 @@ struct MHAHelper { } if (q_is_xf16) { attn_memcpy2d_kernel(_output.ptr(ithr, 0, h, 0), - output_emb.ptr(q_start, h * _S), + output_emb.ptr(q_start, h * _SV), ov::element::f32, precision_of::value, _output.stride(1), output_emb.stride(0), - _S, + _SV, q_cnt); } } @@ -1091,7 +1117,7 @@ struct MHAHelper { } } - memset(_output.ptr(ithr), 0, q_len * _H * _S * sizeof(float)); + memset(_output.ptr(ithr), 0, q_len * _H * _SV * sizeof(float)); for (size_t pv = 0, i = 0; pv < cur_kv_len; pv += _block_size, i++) { auto block_number = block_table[i]; auto* v = present_value.ptr(block_number, hk); @@ -1100,7 +1126,7 @@ struct MHAHelper { attn_acc_value_block(_output.ptr(ithr, pq, h), _weight.ptr(ithr, h, pq) + pv, v, - _S, + _SV, std::min(_block_size, cur_kv_len - pv)); } } @@ -1108,7 +1134,7 @@ struct MHAHelper { // convert to dst for (size_t pq = 0; pq < q_len; pq++) for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) - cvt_copy(output_emb.ptr(pq, h * _S), _output.ptr(ithr, pq, h), _S); + cvt_copy(output_emb.ptr(pq, h * _SV), _output.ptr(ithr, pq, h), _SV); } // compute one token, loop along batch, head dimensions and kv_len, it's special for very long kv_len with small batch tokens. @@ -1197,7 +1223,7 @@ struct MHAHelper { } // attn_w * V - _output_bhl.resize({static_cast(_nthr), B, q_len, _H, _S}); + _output_bhl.resize({static_cast(_nthr), B, q_len, _H, _SV}); // m_attn_w {B, H, q_len, kv_len} parallel_nt_static(_nthr, [&](const size_t ithr, const size_t nthr) { memset(_output_bhl.ptr(ithr, 0, 0, 0, 0), 0, _output_bhl.stride(0) * sizeof(float)); @@ -1216,7 +1242,7 @@ struct MHAHelper { attn_acc_value_block(_output_bhl.ptr(ithr, b, pq, h), _weight_bhl.ptr(b, h, pq) + pv, v, - _S, + _SV, std::min(_block_size, context_len - pv)); } } @@ -1226,8 +1252,8 @@ struct MHAHelper { parallel_for3d(B, _H, q_len, [&](size_t b, size_t h, size_t pq) { auto* temp = _output_bhl.ptr(0, b, pq, h); size_t temp_stride = _output_bhl.stride(0); - auto* dst = output_emb.ptr(b, pq, h * _S); - attn_reduce(dst, temp, _nthr, _S, temp_stride); + auto* dst = output_emb.ptr(b, pq, h * _SV); + attn_reduce(dst, temp, _nthr, _SV, temp_stride); }); } }; @@ -1375,17 +1401,17 @@ struct MHA { _helper._block_size, _helper._S, _helper._block_size, _helper._S); if (q_is_xf16) { - pack_32Nx16K(_helper._wv_scratch_b.template ptr(batch_in_reorder, kv_block, hk), - v_ptr, - _helper._output.template ptr(ithr), - _helper._block_size, - _helper._S, - rnd_up(_helper._S, _helper._block_size), - _helper._S); + pack_32NxK(_helper._wv_scratch_b.template ptr(batch_in_reorder, kv_block, hk), + v_ptr, + _helper._output.template ptr(ithr), + _helper._block_size, + _helper._SV, + rnd_up(_helper._SV, _helper._block_size), + _helper._SV); } else { // need to decompress if (!q_cache_is_same) { - dequant(_helper._wv_scratch_b.template ptr(batch_in_reorder, kv_block, hk), v_ptr, _helper._block_size, _helper._S); + dequant(_helper._wv_scratch_b.template ptr(batch_in_reorder, kv_block, hk), v_ptr, _helper._block_size, _helper._SV); } } }); @@ -1429,7 +1455,7 @@ struct MHA { sub_query = sub_query.permute({1, 0, 2}); _helper.exec_kernel_multiple(sub_query, v_cache, - output_emb.slice(0, batch_in_token, batch_in_token + q_len).reshape({q_len, _helper._H * _helper._S}), + output_emb.slice(0, batch_in_token, batch_in_token + 
q_len).reshape({q_len, _helper._H * _helper._SV}), _helper._qk_scratch_b.slice(0, batch_in_reorder, batch_in_reorder), _helper._wv_scratch_b.slice(0, batch_in_reorder, batch_in_reorder), block_indices.ptr() + block_indices_begins.ptr()[batch_in_seq], @@ -1518,7 +1544,8 @@ struct AttentionExecutor : public PagedAttentionExecutor { // The layout for per token per head for u8 kv cache: // |scale(f32)|zeropoint(f32)|quantized feature(u8,idx_1)|quantized feature(u8,idx_2)|...|quantized feature(u8,idx_S)| // The actual size needs to deduct scale and zeropoint. - auto S = v_cache.size(3) - (k_cache.m_dt == ov::element::Type_t::u8 ? sizeof(float) * 2 : 0); + auto S = k_cache.size(3) - (k_cache.m_dt == ov::element::Type_t::u8 ? sizeof(float) * 2 : 0); + auto SV = v_cache.size(3) - (k_cache.m_dt == ov::element::Type_t::u8 ? sizeof(float) * 2 : 0); auto block_size = k_cache.size(2); auto H = q.size(1) / S; auto h_each_group_len = 1; @@ -1529,16 +1556,16 @@ struct AttentionExecutor : public PagedAttentionExecutor { q.assert_dims({B_token, H * S}); k.assert_dims({B_token, Hk * S}); - v.assert_dims({B_token, Hk * S}); + v.assert_dims({B_token, Hk * SV}); q = q.reshape({B_token, H, 1, S}); k = k.reshape({B_token, Hk, 1, S}); - v = v.reshape({B_token, Hk, 1, S}); + v = v.reshape({B_token, Hk, 1, SV}); if (k_cache.m_dt == ov::element::Type_t::u8) { k_cache.assert_dims({0, Hk, block_size, S + sizeof(float) * 2}, true); - v_cache.assert_dims({k_cache.m_dims[0], Hk, block_size, S + sizeof(float) * 2}); + v_cache.assert_dims({k_cache.m_dims[0], Hk, block_size, SV + sizeof(float) * 2}); } else { k_cache.assert_dims({0, Hk, block_size, S}, true); - v_cache.assert_dims({k_cache.m_dims[0], Hk, block_size, S}); + v_cache.assert_dims({k_cache.m_dims[0], Hk, block_size, SV}); } past_lens.assert_dims({B_seq}); subsequence_begins.assert_dims({B_seq + 1}); @@ -1549,14 +1576,13 @@ struct AttentionExecutor : public PagedAttentionExecutor { if (alibi_slopes) { alibi_slopes.assert_dims({H}); } - output_emb.assert_dims({B_token, H * S}); - output_emb = output_emb.reshape({B_token, 1, H * S}); + output_emb.assert_dims({B_token, H * SV}); + output_emb = output_emb.reshape({B_token, 1, H * SV}); // TODO: enable block_size to be multiple of 32 OPENVINO_ASSERT(block_size == 32, "CPU: block size must be 32, current: ", block_size); - OPENVINO_ASSERT(S % 16 == 0, "CPU: head size must be multiple of 16, current: ", S); - _helper.init(H, S, Hk, h_each_group_len, block_size, sliding_window, scale, max_context_len, alibi_slopes); + _helper.init(H, S, SV, Hk, h_each_group_len, block_size, sliding_window, scale, max_context_len, alibi_slopes); } void concat_pastkv(const PlainTensor& k, const PlainTensor& v, const PlainTensor& k_cache, const PlainTensor& v_cache, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index 0670c744a6da91..1543c168403382 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -861,6 +861,7 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, auto H = query.size(1); auto q_len = query.size(2); auto S = query.size(3); + auto SV = present_value.size(3); auto h_group_num = present_value.size(1); auto precision = ov::element::f32; if (std::is_same::value) { @@ -991,10 +992,10 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, // attn_w * V // 
Fast Path if there are enough works for each thread if (B >= static_cast(nthr)) { - buf_attn_score.resize({static_cast(nthr), q_len, h_each_group_len, S}); + buf_attn_score.resize({static_cast(nthr), q_len, h_each_group_len, SV}); parallel_for2d(B, h_group_num, [&](size_t b, size_t h_group) { auto ithr = parallel_get_thread_num(); - memset(buf_attn_score.ptr(ithr), 0, q_len * h_each_group_len * S * sizeof(T3)); + memset(buf_attn_score.ptr(ithr), 0, q_len * h_each_group_len * SV * sizeof(T3)); for (size_t pv = 0; pv < kv_len; pv++) { auto b_kv = beams ? beams.ptr(b)[pv] : b; auto* v = present_value.ptr(b_kv, h_group, pv); @@ -1004,7 +1005,7 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, attn_acc_value(buf_attn_score.ptr(ithr, pq, group_idx), buf_attn_w.ptr(b, h, pq)[pv], v, - S, + SV, p + 0, p + 1); } @@ -1014,15 +1015,15 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, for (size_t pq = 0; pq < q_len; pq++) { for (size_t h = h_group * h_each_group_len, group_idx = 0; h < (h_group + 1) * h_each_group_len; h++, group_idx++) { - auto* dst = has_out_transpose ? output_emb.ptr(b, pq, h * S) : output_emb.ptr(b, h, pq); - cvt_copy(dst, buf_attn_score.ptr(ithr, pq, group_idx), S); + auto* dst = has_out_transpose ? output_emb.ptr(b, pq, h * SV) : output_emb.ptr(b, h, pq); + cvt_copy(dst, buf_attn_score.ptr(ithr, pq, group_idx), SV); } } }); return; } - buf_attn_score.resize({static_cast(nthr), B, q_len, H, S}); + buf_attn_score.resize({static_cast(nthr), B, q_len, H, SV}); // buf_attn_w {B, H, q_len, kv_len} parallel_nt_static(nthr, [&](const size_t ithr, const size_t nthr) { size_t start{0}, end{0}; @@ -1041,7 +1042,7 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, attn_acc_value(buf_attn_score.ptr(ithr, b, 0, h_group), buf_attn_w.ptr(b, h_group, 0, pv)[0], v, - S, + SV, p + 0, p + 1); parallel_it_step(pv, kv_len, b, B, h_group, h_group_num); @@ -1056,7 +1057,7 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, attn_acc_value(buf_attn_score.ptr(ithr, b, pq, h), buf_attn_w.ptr(b, h, pq)[pv], v, - S, + SV, p + 0, p + 1); } @@ -1070,8 +1071,8 @@ static void mha_single_token_kernel(const ov::intel_cpu::PlainTensor& query, parallel_for3d(B, H, q_len, [&](size_t b, size_t h, size_t pq) { auto* temp = buf_attn_score.ptr(0, b, pq, h); size_t temp_stride = buf_attn_score.stride(0); - auto* dst = has_out_transpose ? output_emb.ptr(b, pq, h * S) : output_emb.ptr(b, h, pq); - attn_reduce(dst, temp, nthr, S, temp_stride); + auto* dst = has_out_transpose ? 
output_emb.ptr(b, pq, h * SV) : output_emb.ptr(b, h, pq); + attn_reduce(dst, temp, nthr, SV, temp_stride); }); } diff --git a/src/plugins/intel_cpu/src/nodes/paged_attn.cpp b/src/plugins/intel_cpu/src/nodes/paged_attn.cpp index 6bf7d3099a85d9..b9666388490f74 100644 --- a/src/plugins/intel_cpu/src/nodes/paged_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/paged_attn.cpp @@ -152,18 +152,32 @@ void PagedAttention::execute(dnnl::stream strm) { inputs[i] = getSrcMemoryAtPort(i); } - const auto& queryDims = inputs[0]->getStaticDims(); + auto outDims = inputs[0]->getStaticDims(); + const auto& keyDims = inputs[1]->getStaticDims(); + const auto& valueDims = inputs[2]->getStaticDims(); + // value head_size may be not same with key + if (keyDims[1] != valueDims[1]) { + // The outDims[1] should be `num_heads * v_head_size`, it can be got from: + // because: + // q: query_ps[1] = num_heads * head_size + // k: key_ps[1] = num_kv_heads * head_size + // v: value_ps[1] = num_kv_heads * v_head_size + // therefore: + // q * v / k = (num_heads * head_size) * (num_kv_heads * v_head_size) / + // (num_kv_heads * head_size) = num_heads * v_head_size + outDims[1] = outDims[1] * valueDims[1] / keyDims[1]; + } if (m_hasScore) { size_t len = 0; const auto& pastLensDims = inputs[5]->getStaticDims(); auto pastLens = inputs[5]->getDataAs(); for (size_t i = 0; i < pastLensDims[0]; i++) len += pastLens[i]; - len += queryDims[0]; + len += outDims[0]; VectorDims scoreDims{len}; - redefineOutputMemory({queryDims, scoreDims}); + redefineOutputMemory({outDims, scoreDims}); } else { - redefineOutputMemory(0, queryDims); + redefineOutputMemory(0, outDims); } outputs[0] = getDstMemoryAtPort(0); diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp index eecba2acff260b..e70a3932b11b1e 100644 --- a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp @@ -135,6 +135,7 @@ struct MHAKernel { auto H = query.size(1); auto q_len = query.size(2); auto head_size = query.size(3); + auto head_size_v = present_value.size(3); auto kv_len = present_key.size(2); auto Hk = present_key.size(1); size_t h_each_group_len = H / Hk; @@ -145,7 +146,7 @@ struct MHAKernel { parallel_for2d(B, H, [&](size_t b, size_t h) { std::vector attn_score(kv_len); - std::vector word_vec(head_size, 0.0f); + std::vector word_vec(head_size_v, 0.0f); for (size_t m = 0; m < q_len; m++) { // dot-product to get attention scores @@ -185,14 +186,14 @@ struct MHAKernel { softmax(&attn_score[0], ncausal); // linearly combine value - word_vec.assign(head_size, 0.0f); + word_vec.assign(head_size_v, 0.0f); for (size_t n = 0; n < ncausal; n++) { auto* v = &present_value.at({b, h / h_each_group_len, n, 0}, true); - accumulate(word_vec.data(), v, head_size, attn_score[n]); + accumulate(word_vec.data(), v, head_size_v, attn_score[n]); } // output [B, L1, H*head_size] - auto* out = has_out_transpose ? &output_emb.at({b, m, h * head_size}) : &output_emb.at({b, h, m}); + auto* out = has_out_transpose ? 
&output_emb.at({b, m, h * head_size_v}) : &output_emb.at({b, h, m}); std::copy(word_vec.begin(), word_vec.end(), out); } }); @@ -259,13 +260,14 @@ struct MHAKernel { return dnnl_dims; } - void prepare_brgemm_prim(dnnl::stream strm, PlainTensor& query, PlainTensor& present_key, bool has_out_transpose) { + void prepare_brgemm_prim(dnnl::stream strm, PlainTensor& query, PlainTensor& present_key, PlainTensor& present_value, bool has_out_transpose) { auto in_type = precision_of::value; auto qkv_dt = DnnlExtensionUtils::ElementTypeToDataType(in_type); auto B = query.size(0); auto H = query.size(1); auto q_len = query.size(2); auto head_size = query.size(3); + auto head_size_v = present_value.size(3); auto kv_len = present_key.size(2); auto Hk = present_key.size(1); brgemmKey qk_key = {q_len, kv_len, head_size, query.stride(2), present_key.stride(2), kv_len, true, in_type}; @@ -289,19 +291,19 @@ struct MHAKernel { qk_gemm_ptr = qk_result.first; if (has_out_transpose) - out_md = dnnl::memory::desc(make_dnnl_dims({B, q_len, H, head_size}), qkv_dt, tag::abcd); + out_md = dnnl::memory::desc(make_dnnl_dims({B, q_len, H, head_size_v}), qkv_dt, tag::abcd); else - out_md = dnnl::memory::desc(make_dnnl_dims({B, H, q_len, head_size}), qkv_dt, tag::abcd); + out_md = dnnl::memory::desc(make_dnnl_dims({B, H, q_len, head_size_v}), qkv_dt, tag::abcd); size_t ldc_index = 2; if (has_out_transpose) { ldc_index = 1; } brgemmKey wv_key = {q_len, - head_size, + head_size_v, kv_len, kv_len * (in_type == ov::element::Type_t::f32 ? 1 : 2), - present_key.stride(2), + present_value.stride(2), static_cast(out_md.get_strides()[ldc_index]), false, in_type}; @@ -329,9 +331,9 @@ struct MHAKernel { const size_t m_block_size = qk_gemm_ptr->get_mblk_size(); weight_score.resize({static_cast(parallel_get_max_threads()), H, m_block_size, kv_len}); if (has_out_transpose) { - fp32_out.resize({B, q_len, H, head_size}); + fp32_out.resize({B, q_len, H, head_size_v}); } else { - fp32_out.resize({B, H, q_len, head_size}); + fp32_out.resize({B, H, q_len, head_size_v}); } return; } @@ -348,7 +350,7 @@ struct MHAKernel { const auto B = query.size(0); const auto H = query.size(1); const auto q_len = query.size(2); - const auto head_size = query.size(3); + const auto head_size_v = present_value.size(3); const auto Hk = present_key.size(1); const auto kv_len = present_key.size(2); size_t h_each_group_len = H / Hk; @@ -423,7 +425,7 @@ struct MHAKernel { if (is_xf16) { fp32_out_ptr = has_out_transpose ? &fp32_out.at({b, m_start, h, 0}) : &fp32_out.at({b, h, m_start, 0}); } else { - fp32_out_ptr = has_out_transpose ? &output_emb.at({b, m_start, h * head_size}) : &output_emb.at({b, h, m_start, 0}); + fp32_out_ptr = has_out_transpose ? &output_emb.at({b, m_start, h * head_size_v}) : &output_emb.at({b, h, m_start, 0}); } T* v_ptr = is_xf16 ? 
&wv_scratch_b.at({b, h / h_each_group_len, 0}) : &present_value.at({b, h / h_each_group_len, 0, 0}); @@ -436,12 +438,12 @@ struct MHAKernel { if (is_xf16) { if (has_out_transpose) { attn_memcpy2d_kernel(&fp32_out.at({b, m_start, h, 0}), - &output_emb.at({b, m_start, h * head_size}), + &output_emb.at({b, m_start, h * head_size_v}), ov::element::f32, precision_of::value, fp32_out.stride(1), output_emb.stride(1), - head_size, + head_size_v, m_cnt); } else { attn_memcpy2d_kernel(&fp32_out.at({b, h, m_start, 0}), @@ -450,7 +452,7 @@ struct MHAKernel { precision_of::value, 0, 0, - m_cnt * head_size, + m_cnt * head_size_v, 1); } } @@ -485,7 +487,7 @@ struct MHAKernel { if (d_scale == 0.0f) d_scale = 1.0f / sqrt(head_size); - prepare_brgemm_prim(strm, query, present_key, has_out_transpose); + prepare_brgemm_prim(strm, query, present_key, present_value, has_out_transpose); execute_brgemm(query, present_key, present_value, @@ -540,6 +542,7 @@ struct MHAKernel { auto H = query.size(1); auto q_len = query.size(2); auto head_size = query.size(3); + auto head_size_v = present_value.size(3); auto kv_len = present_key.size(2); auto h_group_num = present_key.size(1); size_t h_each_group_len = H / h_group_num; @@ -620,9 +623,9 @@ struct MHAKernel { arm_compute::TensorInfo outInfo; arm_compute::Tensor outTensor; - auto out = has_out_transpose ? &output_emb.at({b, m_start, h * head_size}) : &output_emb.at({b, h, m_start}); + auto out = has_out_transpose ? &output_emb.at({b, m_start, h * head_size_v}) : &output_emb.at({b, h, m_start}); auto strides = arm_compute::Strides({output_emb.stride_bytes(1), output_emb.stride_bytes(2)}); - GemmKernel out_gemm(m_cnt, kv_len, head_size, false, precision); + GemmKernel out_gemm(m_cnt, kv_len, head_size_v, false, precision); arm_compute::Strides vStrides({present_value.stride_bytes(3), present_value.stride_bytes(2)}); out_gemm.executeGemm(qkTensor.buffer(), @@ -685,6 +688,7 @@ struct MHAKernel { auto H = query.size(1); auto q_len = query.size(2); auto head_size = query.size(3); + auto head_size_v = present_value.size(3); auto kv_len = present_key.size(2); auto h_group_num = present_key.size(1); size_t h_each_group_len = H / h_group_num; @@ -786,7 +790,7 @@ struct MHAKernel { mlas_sgemm("N", "N", m_cnt, - head_size, + head_size_v, kv_len, 1.0f, qk, @@ -794,7 +798,7 @@ struct MHAKernel { v_ptr, present_value.stride(2), 0.f, - has_out_transpose ? &output_emb.at({b, m_start, h * head_size}) : &output_emb.at({b, h, m_start}), + has_out_transpose ? &output_emb.at({b, m_start, h * head_size_v}) : &output_emb.at({b, h, m_start}), has_out_transpose ? 
output_emb.stride(1) : output_emb.stride(2), 1); }); @@ -875,7 +879,7 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt PlainTensor attn_mask; PlainTensor output_emb(output); float scale_input = 0.0f; - size_t B, L1, L0, S; + size_t B, L1, L0, S, SV; q_input.reset(inputs[0]); k_input.reset(inputs[1]); @@ -911,18 +915,19 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt B = q_input.size(0); L1 = q_input.size(2); S = q_input.size(3); + SV = v_input.size(3); L0 = present_key.size(2) - L1; auto Hk = k_input.size(1); if (fuse_concat) { k_input.assert_dims({B, Hk, L1, S}); - v_input.assert_dims({B, Hk, L1, S}); + v_input.assert_dims({B, Hk, L1, SV}); } else { k_input.assert_dims({B, Hk, L0 + L1, S}); - v_input.assert_dims({B, Hk, L0 + L1, S}); + v_input.assert_dims({B, Hk, L0 + L1, SV}); } present_key.assert_dims({B, Hk, L0 + L1, S}); - present_value.assert_dims({B, Hk, L0 + L1, S}); + present_value.assert_dims({B, Hk, L0 + L1, SV}); if (beam_table) beam_table.assert_dims({B, L0 + L1}); @@ -1222,6 +1227,7 @@ void ScaledDotProductAttention::resetBeamTablePastkv(const MemoryPtr& mem_cur_k, auto H = cur_k.size(1); auto L1 = cur_k.size(2); auto S = cur_k.size(3); + auto SV = cur_v.size(3); auto reverse = [&order] (const std::vector& cur) { std::vector result(cur.size()); for (size_t i = 0; i < cur.size(); i++) { @@ -1244,12 +1250,17 @@ void ScaledDotProductAttention::resetBeamTablePastkv(const MemoryPtr& mem_cur_k, // BHLS is the stated input shape of SDPA, however internally we use LBHS for KV-cache storage. // real_order is used to permute the original shape to LBHS std::vector shape = reverse({B, H, (L0 + L1) * 2, S}); - auto mem_desc = std::make_shared(kvcache_precision, - Shape(shape), - permute_axes(shape, real_order), - real_order); - auto new_internal_mem_k = std::make_shared(getEngine(), mem_desc); - auto new_internal_mem_v = std::make_shared(getEngine(), mem_desc); + auto mem_desc_k = std::make_shared(kvcache_precision, + Shape(shape), + permute_axes(shape, real_order), + real_order); + auto new_internal_mem_k = std::make_shared(getEngine(), mem_desc_k); + shape = reverse({B, H, (L0 + L1) * 2, SV}); + auto mem_desc_v = std::make_shared(kvcache_precision, + Shape(shape), + permute_axes(shape, real_order), + real_order); + auto new_internal_mem_v = std::make_shared(getEngine(), mem_desc_v); PlainTensor new_pastk, new_pastv, old_past_k, old_past_v; new_pastk.reset(new_internal_mem_k); @@ -1271,7 +1282,7 @@ void ScaledDotProductAttention::resetBeamTablePastkv(const MemoryPtr& mem_cur_k, S * old_past_k.m_element_size); memcpy(&new_pastv.at({b, h, m}), &old_past_v.at({b_kv, h, m}), - S * old_past_v.m_element_size); + SV * old_past_v.m_element_size); }); } if (kvcache_precision == ov::element::u8) { @@ -1301,16 +1312,26 @@ void ScaledDotProductAttention::resetBeamTablePastkv(const MemoryPtr& mem_cur_k, std::vector new_shape = reverse({B, H, (L0 + L1), S}); // Get the shape of physical layout using real order - auto strides = mem_desc->getStrides(); - mem_desc = std::make_shared(kvcache_precision, - Shape(new_shape), - permute_axes(new_shape, real_order), - real_order, - 0, - VectorDims{}, - strides); - new_internal_mem_k->redefineDesc(mem_desc); - new_internal_mem_v->redefineDesc(mem_desc); + auto strides = mem_desc_k->getStrides(); + mem_desc_k = std::make_shared(kvcache_precision, + Shape(new_shape), + permute_axes(new_shape, real_order), + real_order, + 0, + VectorDims{}, + mem_desc_k->getStrides()); + 
new_internal_mem_k->redefineDesc(mem_desc_k); + new_shape = reverse({B, H, (L0 + L1), SV}); + // Get the shape of physical layout using real order + strides = mem_desc_v->getStrides(); + mem_desc_v = std::make_shared(kvcache_precision, + Shape(new_shape), + permute_axes(new_shape, real_order), + real_order, + 0, + VectorDims{}, + strides); + new_internal_mem_v->redefineDesc(mem_desc_v); if (kvcache_precision == ov::element::u8) { // past_k's shape is BHLS, internal layout LBHS // scale_zp's shape is LBHS, internal layout LBHS @@ -1324,7 +1345,7 @@ void ScaledDotProductAttention::resetBeamTablePastkv(const MemoryPtr& mem_cur_k, m_k_state->assign_internal_state(new_internal_mem_k); m_v_state->assign_internal_state(new_internal_mem_v); m_k_state->assign_internal_state_max_size(B * H * (L0 + L1) * 2 * S); - m_v_state->assign_internal_state_max_size(B * H * (L0 + L1) * 2 * S); + m_v_state->assign_internal_state_max_size(B * H * (L0 + L1) * 2 * SV); } // 3. create beam table { @@ -1534,6 +1555,7 @@ void ScaledDotProductAttention::updatePastkv(const MemoryPtr& mem_cur_k, const M auto H = cur_k.size(1); auto L1 = cur_k.size(2); auto S = cur_k.size(3); + auto SV = cur_v.size(3); auto reverse = [&order] (const std::vector& cur) { std::vector result(cur.size()); for (size_t i = 0; i < cur.size(); i++) { @@ -1558,13 +1580,15 @@ void ScaledDotProductAttention::updatePastkv(const MemoryPtr& mem_cur_k, const M // new_shape is the shape used by the original model which maybe different from BHLS, reverse here is to permute BHLS to original model shape. // BHLS is the stated input shape of SDPA, however internally we use LBHS for KV-cache storage. // real_order is used to permute the original shape to LBHS - std::vector new_shape = reverse({B, H, (L0 + L1) * 2, S}); - auto real_shape = permute_axes(new_shape, real_order); - auto mem_desc = - std::make_shared(kvcache_precision, Shape(new_shape), real_shape, real_order); + auto new_memory = [&] (size_t new_S) { + std::vector new_shape = reverse({B, H, (L0 + L1) * 2, new_S}); + auto real_shape = permute_axes(new_shape, real_order); + auto mem_desc = std::make_shared(kvcache_precision, Shape(new_shape), real_shape, real_order); + return std::make_shared(getEngine(), mem_desc); + }; - auto new_internal_mem_k = std::make_shared(getEngine(), mem_desc); - auto new_internal_mem_v = std::make_shared(getEngine(), mem_desc); + auto new_internal_mem_k = new_memory(S); + auto new_internal_mem_v = new_memory(SV); PlainTensor new_pastk, new_pastv; new_pastk.reset(new_internal_mem_k); @@ -1585,7 +1609,7 @@ void ScaledDotProductAttention::updatePastkv(const MemoryPtr& mem_cur_k, const M m_k_state->assign_internal_state(new_internal_mem_k); m_v_state->assign_internal_state(new_internal_mem_v); m_k_state->assign_internal_state_max_size(2 * (L0 + L1) * B * H * S); - m_v_state->assign_internal_state_max_size(2 * (L0 + L1) * B * H * S); + m_v_state->assign_internal_state_max_size(2 * (L0 + L1) * B * H * SV); if (kvcache_precision == ov::element::u8) { auto& old_scale_zp_k = m_k_state->get_scale_zp(); auto& old_scale_zp_v = m_v_state->get_scale_zp(); @@ -1610,21 +1634,23 @@ void ScaledDotProductAttention::updatePastkv(const MemoryPtr& mem_cur_k, const M // new_shape is the shape used by the original model which maybe different from BHLS, reverse here is to permute BHLS to original model shape. // BHLS is the stated input shape of SDPA, however internally we use LBHS for KV-cache storage. 
// real_order is used to permute the original shape to LBHS - std::vector new_shape = reverse({B, H, (L0 + L1), S}); - VectorDims strides(new_shape.size(), 1); - auto real_shape = permute_axes(new_shape, real_order); - for (size_t i = 2; i <= real_shape.size(); i++) { - strides[real_shape.size() - i] = strides[real_shape.size() - (i-1)] * real_shape[real_shape.size() - (i-1)]; - } - auto mem_desc = std::make_shared(kvcache_precision, - Shape(new_shape), - real_shape, - real_order, - 0, - VectorDims{}, - strides); - internal_mem_k->redefineDesc(mem_desc); - internal_mem_v->redefineDesc(mem_desc); + auto reset_desc = [&] (size_t new_S) { + std::vector new_shape = reverse({B, H, (L0 + L1), new_S}); + VectorDims strides(new_shape.size(), 1); + auto real_shape = permute_axes(new_shape, real_order); + for (size_t i = 2; i <= real_shape.size(); i++) { + strides[real_shape.size() - i] = strides[real_shape.size() - (i-1)] * real_shape[real_shape.size() - (i-1)]; + } + return std::make_shared(kvcache_precision, + Shape(new_shape), + real_shape, + real_order, + 0, + VectorDims{}, + strides); + }; + internal_mem_k->redefineDesc(reset_desc(S)); + internal_mem_v->redefineDesc(reset_desc(SV)); if (kvcache_precision == ov::element::u8) { auto& old_scale_zp_k = m_k_state->get_scale_zp(); auto& old_scale_zp_v = m_v_state->get_scale_zp(); @@ -1640,18 +1666,19 @@ void ScaledDotProductAttention::updatePastkv(const MemoryPtr& mem_cur_k, const M // new_shape is the shape used by the original model which maybe different from BHLS, reverse here is to permute BHLS to original model shape. // BHLS is the stated input shape of SDPA, however internally we use LBHS for KV-cache storage. // real_order is used to permute the original shape to LBHS - std::vector new_shape = reverse({B, H, (L0 + L1), S}); - auto real_shape = permute_axes(new_shape, real_order); - auto mem_desc = - std::make_shared(kvcache_precision, - Shape(new_shape), - real_shape, - real_order, - 0, - VectorDims{}, - internal_mem_k->getDescWithType()->getStrides()); - internal_mem_k->redefineDesc(mem_desc); - internal_mem_v->redefineDesc(mem_desc); + auto redefine_desc = [&] (MemoryPtr& mem, size_t new_S) { + std::vector new_shape = reverse({B, H, (L0 + L1), new_S}); + auto real_shape = permute_axes(new_shape, real_order); + return std::make_shared(kvcache_precision, + Shape(new_shape), + real_shape, + real_order, + 0, + VectorDims{}, + mem->getDescWithType()->getStrides()); + }; + internal_mem_k->redefineDesc(redefine_desc(internal_mem_k, S)); + internal_mem_v->redefineDesc(redefine_desc(internal_mem_v, SV)); } if (!past_k) { diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp b/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp index e00e5cae8aae74..c2e8ebd92430bf 100644 --- a/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp @@ -20,29 +20,47 @@ class SDPAShapeInfer : public ShapeInferEmptyPads { IShapeInfer::Result infer(const std::vector>& input_shapes, const std::unordered_map& data_dependency) override { const auto& query_dims = input_shapes.front().get(); - VectorDims present_kv_dims = input_shapes.back().get(); + VectorDims present_v_dims = input_shapes.back().get(); const auto& beam_idx_dims = input_shapes.end()[-3].get(); const auto& permute_axes = m_config.permute_axes; if (permute_axes.empty()) { // [B, H, L, S] - present_kv_dims[0] = beam_idx_dims[0]; - present_kv_dims[2] += query_dims[2]; - return {{query_dims, 
present_kv_dims, present_kv_dims}, ShapeInferStatus::success}; + present_v_dims[0] = beam_idx_dims[0]; + present_v_dims[2] += query_dims[2]; + // normal and fast path + if (present_v_dims[3] == query_dims[3]) + return {{query_dims, present_v_dims, present_v_dims}, ShapeInferStatus::success}; + + // diff kv feature size + auto output_dims = query_dims; + output_dims[3] = present_v_dims[3]; + auto present_k_dims = present_v_dims; + present_k_dims[3] = query_dims[3]; + return {{output_dims, present_k_dims, present_v_dims}, ShapeInferStatus::success}; } // permute_axes[0,1,2,3] gives axis indices of B,H,L,S for query & present_kv const size_t batch_index = permute_axes[0]; const size_t length_index = permute_axes[2]; - present_kv_dims[batch_index] = beam_idx_dims[0]; - present_kv_dims[length_index] += query_dims[length_index]; + present_v_dims[batch_index] = beam_idx_dims[0]; + present_v_dims[length_index] += query_dims[length_index]; auto n_dims = query_dims.size(); VectorDims output_dims(n_dims); for (size_t i = 0; i < n_dims; i++) { output_dims[i] = query_dims[permute_axes[i]]; } - return {{output_dims, present_kv_dims, present_kv_dims}, ShapeInferStatus::success}; + + // normal and fast path + if (present_v_dims[3] == query_dims[3]) + return {{output_dims, present_v_dims, present_v_dims}, ShapeInferStatus::success}; + + // diff kv feature size + output_dims[3] = present_v_dims[3]; + auto present_k_dims = present_v_dims; + present_k_dims[3] = query_dims[3]; + return {{output_dims, present_k_dims, present_v_dims}, ShapeInferStatus::success}; } port_mask_t get_port_mask() const override { diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp index 63b4520cf1b0db..4421499d10204d 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp @@ -27,35 +27,52 @@ void ov::intel_cpu::ScaledDotProductAttentionWithKVCache::validate_and_infer_typ // [B, H, L1, S] auto q_ps = get_input_partial_shape(0); // [B, H, L0, S] - auto past_kv_ps = get_input_partial_shape(input_num - 1); + auto past_k_ps = get_input_partial_shape(input_num - 2); + auto past_v_ps = get_input_partial_shape(input_num - 1); // [present_kv_batch_size] auto beam_idx_ps = get_input_partial_shape(input_num - 3); auto output_logits = q_ps; NODE_VALIDATION_CHECK(this, m_config.output_BLHxS == false); + NODE_VALIDATION_CHECK(this, q_ps.rank().is_static()); NODE_VALIDATION_CHECK(this, q_ps.size() >= 3); // permute_axes from original to [B, H, L, S] const auto& permute_axes = this->m_config.permute_axes; - if (past_kv_ps.rank().is_static()) { + if (past_k_ps.rank().is_static() || past_v_ps.rank().is_static()) { const size_t batch_index = permute_axes.empty() ? 0 : permute_axes[0]; const size_t length_index = permute_axes.empty() ? q_ps.size() - 2 : permute_axes[permute_axes.size() - 2]; const size_t head_num_index = permute_axes.empty() ? 
q_ps.size() - 3 : permute_axes[permute_axes.size() - 3]; - NODE_VALIDATION_CHECK(this, q_ps.size() == past_kv_ps.size()); + if (past_k_ps.rank().is_static()) + NODE_VALIDATION_CHECK(this, q_ps.size() == past_k_ps.size()); + if (past_v_ps.rank().is_static()) + NODE_VALIDATION_CHECK(this, q_ps.size() == past_v_ps.size()); for (size_t i = 0; i < q_ps.size(); i++) { if (i == head_num_index) { - if (q_ps[i].is_static() && past_kv_ps[i].is_static()) { + if (q_ps[i].is_static() && past_v_ps[i].is_static()) { NODE_VALIDATION_CHECK(this, - q_ps[i].get_length() % past_kv_ps[i].get_length() == 0, + q_ps[i].get_length() % past_v_ps[i].get_length() == 0, "shape not compatible at index ", i); } + if (past_k_ps[i].is_static() && past_v_ps[i].is_static()) { + NODE_VALIDATION_CHECK(this, + past_k_ps[i].get_length() == past_v_ps[i].get_length(), + "kv shape not compatible at index ", + i); + } } else { continue; } } // batch_size can be dynamically changed by gather logic - past_kv_ps[batch_index] = beam_idx_ps[0]; - past_kv_ps[length_index] += q_ps[length_index]; + if (past_k_ps.rank().is_static()) { + past_k_ps[batch_index] = beam_idx_ps[0]; + past_k_ps[length_index] += q_ps[length_index]; + } + if (past_v_ps.rank().is_static()) { + past_v_ps[batch_index] = beam_idx_ps[0]; + past_v_ps[length_index] += q_ps[length_index]; + } } if (!permute_axes.empty()) { if (q_ps.rank().is_static()) { @@ -65,9 +82,11 @@ void ov::intel_cpu::ScaledDotProductAttentionWithKVCache::validate_and_infer_typ } } } + if (output_logits.rank().is_static() && past_v_ps.rank().is_static()) + output_logits[output_logits.size() - 1] = past_v_ps[output_logits.size() - 1]; set_output_type(0, get_input_element_type(0), output_logits); - set_output_type(1, get_input_element_type(input_num - 1), past_kv_ps); - set_output_type(2, get_input_element_type(input_num - 1), past_kv_ps); + set_output_type(1, get_input_element_type(input_num - 1), past_k_ps); + set_output_type(2, get_input_element_type(input_num - 1), past_v_ps); } bool ov::intel_cpu::ScaledDotProductAttentionWithKVCache::visit_attributes(ov::AttributeVisitor& visitor) { diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/concat_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/concat_sdp.cpp index 8a9212f8998f94..f049a16a7640fc 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/concat_sdp.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/arm/concat_sdp.cpp @@ -38,6 +38,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConcatSDPTest, ::testing::Combine(::testing::Values(ElementType::f16), ::testing::ValuesIn(inputShapes), ::testing::Values(false), + ::testing::Values(true, false), ::testing::Values(true, false)), ConcatSDPTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.cpp index f5a7bfacfac99f..83fc0a635546fc 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.cpp @@ -30,7 +30,8 @@ std::string ConcatSDPTest::getTestCaseName(const testing::TestParamInfo inputShapes; bool forceKVU8; bool hasShapeOf; - std::tie(inType, inputShapes, forceKVU8, hasShapeOf) = obj.param; + bool isDiffKVHeadSize; + std::tie(inType, inputShapes, forceKVU8, hasShapeOf, isDiffKVHeadSize) = 
obj.param; std::ostringstream result; result << "IS="; for (const auto& shape : inputShapes) { @@ -48,14 +49,15 @@ std::string ConcatSDPTest::getTestCaseName(const testing::TestParamInfoset_friendly_name("k"); inputParams[2]->set_friendly_name("v"); @@ -81,9 +87,15 @@ void ConcatSDPTest::SetUp() { ov::op::util::VariableInfo{inputDynamicShapes[1], inType, "pastk"}); auto pastk = std::make_shared(inputParams[3], var_k); pastk->set_friendly_name("pastk_r"); + // pastv init_cost + auto v_init_ps = inputDynamicShapes[1]; + if (m_isDiffKVHeadSize) { + v_init_ps[3] += m_diffKVHeadSize; + } + inputParams.push_back(std::make_shared(inType, v_init_ps)); auto var_v = std::make_shared( - ov::op::util::VariableInfo{inputDynamicShapes[1], inType, "pastv"}); - auto pastv = std::make_shared(inputParams[3], var_v); + ov::op::util::VariableInfo{v_init_ps, inType, "pastv"}); + auto pastv = std::make_shared(inputParams[4], var_v); pastv->set_friendly_name("pastv_r"); auto beam_idx = std::make_shared(ElementType::i32, ov::PartialShape{-1}); beam_idx->set_friendly_name("beam_idx"); @@ -125,15 +137,6 @@ void ConcatSDPTest::SetUp() { manager.run_passes(functionRefs); } -void ConcatSDPTest::generate_inputs(const std::vector& targetInputStaticShapes) { - std::vector shapes(4); - shapes[0] = targetInputStaticShapes[0]; - shapes[1] = targetInputStaticShapes[0]; - shapes[2] = targetInputStaticShapes[0]; - shapes[3] = targetInputStaticShapes[1]; - SubgraphBaseTest::generate_inputs(shapes); -} - template void strided_iota(IT first, size_t n, T value, T stride) { for (size_t i = 0; i < n; i++) { @@ -163,17 +166,26 @@ void ConcatSDPTest::generate(int idx, const std::vector& targetInputS strided_iota(static_cast(t.data()), t.get_size(), val, 0.0f); inputs.insert({param, t}); } else { + ASSERT_TRUE(param->get_element_type() == element::bf16); ov::Tensor t{ov::element::bf16, shape}; strided_iota(static_cast(t.data()), t.get_size(), val, 0.1f); inputs.insert({param, t}); } }; // q, k, v, pastkv + auto v_shape = targetInputStaticShapes[0]; + auto v_init_shape = targetInputStaticShapes[1]; + if (m_isDiffKVHeadSize) { + v_shape[3] += m_diffKVHeadSize; + v_init_shape[3] += m_diffKVHeadSize; + } + create_input(function->get_parameters()[0], targetInputStaticShapes[0], idx + 1.0f); create_input(function->get_parameters()[1], targetInputStaticShapes[0], idx + 2.0f); - create_input(function->get_parameters()[2], targetInputStaticShapes[0], idx + 3.0f); + create_input(function->get_parameters()[2], v_shape, idx + 3.0f); create_input(function->get_parameters()[3], targetInputStaticShapes[1], idx + 4.0f); - create_input(function->get_parameters()[4], ov::Shape{targetInputStaticShapes[0][0]}, idx + 0.0f); + create_input(function->get_parameters()[4], v_init_shape, idx + 4.0f); + create_input(function->get_parameters()[5], ov::Shape{targetInputStaticShapes[0][0]}, idx + 0.0f); } void ConcatSDPTest::prepare() { @@ -214,7 +226,8 @@ TEST_P(ConcatSDPTest, CompareWithRefs) { std::vector inputShapes; bool forceKVU8; bool hasShapeOf; - std::tie(inType, inputShapes, forceKVU8, hasShapeOf) = this->GetParam(); + bool isDiffKVHeadSize; + std::tie(inType, inputShapes, forceKVU8, hasShapeOf, isDiffKVHeadSize) = this->GetParam(); auto actualOutputs = run_test(function); if (!hasShapeOf) { diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.hpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.hpp index ac59e48f496b3b..83e1814f18b2ee 100644 --- 
a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.hpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/concat_sdp.hpp @@ -34,7 +34,7 @@ namespace test { template void strided_iota(IT first, size_t n, T value, T stride); -typedef std::tuple, bool, bool> ConcatSDPTestParams; +typedef std::tuple, bool, bool, bool> ConcatSDPTestParams; class ConcatSDPTest : public testing::WithParamInterface, @@ -48,9 +48,11 @@ class ConcatSDPTest : std::vector run_test(std::shared_ptr model); bool m_forceKVU8; bool m_hasShapeOf; + bool m_isDiffKVHeadSize; protected: - void generate_inputs(const std::vector& targetInputStaticShapes) override; void SetUp() override; + + static constexpr size_t m_diffKVHeadSize = 16; }; } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_sdp.cpp index 57927434524891..6761acf8b5dfb1 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_sdp.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_sdp.cpp @@ -38,6 +38,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConcatSDPTest, ::testing::Combine(::testing::Values(ElementType::f32), ::testing::ValuesIn(inputShapes), ::testing::Values(true, false), + ::testing::Values(true, false), ::testing::Values(true, false)), ConcatSDPTest::getTestCaseName); diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/concat_sdp.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/concat_sdp.cpp index 93c99048fec349..29667e2ffa3072 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/concat_sdp.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/concat_sdp.cpp @@ -38,6 +38,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConcatSDPTest, ::testing::Combine(::testing::Values(ElementType::bf16, ElementType::f16), ::testing::ValuesIn(inputShapes), ::testing::Values(true, false), + ::testing::Values(true, false), ::testing::Values(true, false)), ConcatSDPTest::getTestCaseName); From 28bb0fd510036545e9b07f3ed2650cc531f78b03 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Thu, 17 Oct 2024 19:49:32 +0400 Subject: [PATCH 054/112] [PT FE] Handle None-value case among inputs (#27102) **Details:** Handle None-value case **Ticket:** TBD --------- Signed-off-by: Kazantsev, Roman --- src/frontends/pytorch/src/node_context.cpp | 28 ++++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/frontends/pytorch/src/node_context.cpp b/src/frontends/pytorch/src/node_context.cpp index 565b0cdbd39385..6a8c370ef2b410 100644 --- a/src/frontends/pytorch/src/node_context.cpp +++ b/src/frontends/pytorch/src/node_context.cpp @@ -4,6 +4,7 @@ #include "openvino/frontend/pytorch/node_context.hpp" +#include "helper_ops/internal_op.hpp" #include "openvino/core/validation_util.hpp" #include "openvino/frontend/exception.hpp" #include "openvino/frontend/pytorch/decoder.hpp" @@ -151,13 +152,26 @@ OutputVector NodeContext::inputs() const { if (input == 0) { // Case when input can be inlined (possible only for fx decoder) if (m_decoder->is_input_inlined(i)) { - auto inlined_input = m_decoder->inlined_input(i); - FRONT_END_GENERAL_CHECK(inlined_input.size() == 1, - "Incorrect inlined input with index: ", - i, - " for operation ", - get_op_type()); - res.push_back(inlined_input[0]); + if 
(input_is_none(i)) { + // some operations like aten.index.Tensor can have None inputs + auto dummy_decoder = std::make_shared("torch::None", 1); + auto fw_node = std::make_shared(dummy_decoder, OutputVector{}); + auto attrs = fw_node->get_attrs(); + attrs["none_value"] = ""; + attrs[PtFrameworkNode::failed_conversion_key] = + "None constant cannot be converted to OpenVINO opset and should be removed by consuming " + "operation."; + fw_node->set_attrs(attrs); + res.push_back(fw_node->output(0)); + } else { + auto inlined_input = m_decoder->inlined_input(i); + FRONT_END_GENERAL_CHECK(inlined_input.size() == 1, + "Incorrect inlined input with index: ", + i, + " for operation ", + get_op_type()); + res.push_back(inlined_input[0]); + } continue; } } From 55d8c47f850076454f0f1a478fca4549a5206021 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 22:24:25 +0200 Subject: [PATCH 055/112] Update setuptools requirement from <74.1.0,>=65.6.1 to >=65.6.1,<75.3.0 in /src/bindings/python (#27098) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updates the requirements on [setuptools](https://github.com/pypa/setuptools) to permit the latest version.
Changelog

Sourced from setuptools's changelog.

v75.2.0

Features

  • Made errors when parsing Distribution data more explicit about the expected type (tuple[str, ...] | list[str]) -- by Avasam (#4578)

Bugfixes

  • Fix a TypeError when a Distribution's old included attribute was a tuple -- by Avasam (#4578)
  • Add workaround for bdist_wheel --dist-info-dir errors when customisation does not inherit from setuptools. (#4684)

v75.1.1

Bugfixes

  • Re-use pre-existing .dist-info dir when creating wheels via the build backend APIs (PEP 517) and the metadata_directory argument is passed -- by pelson (#1825)
  • Changed egg_info command to avoid adding an empty .egg-info directory while iterating over entry-points. This avoids triggering integration problems with importlib.metadata/importlib_metadata (reference: pypa/pyproject-hooks#206). (#4680)

v75.1.0

Features

  • Deprecated bdist_wheel.universal configuration. (#4617)

Bugfixes

  • Removed reference to upload_docs module in entry points. (#4650)

v75.0.0

Features

... (truncated)

Commits
  • 61a5a03 Bump version: 75.1.1 → 75.2.0
  • 8ad3ea7 Workaround for bdist_wheel.dist_info_dir problems (#4684)
  • 9af0877 Type sequence checks in setuptools/dist.py (#4578)
  • 0534fde Add news fragment
  • 50b732a Check for more specific error message
  • a663287 Add pragma for edge-case code path
  • 96be735 Workaround for bdist_wheel.dist_info_dir problems
  • 000a413 Deprecate public access to setuptools.dist.sequence
  • 00995c1 Use variable msg instead of tmpl in setuptools/dist
  • d457d0e Type sequence checks in setuptools/dist.py
  • Additional commits viewable in compare view

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- src/bindings/python/constraints.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index bb3d708a0ca23d..b3a8267e4c1f14 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -10,7 +10,7 @@ pytest-timeout==2.3.1 # Python bindings py>=1.9.0 pygments>=2.8.1 -setuptools>=65.6.1,<74.1.0 +setuptools>=65.6.1,<75.3.0 sympy>=1.10 wheel>=0.38.1 patchelf<=0.17.2.1 From 8822480e70c55d16cc2f36d4bfd59ce3e10dd36c Mon Sep 17 00:00:00 2001 From: Tiany1 <54828303+tianyiSKY1@users.noreply.github.com> Date: Fri, 18 Oct 2024 05:22:18 +0800 Subject: [PATCH 056/112] #20927 support inputs that have no batch (#26778) #20927 ### Details: - *add batch dimension before pool* - *remove batch dimension after pool* --- src/frontends/pytorch/src/op/avg_poolnd.cpp | 71 +++++++++++- src/frontends/pytorch/src/op/max_poolnd.cpp | 105 ++++++++++++++++-- src/frontends/pytorch/src/op_table.cpp | 37 +++--- .../layer_tests/pytorch_tests/test_pooling.py | 76 ++++++++----- 4 files changed, 231 insertions(+), 58 deletions(-) diff --git a/src/frontends/pytorch/src/op/avg_poolnd.cpp b/src/frontends/pytorch/src/op/avg_poolnd.cpp index 03c32259b45091..d8223b04bfe690 100644 --- a/src/frontends/pytorch/src/op/avg_poolnd.cpp +++ b/src/frontends/pytorch/src/op/avg_poolnd.cpp @@ -3,12 +3,17 @@ // #include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" #include "openvino/op/avg_pool.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/concat.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/pad.hpp" -#include "openvino/op/subtract.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" +#include "openvino/op/squeeze.hpp" +#include "openvino/op/unsqueeze.hpp" #include "utils.hpp" namespace ov { @@ -17,10 +22,31 @@ namespace pytorch { namespace op { using namespace ov::op; - -OutputVector translate_avg_poolnd(const NodeContext& context) { +OutputVector translate_avg_pool_base(const NodeContext& context, int dims) { num_inputs_check(context, 2, 7); auto input = context.get_input(0); + auto input_shape = context.mark_node(std::make_shared(input)); + + auto const_0 = v0::Constant::create(element::i64, Shape{1}, {0}); + auto const_1 = v0::Constant::create(element::i64, Shape{1}, {1}); + bool is_static = input.get_partial_shape().rank().is_static(); + bool no_batch_dim = is_static && input.get_partial_shape().rank().get_length() == dims + 1; + + if (is_static) { + if (no_batch_dim) { + input = context.mark_node(std::make_shared(input, const_0)); + } + } else { + input = context.mark_node(std::make_shared(input, const_0)); + auto unsqueeze_shape = context.mark_node(std::make_shared(input)); + auto rank = context.mark_node(std::make_shared(unsqueeze_shape)); + auto end_index = context.mark_node(std::make_shared(rank, const_1)); + auto start_index = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims - 2})); + auto reshape_pattern = + context.mark_node(std::make_shared(unsqueeze_shape, start_index, end_index, const_1, const_0)); + input = context.mark_node(std::make_shared(input, reshape_pattern, true)); + } + auto kernel = context.const_input(1); Strides strides; if (!context.input_is_none(2)) { @@ -47,8 +73,43 @@ OutputVector translate_avg_poolnd(const NodeContext& context) { } 
PYTORCH_OP_CONVERSION_CHECK(context.input_is_none(6), "Translation for aten::avg_pool2d do not support divisor_override input."); - return {context.mark_node( - std::make_shared(input, strides, pads, pads, kernel, !count_include_pad, rounding_type))}; + auto res = context.mark_node( + std::make_shared(input, strides, pads, pads, kernel, !count_include_pad, rounding_type)); + + if (is_static) { + if (no_batch_dim) { + res = context.mark_node(std::make_shared(res, const_0)); + } + } else { + auto pooled_output_shape = context.mark_node(std::make_shared(res)); + + auto start_index_input = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims})); + auto slice_input_shape = + context.mark_node(std::make_shared(input_shape, const_0, start_index_input, const_1, const_0)); + + auto start_index_pooled = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims})); + auto end_index_pooled = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {2 + dims})); + auto slice_pooled_output_shape = context.mark_node( + std::make_shared(pooled_output_shape, start_index_pooled, end_index_pooled, const_1, const_0)); + + auto concat_shape = context.mark_node( + std::make_shared(OutputVector{slice_input_shape, slice_pooled_output_shape}, 0)); + res = context.mark_node(std::make_shared(res, concat_shape, true)); + } + + return {res}; +}; + +OutputVector translate_avg_pool1d(const NodeContext& context) { + return translate_avg_pool_base(context, 1); +}; + +OutputVector translate_avg_pool2d(const NodeContext& context) { + return translate_avg_pool_base(context, 2); +}; + +OutputVector translate_avg_pool3d(const NodeContext& context) { + return translate_avg_pool_base(context, 3); }; } // namespace op diff --git a/src/frontends/pytorch/src/op/max_poolnd.cpp b/src/frontends/pytorch/src/op/max_poolnd.cpp index b6a01af1a7c2df..b846de68d28b49 100644 --- a/src/frontends/pytorch/src/op/max_poolnd.cpp +++ b/src/frontends/pytorch/src/op/max_poolnd.cpp @@ -12,9 +12,13 @@ #include "openvino/op/multiply.hpp" #include "openvino/op/pad.hpp" #include "openvino/op/range.hpp" +#include "openvino/op/reshape.hpp" #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" +#include "openvino/op/slice.hpp" +#include "openvino/op/squeeze.hpp" #include "openvino/op/subtract.hpp" +#include "openvino/op/unsqueeze.hpp" #include "openvino/op/util/framework_node.hpp" #include "utils.hpp" @@ -24,9 +28,31 @@ namespace pytorch { namespace op { using namespace ov::op; - -OutputVector translate_max_poolnd(const NodeContext& context) { +OutputVector translate_max_pool_base(const NodeContext& context, int dims) { num_inputs_check(context, 3, 6); + auto input = context.get_input(0); + auto input_shape = context.mark_node(std::make_shared(input)); + + auto const_0 = v0::Constant::create(element::i64, Shape{1}, {0}); + auto const_1 = v0::Constant::create(element::i64, Shape{1}, {1}); + bool is_static = input.get_partial_shape().rank().is_static(); + bool no_batch_dim = is_static && input.get_partial_shape().rank().get_length() == dims + 1; + + if (is_static) { + if (no_batch_dim) { + input = context.mark_node(std::make_shared(input, const_0)); + } + } else { + input = context.mark_node(std::make_shared(input, const_0)); + auto unsqueeze_shape = context.mark_node(std::make_shared(input)); + auto rank = context.mark_node(std::make_shared(unsqueeze_shape)); + auto end_index = context.mark_node(std::make_shared(rank, const_1)); + auto start_index = context.mark_node(v0::Constant::create(element::i64, 
Shape{1}, {-dims - 2})); + auto reshape_pattern = + context.mark_node(std::make_shared(unsqueeze_shape, start_index, end_index, const_1, const_0)); + input = context.mark_node(std::make_shared(input, reshape_pattern, true)); + } + auto kernel = context.const_input(1); Strides strides; if (!context.input_is_none(2)) { @@ -53,7 +79,7 @@ OutputVector translate_max_poolnd(const NodeContext& context) { rounding_type = context.const_input(5) ? RoundingType::CEIL_TORCH : RoundingType::FLOOR; } - auto res = context.mark_node(std::make_shared(context.get_input(0), + auto res = context.mark_node(std::make_shared(input, strides, dilations, pads, @@ -63,19 +89,76 @@ OutputVector translate_max_poolnd(const NodeContext& context) { PadType::EXPLICIT, element::i64, 2)); - if (context.get_output_size() == 2) { - auto out1 = res->output(0); - auto out2 = res->output(1); - return {std::move(out1), std::move(out2)}; + if (is_static) { + if (no_batch_dim) { + if (context.get_output_size() == 2) { + auto out1 = res->output(0); + auto out2 = res->output(1); + out1 = context.mark_node(std::make_shared(out1, const_0)); + out2 = context.mark_node(std::make_shared(out2, const_0)); + return {std::move(out1), std::move(out2)}; + } else { + res = context.mark_node(std::make_shared(res, const_0)); + return {res}; + } + } else { + if (context.get_output_size() == 2) { + auto out1 = res->output(0); + auto out2 = res->output(1); + return {std::move(out1), std::move(out2)}; + } else { + return {res}; + } + } + } else { - return {res}; + auto pooled_output_shape = context.mark_node(std::make_shared(res)); + + auto start_index_input = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims})); + auto slice_input_shape = + context.mark_node(std::make_shared(input_shape, const_0, start_index_input, const_1, const_0)); + + auto start_index_pooled = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-dims})); + auto end_index_pooled = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {2 + dims})); + auto slice_pooled_output_shape = context.mark_node( + std::make_shared(pooled_output_shape, start_index_pooled, end_index_pooled, const_1, const_0)); + + auto concat_shape = context.mark_node( + std::make_shared(OutputVector{slice_input_shape, slice_pooled_output_shape}, 0)); + if (context.get_output_size() == 2) { + auto out1 = res->output(0); + auto out2 = res->output(1); + out1 = context.mark_node(std::make_shared(out1, concat_shape, true)); + out2 = context.mark_node(std::make_shared(out2, concat_shape, true)); + return {std::move(out1), std::move(out2)}; + } else { + res = context.mark_node(std::make_shared(res, concat_shape, true)); + return {res}; + } } }; -OutputVector translate_max_poolnd_fx(const NodeContext& context) { - auto output = translate_max_poolnd(context); +OutputVector translate_max_pool1d(const NodeContext& context) { + return translate_max_pool_base(context, 1); +}; + +OutputVector translate_max_pool2d(const NodeContext& context) { + return translate_max_pool_base(context, 2); +}; + +OutputVector translate_max_pool3d(const NodeContext& context) { + return translate_max_pool_base(context, 3); +}; + +OutputVector translate_max_pool2d_fx(const NodeContext& context) { + auto output = translate_max_pool2d(context); return {context.mark_node(make_list_construct(output))}; -} +}; + +OutputVector translate_max_pool3d_fx(const NodeContext& context) { + auto output = translate_max_pool3d(context); + return {context.mark_node(make_list_construct(output))}; +}; } // namespace op } // 
namespace pytorch diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index b68c182e17ee2a..5d63a6dc037b14 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -42,7 +42,9 @@ OP_CONVERTER(translate_argmax); OP_CONVERTER(translate_argmin); OP_CONVERTER(translate_as_strided); OP_CONVERTER(translate_as_tensor); -OP_CONVERTER(translate_avg_poolnd); +OP_CONVERTER(translate_avg_pool1d); +OP_CONVERTER(translate_avg_pool2d); +OP_CONVERTER(translate_avg_pool3d); OP_CONVERTER(translate_bool); OP_CONVERTER(translate_batch_norm); OP_CONVERTER(translate_bitwise_and); @@ -139,7 +141,9 @@ OP_CONVERTER(translate_masked_scatter); OP_CONVERTER(translate_masked_select); OP_CONVERTER(translate_max); OP_CONVERTER(translate_maximum); -OP_CONVERTER(translate_max_poolnd); +OP_CONVERTER(translate_max_pool1d); +OP_CONVERTER(translate_max_pool2d); +OP_CONVERTER(translate_max_pool3d); OP_CONVERTER(translate_mean); OP_CONVERTER(translate_meshgrid); OP_CONVERTER(translate_min); @@ -281,7 +285,8 @@ OP_CONVERTER(translate_leaky_relu_fx); OP_CONVERTER(translate_log_sigmoid_fx); OP_CONVERTER(translate_log_softmax_fx); OP_CONVERTER(translate_max_dim_fx); -OP_CONVERTER(translate_max_poolnd_fx); +OP_CONVERTER(translate_max_pool2d_fx); +OP_CONVERTER(translate_max_pool3d_fx); OP_CONVERTER(translate_mean_fx); OP_CONVERTER(translate_min_dim_fx); OP_CONVERTER(translate_new_full_fx); @@ -380,9 +385,9 @@ const std::unordered_map get_supported_ops_ts() { {"aten::atanh", op::optional_out, 1>}, {"aten::atanh_", op::inplace_op>}, - {"aten::avg_pool1d", op::quantizable_op}, - {"aten::avg_pool2d", op::quantizable_op}, - {"aten::avg_pool3d", op::quantizable_op}, + {"aten::avg_pool1d", op::quantizable_op}, + {"aten::avg_pool2d", op::quantizable_op}, + {"aten::avg_pool3d", op::quantizable_op}, {"aten::baddbmm", op::translate_addmm}, {"aten::batch_norm", op::translate_batch_norm}, {"aten::bitwise_and", op::translate_bitwise_and}, @@ -534,12 +539,12 @@ const std::unordered_map get_supported_ops_ts() { {"aten::max", op::translate_max}, {"aten::mv", op::translate_1to1_match_2_inputs}, {"aten::maximum", op::translate_maximum}, - {"aten::max_pool1d", op::quantizable_op}, - {"aten::max_pool1d_with_indices", op::quantizable_op}, - {"aten::max_pool2d", op::quantizable_op}, - {"aten::max_pool2d_with_indices", op::quantizable_op}, - {"aten::max_pool3d", op::quantizable_op}, - {"aten::max_pool3d_with_indices", op::quantizable_op}, + {"aten::max_pool1d", op::quantizable_op}, + {"aten::max_pool1d_with_indices", op::quantizable_op}, + {"aten::max_pool2d", op::quantizable_op}, + {"aten::max_pool2d_with_indices", op::quantizable_op}, + {"aten::max_pool3d", op::quantizable_op}, + {"aten::max_pool3d_with_indices", op::quantizable_op}, {"aten::mean", op::quantizable_op}, {"aten::meshgrid", op::translate_meshgrid}, {"aten::min", op::translate_min}, @@ -771,8 +776,8 @@ const std::unordered_map get_supported_ops_fx() { {"aten.asinh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.atan.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.atanh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, - {"aten.avg_pool2d.default", op::translate_avg_poolnd}, - {"aten.avg_pool3d.default", op::translate_avg_poolnd}, + {"aten.avg_pool2d.default", op::translate_avg_pool2d}, + {"aten.avg_pool3d.default", op::translate_avg_pool3d}, {"aten.baddbmm.default", op::translate_addmm_fx}, {"aten.bitwise_and.Scalar", 
op::translate_bitwise_and}, {"aten.bitwise_and.Tensor", op::translate_bitwise_and}, @@ -870,8 +875,8 @@ const std::unordered_map get_supported_ops_fx() { {"aten.masked_fill_.Tensor", op::inplace_op}, {"aten.max.default", op::translate_max}, {"aten.max.dim", op::translate_max_dim_fx}, - {"aten.max_pool2d_with_indices.default", op::translate_max_poolnd_fx}, - {"aten.max_pool3d_with_indices.default", op::translate_max_poolnd_fx}, + {"aten.max_pool2d_with_indices.default", op::translate_max_pool2d_fx}, + {"aten.max_pool3d_with_indices.default", op::translate_max_pool3d_fx}, {"aten.maximum.default", op::translate_maximum}, {"aten.mean.default", op::translate_mean_fx}, {"aten.mean.dim", op::translate_mean_fx}, diff --git a/tests/layer_tests/pytorch_tests/test_pooling.py b/tests/layer_tests/pytorch_tests/test_pooling.py index 32c8a973cb1c92..1924df2484f177 100644 --- a/tests/layer_tests/pytorch_tests/test_pooling.py +++ b/tests/layer_tests/pytorch_tests/test_pooling.py @@ -36,10 +36,8 @@ class TestPooling(PytorchLayerTest): - def _prepare_input(self, ndim=4): - import numpy as np - shape = (1, 3, 15, 15, 15) - return (np.random.randn(*shape[:ndim]).astype(np.float32),) + def _prepare_input(self): + return (self.input_tensor,) def create_model(self, op_type, kernel_size, stride, padding, dilation=1, ceil_mode=True, count_include_pad=True, dtype=torch.float32): class aten_avg_pooling_base(torch.nn.Module): @@ -129,121 +127,147 @@ def forward(self, x): return aten_pooling(), ref_net, f"aten::{op_type}" + @pytest.mark.parametrize("input_shape", [[1, 3, 15], [3, 15]]) @pytest.mark.parametrize("params", d1_params) @pytest.mark.parametrize("ceil_mode", [True, False]) @pytest.mark.parametrize("count_include_pad", [True, False]) + @pytest.mark.parametrize("is_dynamic_shapes", [True, False]) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_avg_pool1d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): + def test_avg_pool1d(self, input_shape, params, ceil_mode, count_include_pad, ie_device, precision, ir_version, is_dynamic_shapes): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model("avg_pool1d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), - ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 3}, trace_model=True, - dynamic_shapes=False) + ie_device, precision, ir_version, trace_model=True, + dynamic_shapes=is_dynamic_shapes) - @pytest.mark.parametrize( - "params", d2_params) + @pytest.mark.parametrize("input_shape", [[1, 3, 15, 15], [3, 15, 15]]) + @pytest.mark.parametrize("params", d2_params) @pytest.mark.parametrize("ceil_mode", [True, False]) @pytest.mark.parametrize("count_include_pad", [True, False]) + @pytest.mark.parametrize("is_dynamic_shapes", [True, False]) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): + def test_avg_pool2d(self, input_shape, params, ceil_mode, count_include_pad, ie_device, precision, ir_version, is_dynamic_shapes): if ceil_mode and count_include_pad and np.array_equal(np.array(params["kernel_size"]), np.array([8, 8])): 
pytest.xfail("Ticket - 150292") + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model("avg_pool2d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), - ie_device, precision, ir_version, trace_model=True, freeze_model=False, dynamic_shapes=False) + ie_device, precision, ir_version, trace_model=True, freeze_model=False, dynamic_shapes=is_dynamic_shapes) + @pytest.mark.parametrize("input_shape", [[1, 3, 15, 15, 15], [3, 15, 15, 15]]) @pytest.mark.parametrize("params", d3_params) @pytest.mark.parametrize("ceil_mode", [True, False]) @pytest.mark.parametrize("count_include_pad", [True, False]) + @pytest.mark.parametrize("is_dynamic_shapes", [True, False]) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_avg_pool3d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): + def test_avg_pool3d(self, input_shape, params, ceil_mode, count_include_pad, ie_device, precision, ir_version, is_dynamic_shapes): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model("avg_pool3d", **params, ceil_mode=ceil_mode, count_include_pad=count_include_pad), - ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 5}, trace_model=True, - dynamic_shapes=False) + ie_device, precision, ir_version, trace_model=True, + dynamic_shapes=is_dynamic_shapes) + @pytest.mark.parametrize("input_shape", [[1, 3, 15], [3, 15]]) @pytest.mark.parametrize("params", d1_params) @pytest.mark.parametrize("ceil_mode", [True, False]) @pytest.mark.parametrize("dilation", [1, 2]) + @pytest.mark.parametrize("is_dynamic_shapes", [True, False]) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_torch_export @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_max_pool1d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): + def test_max_pool1d(self, input_shape, params, ceil_mode, dilation, ie_device, precision, ir_version, is_dynamic_shapes): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model("max_pool1d", **params, ceil_mode=ceil_mode, dilation=dilation), - ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 3}, dynamic_shapes=False) + ie_device, precision, ir_version, dynamic_shapes=is_dynamic_shapes) + @pytest.mark.parametrize("input_shape", [[1, 3, 15, 15], [3, 15, 15]]) @pytest.mark.parametrize("params", d2_params) @pytest.mark.parametrize("ceil_mode", [True, False]) @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.parametrize("dtype", [torch.float32, torch.int32]) + @pytest.mark.parametrize("is_dynamic_shapes", [True, False]) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_max_pool2d(self, params, ceil_mode, dilation, dtype, ie_device, precision, ir_version): + def test_max_pool2d(self, input_shape, params, ceil_mode, dilation, dtype, ie_device, precision, ir_version, is_dynamic_shapes): to_trace = False if params["stride"] == []: to_trace = True + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model("max_pool2d", **params, ceil_mode=ceil_mode, 
dilation=dilation, dtype=dtype), - ie_device, precision, ir_version, dynamic_shapes=False, trace_model=to_trace) + ie_device, precision, ir_version, dynamic_shapes=is_dynamic_shapes, trace_model=to_trace) + @pytest.mark.parametrize("input_shape", [[1, 3, 15, 15, 15], [3, 15, 15, 15]]) @pytest.mark.parametrize("params", d3_params) @pytest.mark.parametrize("ceil_mode", [True, False]) @pytest.mark.parametrize("dilation", [1, 2]) + @pytest.mark.parametrize("is_dynamic_shapes", [True, False]) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_max_pool3d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): + def test_max_pool3d(self, input_shape, params, ceil_mode, dilation, ie_device, precision, ir_version, is_dynamic_shapes): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model("max_pool3d", **params, ceil_mode=ceil_mode, dilation=dilation), - ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 5}, dynamic_shapes=False) + ie_device, precision, ir_version, dynamic_shapes=is_dynamic_shapes) + @pytest.mark.parametrize("input_shape", [[1, 3, 15], [3, 15]]) @pytest.mark.parametrize("params", d1_params) @pytest.mark.parametrize("ceil_mode", [True, False]) @pytest.mark.parametrize("dilation", [1, 2]) + @pytest.mark.parametrize("is_dynamic_shapes", [True, False]) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_max_pool1d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): + def test_max_pool1d_indices(self, input_shape, params, ceil_mode, dilation, ie_device, precision, ir_version, is_dynamic_shapes): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model("max_pool1d_with_indices", **params, ceil_mode=ceil_mode, dilation=dilation), - ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 3}, dynamic_shapes=False) + ie_device, precision, ir_version, dynamic_shapes=is_dynamic_shapes) + @pytest.mark.parametrize("input_shape", [[1, 3, 15, 15], [3, 15, 15]]) @pytest.mark.parametrize("params", d2_params) @pytest.mark.parametrize("ceil_mode", [True, False]) @pytest.mark.parametrize("dilation", [1, 2]) + @pytest.mark.parametrize("is_dynamic_shapes", [True, False]) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_max_pool2d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): + def test_max_pool2d_indices(self, input_shape, params, ceil_mode, dilation, ie_device, precision, ir_version, is_dynamic_shapes): to_trace = False if params["stride"] == []: to_trace = True + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model("max_pool2d_with_indices", **params, ceil_mode=ceil_mode, dilation=dilation), - ie_device, precision, ir_version, dynamic_shapes=False, trace_model=to_trace) + ie_device, precision, ir_version, dynamic_shapes=is_dynamic_shapes, trace_model=to_trace) + @pytest.mark.parametrize("input_shape", [[1, 3, 15, 15, 15], [3, 15, 15, 15]]) @pytest.mark.parametrize("params", d3_params) @pytest.mark.parametrize("ceil_mode", [True, False]) @pytest.mark.parametrize("dilation", 
[1, 2]) + @pytest.mark.parametrize("is_dynamic_shapes", [True, False]) @pytest.mark.nightly @pytest.mark.precommit @pytest.mark.precommit_fx_backend @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_max_pool3d_indices(self, params, ceil_mode, dilation, ie_device, precision, ir_version): + def test_max_pool3d_indices(self, input_shape, params, ceil_mode, dilation, ie_device, precision, ir_version, is_dynamic_shapes): + self.input_tensor = np.random.randn(*input_shape).astype(np.float32) self._test(*self.create_model("max_pool3d_with_indices", **params, ceil_mode=ceil_mode, dilation=dilation), - ie_device, precision, ir_version, kwargs_to_prepare_input={'ndim': 5}, dynamic_shapes=False) + ie_device, precision, ir_version, dynamic_shapes=is_dynamic_shapes) From 212be8e18b57684bebfd17c2290098008a815c46 Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Fri, 18 Oct 2024 01:13:00 +0100 Subject: [PATCH 057/112] NPUW: Dynamic Spatial (#27104) ### Details: - Introduce a new SPATIAL pipeline which is a shortcut to PIPELINE:REG+ISOLATE:COMPUTE+SPATIAL:ON; - Refactor some code re: spatial regions handling in models and requests; - Finally, introduce a dyn dispatch over the spatial range - Based on runtime-detected features - Can be disabled to measure full range performance ### Tickets: - E-143572 --- .../al/include/intel_npu/al/config/npuw.hpp | 3 +- .../al/include/npuw_private_properties.hpp | 12 +- .../intel_npu/src/al/src/config/npuw.cpp | 1 + .../src/plugin/npuw/compiled_model.cpp | 17 +-- .../src/plugin/npuw/compiled_model.hpp | 18 +-- .../plugin/npuw/just_sync_infer_request.cpp | 35 +++++- .../plugin/npuw/just_sync_infer_request.hpp | 5 +- .../npuw/partitioning/online/compiler.cpp | 25 ++++- .../plugin/npuw/partitioning/partitioning.cpp | 2 +- .../plugin/npuw/partitioning/partitioning.hpp | 16 +-- .../intel_npu/src/plugin/npuw/spatial.cpp | 44 ++++++++ .../intel_npu/src/plugin/npuw/spatial.hpp | 106 ++++++++++++++++++ .../intel_npu/src/plugin/npuw/util.cpp | 37 ++++++ .../intel_npu/src/plugin/npuw/util.hpp | 4 + 14 files changed, 270 insertions(+), 55 deletions(-) create mode 100644 src/plugins/intel_npu/src/plugin/npuw/spatial.cpp create mode 100644 src/plugins/intel_npu/src/plugin/npuw/spatial.hpp diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp index fef9470545482a..b0ecf3cd45d152 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp @@ -45,7 +45,8 @@ DEFINE_OPT(NPUW_DQ, bool, false, npuw::partitioning::dyn_quant, CompileTime); DEFINE_OPT(NPUW_PMM, std::string, "2", npuw::partitioning::par_matmul_merge_dims, CompileTime); DEFINE_OPT(NPUW_HOST_GATHER, bool, true, npuw::partitioning::host_gather, CompileTime); DEFINE_OPT(NPUW_SPATIAL, bool, false, npuw::partitioning::spatial, CompileTime); -DEFINE_OPT(NPUW_SPATIAL_NWAY, std::size_t, 64, npuw::partitioning::spatial_nway, CompileTime); +DEFINE_OPT(NPUW_SPATIAL_NWAY, std::size_t, 128, npuw::partitioning::spatial_nway, CompileTime); +DEFINE_OPT(NPUW_SPATIAL_DYN, bool, true, npuw::partitioning::spatial_dyn, CompileTime); DEFINE_OPT(NPUW_DCOFF_TYPE, std::string, "", npuw::partitioning::dcoff_type, CompileTime); DEFINE_OPT(NPUW_DCOFF_SCALE, bool, false, npuw::partitioning::dcoff_with_scale, CompileTime); DEFINE_OPT(NPUW_FUNCALL_FOR_ALL, bool, false, 
npuw::partitioning::funcall_for_all, CompileTime); diff --git a/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp b/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp index 059977ee47a063..834f90db9cf9ef 100644 --- a/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp @@ -67,7 +67,7 @@ namespace online { * @brief * Type: std::string. * Specify which partitioning pipeline to run. - * Possible values: "NONE", "INIT", "JUST", "REP", "REG", "COMPUTE". + * Possible values: "NONE", "INIT", "JUST", "REP", "REG", "COMPUTE", "SPATIAL". * Default value: "REG". */ static constexpr ov::Property pipeline{"NPUW_ONLINE_PIPELINE"}; @@ -206,10 +206,18 @@ static constexpr ov::Property spatial{"NPUW_SPATIAL"}; * @brief * Type: std::size_t. * Submission size for the spatial execution. - * Default value: 64 + * Default value: 128 */ static constexpr ov::Property spatial_nway{"NPUW_SPATIAL_NWAY"}; +/** + * @brief + * Type: boolean. + * Enable dynamic submission for spatial subgraphs. Requires SPATIAL pipeline to be selected. + * Default value: true + */ +static constexpr ov::Property spatial_dyn{"NPUW_SPATIAL_DYN"}; + /** * @brief * Type: boolean diff --git a/src/plugins/intel_npu/src/al/src/config/npuw.cpp b/src/plugins/intel_npu/src/al/src/config/npuw.cpp index 4aa4a88b9b5ba7..b5180633e4357e 100644 --- a/src/plugins/intel_npu/src/al/src/config/npuw.cpp +++ b/src/plugins/intel_npu/src/al/src/config/npuw.cpp @@ -30,6 +30,7 @@ void intel_npu::registerNPUWOptions(OptionsDesc& desc) { desc.add(); desc.add(); desc.add(); + desc.add(); desc.add(); desc.add(); desc.add(); diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index a312a806cac4bc..6ae61fc42410b8 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -283,18 +283,8 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, // Fill in the spatial information, if it is present if (fcn_template._spatial) { - using S = CompiledModelDesc::Spatial; - S s; - s.range = fcn_template._spatial->_range; - s.nway = fcn_template._spatial->_slice; - s.out_dim = fcn_template._spatial->_out_dim; - s.nway_iters = s.range / s.nway; - s.tail_size = s.range % s.nway; - for (auto&& input : fcn_template._spatial->_inputs) { - std::size_t p_idx = fcn_template._model->get_parameter_index(input.param); - s.params.push_back(S::Param{p_idx, input.dim}); - } - m_compiled_submodels[id].spatial = std::move(s); + m_compiled_submodels[id].spatial = + compiled::Spatial(fcn_template._spatial.value(), fcn_template._model); } LOG_INFO("Subgraph[" << id << "] is a function body for " << subgraph._funcall); } else { @@ -918,7 +908,8 @@ void ov::npuw::CompiledModel::implement_properties() { BIND(npuw::partitioning::dyn_quant, NPUW_DQ), BIND(npuw::partitioning::par_matmul_merge_dims, NPUW_PMM), BIND(npuw::partitioning::spatial, NPUW_SPATIAL), - BIND(npuw::partitioning::spatial, NPUW_SPATIAL_NWAY), + BIND(npuw::partitioning::spatial_nway, NPUW_SPATIAL_NWAY), + BIND(npuw::partitioning::spatial_dyn, NPUW_SPATIAL_DYN), BIND(npuw::partitioning::host_gather, NPUW_HOST_GATHER), BIND(npuw::partitioning::funcall_for_all, NPUW_FUNCALL_FOR_ALL), BIND(npuw::partitioning::dcoff_type, NPUW_DCOFF_TYPE), diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp 
index 038c1bb176b029..7a02ae1c8a485a 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -13,6 +13,7 @@ #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/so_ptr.hpp" #include "partitioning/partitioning.hpp" +#include "spatial.hpp" #include "weights_bank.hpp" namespace intel_npu { @@ -123,20 +124,7 @@ class CompiledModel : public ov::ICompiledModel { std::optional replaced_by; Subgraph::Gather host_gather; - struct Spatial { - struct Param { - std::size_t idx; - std::size_t dim; - }; - std::vector params; - std::size_t range = 0u; - std::size_t nway = 0u; - std::size_t out_dim = 0u; - - std::size_t nway_iters = 0u; - std::size_t tail_size = 0u; - }; - std::optional spatial; + std::optional spatial; // FIXME: This is a 1:1 copy of the ov::npuw::Subgraph structure // w.r.t. function calls diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp index c4e2c3ee98b676..bac69f0a3b0d36 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp @@ -199,6 +199,7 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrm_cfg.get<::intel_npu::NPUW_SPATIAL_DYN>()) { + LOG_VERB("Finding spatial features..."); + LOG_BLOCK(); + m_spatial_selector = runtime::spatial::AttentionMask::find(*this); + if (!m_spatial_selector) { + LOG_WARN("Spatial capability is enabled, but no run-time features were found."); + // Fallback selector to ALL + m_spatial_selector.reset(new runtime::spatial::All()); + } + } else { + // Just force selector to ALL + m_spatial_selector.reset(new runtime::spatial::All()); + } + LOG_VERB("Done"); + } } void ov::npuw::JustInferRequest::connect_subrequests() { @@ -506,6 +527,11 @@ void ov::npuw::JustInferRequest::prepare_for_infer() { LOG_DEBUG("Pre-initializing weights for subgraph[" << id << "]"); unpack_closure(id, m_subrequests[id]); } + + // Adjust spatial input range, if supported + if (m_spatial_selector) { + m_spatial_selector->prepare(); + } LOG_DEBUG("Done"); } @@ -915,6 +941,7 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { // must be prepared in the m_spatial_io at this point const auto& spatial = comp_model_desc.spatial.value(); const auto num_outputs = comp_model_desc.compiled_model->outputs().size(); + NPUW_ASSERT(m_spatial_selector); // Create a sparse vector with full input sizes. 
// For the access simplicity, its size is aligned with function's @@ -940,6 +967,10 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { std::size_t offset = 0u; for (std::size_t i = 0u; i < spatial.nway_iters; i++, offset += spatial.nway) { + if (!m_spatial_selector->need_submit(offset, spatial.nway)) { + continue; + } + // Collect spatial inputs for this offset for (auto&& param : spatial.params) { const auto& iport = comp_model_desc.compiled_model->inputs()[param.idx]; @@ -963,7 +994,7 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { } // for(full_nway_times) // Now process the tail, if required - if (spatial.tail_size) { + if (spatial.tail_size && m_spatial_selector->need_submit(offset, spatial.tail_size)) { // Copy the sub-ranges to spatial inputs // NOTE: tails buffers are read from/written to at 0th offset! for (auto&& param : spatial.params) { @@ -1085,7 +1116,7 @@ ov::npuw::TensorPtr ov::npuw::JustInferRequest::allocMem(const ov::element::Type return ov::get_tensor_impl(ov::Tensor(type, shape)); } - std::lock_guard guard(m_alloc_mutex); + // Protect access to shared context(s) - at least among infer requests auto remote_ctx = m_npuw_model->get_plugin()->get_core()->get_default_context(device)._ptr; auto remote_tensor = remote_ctx->create_host_tensor(type, shape); return ov::get_tensor_impl(ov::make_tensor(remote_tensor)); diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp index 88838d8b39d75f..fb105fd7629c7c 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp @@ -15,6 +15,7 @@ #include "openvino/runtime/iremote_context.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/tensor.hpp" +#include "spatial.hpp" namespace ov { namespace npuw { @@ -148,8 +149,10 @@ class JustInferRequest final : public IBaseInferRequest { }; std::vector m_subrequests_gio; - std::mutex m_alloc_mutex; std::unordered_set m_input_allocated; + + // Represents spatial run-time info + runtime::spatial::Selector::Ptr m_spatial_selector; }; } // namespace npuw diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp index a66159e6b4d1b7..a06a6f3bd1ced5 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp @@ -267,12 +267,13 @@ void dump_partitioning(const ov::npuw::Ensemble& ens, const std::string& to) { // Interface to get online partitioning from the model class Compiler { enum class Pipeline { - NONE, // Partitioning will consist of a single group with all the Ops - INIT, // Initialize only. The hardest mode, every group has just 1 layer inside - JUST, // "justParitioning" - combination of LHF + Remnants - REP, // Repeated blocks pipeline - combination of repeatedBlocks and Remnants - REG, // Regularized repeated blocks pipeline -same as REP, but with some strong hints first - COMPUTE // Separates non-foldable compute subgraphs from the model based on predefined rules + REP + NONE, // Partitioning will consist of a single group with all the Ops + INIT, // Initialize only. 
The hardest mode, every group has just 1 layer inside + JUST, // "justParitioning" - combination of LHF + Remnants + REP, // Repeated blocks pipeline - combination of repeatedBlocks and Remnants + REG, // Regularized repeated blocks pipeline - same as REP, but with some strong hints first + COMPUTE, // Separates non-foldable compute subgraphs from the model based on predefined rules + REP + SPATIAL // Similar to COMPUTE but allows folding }; template @@ -299,6 +300,8 @@ class Compiler { return Pipeline::REG; } else if (pipeline_opt == "COMPUTE") { return Pipeline::COMPUTE; + } else if (pipeline_opt == "SPATIAL") { + return Pipeline::SPATIAL; } else { LOG_WARN("Unknown partitioning compiler pipeline " << pipeline_opt << ", switching to REP"); return Pipeline::REP; @@ -428,6 +431,16 @@ class Compiler { m_snapshot->setCtx(ctx); rep(); break; + case Pipeline::SPATIAL: + warn_unused<::intel_npu::NPUW_ONLINE_ISOLATE>(); + m_cfg.update(::intel_npu::Config::ConfigMap{{std::string(::intel_npu::NPUW_SPATIAL::key()), "YES"}}); + + // Manually set predefined isolates and nofolds then do rep() pipeline + // FIXME: initialize via a dedicated function instead of parsing + ctx.isolates = detail::getIsolates(detail::ISOL_PRESETS.at("COMPUTE")); + m_snapshot->setCtx(ctx); + rep(); + break; } LOG_DEBUG("Online partitioning: group sizes after compilation:"); diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp index 5e3f12fedf68a6..f12350e8952eaa 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp @@ -1605,7 +1605,7 @@ void Partitioner::identifySpatialRange(ov::npuw::Function& f) { const auto& f_params = f._model->get_parameters(); NPUW_ASSERT(f_params.size() > 0); - using S = ov::npuw::Function::Spatial; + using S = ov::npuw::function::Spatial; S spatial; spatial._range = f_result_0_shape[1]; spatial._out_dim = 1; // the only case we're looking into now diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp index f60c6eff62e96b..5343ba26e6e5aa 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.hpp @@ -10,6 +10,7 @@ #include #include "../lazy_tensor.hpp" +#include "../spatial.hpp" #include "intel_npu/al/config/config.hpp" #include "openvino/openvino.hpp" @@ -70,20 +71,7 @@ struct Function { // NOTE: it seems it is required only for `matchRepeatedSubgraphs()' std::map, std::size_t> _param_mapping; - // Spatial information. So far assume spatial execution in 1 dimension only - struct Spatial { - using PPtr = std::shared_ptr; - struct Param { - PPtr param; - std::size_t dim; - }; - std::size_t _range = 0u; // Range over which spatial execution is organized, e.g. 1024 - std::size_t _slice = 0u; // A submission size for a single execution, e.g. 
128 - std::size_t _out_dim = 0u; // Assume it is the same dim for all Results - std::vector _inputs; - }; - using SpatialOpt = std::optional; - SpatialOpt _spatial; + std::optional _spatial; }; struct Group { diff --git a/src/plugins/intel_npu/src/plugin/npuw/spatial.cpp b/src/plugins/intel_npu/src/plugin/npuw/spatial.cpp new file mode 100644 index 00000000000000..a7ea56dd3ff910 --- /dev/null +++ b/src/plugins/intel_npu/src/plugin/npuw/spatial.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "spatial.hpp" + +#include "util.hpp" + +ov::npuw::runtime::spatial::AttentionMask::AttentionMask(std::size_t param_idx, const ov::ISyncInferRequest& rq) + : m_attn_mask_param_idx(param_idx), + m_rq(rq) {} + +ov::npuw::runtime::spatial::Selector::Ptr ov::npuw::runtime::spatial::AttentionMask::find( + const ov::ISyncInferRequest& rq) { + auto is_attn_mask = [](const ov::Output& p) { + const auto shape = p.get_shape(); + return p.get_node()->get_friendly_name() == "attention_mask" && + (shape.size() == 1 || (shape.size() == 2 && shape[0] == 1)); + }; + + const auto& inputs = rq.get_inputs(); + auto attn_mask_iter = std::find_if(inputs.begin(), inputs.end(), is_attn_mask); + if (attn_mask_iter != inputs.end()) { + const auto param_idx = std::distance(inputs.begin(), attn_mask_iter); + return Selector::Ptr{new AttentionMask(param_idx, rq)}; + } + return Selector::Ptr{}; +} + +void ov::npuw::runtime::spatial::AttentionMask::prepare() { + // Find the current valid range for this attention mask + // Here we have the following (very strong) assumption: + // The attention mask is dense (that is, has zero or one continuous interest region) + const auto& iport = m_rq.get_compiled_model()->inputs()[m_attn_mask_param_idx]; + std::tie(m_valid_range_begin, m_valid_range_end) = ov::npuw::util::validMaskRange(m_rq.get_tensor(iport)); +} + +bool ov::npuw::runtime::spatial::AttentionMask::need_submit(std::size_t offset, std::size_t len) const { + // We don't submit this request if + // - it is completely below the valid range + // - it is completely above the valid range + // in all other cases, we do + return !(offset + len < m_valid_range_begin || offset >= m_valid_range_end); +} diff --git a/src/plugins/intel_npu/src/plugin/npuw/spatial.hpp b/src/plugins/intel_npu/src/plugin/npuw/spatial.hpp new file mode 100644 index 00000000000000..fce2f63db4e807 --- /dev/null +++ b/src/plugins/intel_npu/src/plugin/npuw/spatial.hpp @@ -0,0 +1,106 @@ +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "openvino/openvino.hpp" +#include "openvino/runtime/icompiled_model.hpp" +#include "openvino/runtime/isync_infer_request.hpp" + +namespace ov { +namespace npuw { + +namespace function { + +// Partition-time spatial information. So far assume spatial execution in 1 dimension only +// Defined at this level to be aligned with other partitioning entities (but needs to be moved) +struct Spatial { + using PPtr = std::shared_ptr; + struct Param { + PPtr param; + std::size_t dim; + }; + std::size_t _range = 0u; // Range over which spatial execution is organized, e.g. 1024 + std::size_t _slice = 0u; // A submission size for a single execution, e.g. 128 + std::size_t _out_dim = 0u; // Assume it is the same dim for all Results + std::vector _inputs; +}; + +} // namespace function + +namespace compiled { + +// Compile-time spatial information. 
Not much different from the above +struct Spatial { + struct Param { + std::size_t idx; // function input index for this spatial parameter + std::size_t dim; + }; + std::vector params; + std::size_t range = 0u; // NB: duplication of the above + std::size_t nway = 0u; // NB: duplication of the above + std::size_t out_dim = 0u; // NB: duplication of the above + + std::size_t nway_iters = 0u; + std::size_t tail_size = 0u; + + Spatial(const function::Spatial& s, const std::shared_ptr& m) + : range(s._range), + nway(s._slice), + out_dim(s._out_dim), + nway_iters(range / nway), + tail_size(range % nway) { + for (auto&& input : s._inputs) { + std::size_t p_idx = m->get_parameter_index(input.param); + params.push_back(Param{p_idx, input.dim}); + } + } +}; + +} // namespace compiled + +namespace runtime { +namespace spatial { + +// A base class to decide the work-scope from some feature +class Selector { +public: + using Ptr = std::shared_ptr; + virtual ~Selector() = default; + virtual void prepare() = 0; + virtual bool need_submit(std::size_t offset, std::size_t len) const = 0; +}; + +// No dynamic dispatch - just run over the whole range +class All final : public Selector { + void prepare() override {} + bool need_submit(std::size_t, std::size_t) const override { + return true; + } +}; + +// Define work scope based on attention mask +class AttentionMask final : public Selector { + std::size_t m_attn_mask_param_idx = 0u; + std::size_t m_valid_range_begin = 0u; + std::size_t m_valid_range_end = 0u; + + const ov::ISyncInferRequest& m_rq; + + AttentionMask(std::size_t param_idx, const ov::ISyncInferRequest& rq); + void prepare() override; + bool need_submit(std::size_t offset, std::size_t len) const override; + +public: + static Selector::Ptr find(const ov::ISyncInferRequest& rq); +}; + +} // namespace spatial +} // namespace runtime + +} // namespace npuw +} // namespace ov diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.cpp b/src/plugins/intel_npu/src/plugin/npuw/util.cpp index 1de8f4de4bdb4f..da62d040c06095 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.cpp @@ -518,3 +518,40 @@ ov::Tensor ov::npuw::util::concat(const std::vector& tt, std::size_t NPUW_ASSERT(false && "Not supported yet"); } } + +namespace { +template +ov::npuw::util::range_1d validMaskRange(const T* data, std::size_t len) { + using R = ov::npuw::util::range_1d; + std::size_t range_begin = 0u; + bool was_set = false; + + for (std::size_t idx = 0u; idx < len; idx++) { + const bool is_set = static_cast(data[idx] > 0); + + if (is_set && !was_set) { + was_set = true; + range_begin = idx; + } else if (!is_set && was_set) { + return R{range_begin, idx}; + } + } + return was_set ? 
R{range_begin, len} : R{0u, 0u}; +} +} // namespace + +ov::npuw::util::range_1d ov::npuw::util::validMaskRange(const ov::SoPtr& src) { + NPUW_ASSERT(src->is_continuous()); + + namespace ove = ov::element; +#define HNDL(t, T) \ + case ove::t: \ + return ::validMaskRange(static_cast(src->data()), src->get_size()); + switch (src->get_element_type()) { + HNDL(i64, int64_t); + HNDL(i32, int32_t); + default: + OPENVINO_THROW("Unsupported type ", src->get_element_type()); + } +#undef HNDL +} diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.hpp b/src/plugins/intel_npu/src/plugin/npuw/util.hpp index 02d2c8c097811e..a826d00e032977 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.hpp @@ -64,6 +64,10 @@ ov::Tensor transpose(const ov::Tensor& t); ov::Tensor permute(const ov::Tensor& t, const std::vector& axes); ov::Tensor concat(const std::vector& tt, std::size_t axis); +// Start is inclusive, end is exclusive +using range_1d = std::pair; +range_1d validMaskRange(const ov::SoPtr& t); + namespace at { template struct Impl { From ac7cb8b62065adc18ed17a81b668f87ad0ca48c3 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 18 Oct 2024 09:14:47 +0400 Subject: [PATCH 058/112] [GPU] Save use_onednn attribute in the blob (#27097) ### Details: - This is needed to have correct runtime impl selection of imported model ### Tickets: - *CVS-154891* --- src/plugins/intel_gpu/src/graph/program.cpp | 4 ++ .../tests/unit/test_cases/gemm_gpu_test.cpp | 51 ++++++++++++++----- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 3a3793e8ad764d..d4461b8aad9107 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -1776,6 +1776,7 @@ void program::save(cldnn::BinaryOutputBuffer& ob) const { ob << _is_body_program; ob << _can_be_optimized; + ob << get_layout_optimizer().get_optimization_attributes().use_onednn_impls; processing_order.save(ob); { @@ -1895,6 +1896,9 @@ void program::load(cldnn::BinaryInputBuffer& ib) { ib >> _is_body_program; ib >> _can_be_optimized; + int32_t use_onednn_attr = 0; + ib >> use_onednn_attr; + get_layout_optimizer().set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, use_onednn_attr); _loaded_from_cache = true; processing_order.load(ib, *this); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp index 51f66f3abb7bfe..3b41f44050e527 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp @@ -15,6 +15,7 @@ #include "intel_gpu/runtime/compilation_context.hpp" #include "gemm_inst.h" #include "permute_inst.h" +#include "layout_optimizer.h" #include #include @@ -625,7 +626,7 @@ class gemm_gpu_tests: public ::testing::Test { topology topology; topology.add(input_layout("input1", in1_layout), input_layout("input2", in2_layout), - gemm("gemm_ref", { input_info("input1"), input_info("input2") }, data_types::f16, + gemm("gemm_ref", { input_info("input1"), input_info("input2") }, data_types::f16, {0, 2, 1, 3}, {0, 2, 3, 1}, {0, 1, 2, 3}) ); @@ -652,7 +653,7 @@ class gemm_gpu_tests: public ::testing::Test { topology topology; topology.add(input_layout("input1", in1_layout), input_layout("input2", in2_layout), - gemm("gemm", { input_info("input1"), 
input_info("input2") }, data_types::f16, + gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f16, {0, 2, 1, 3}, {0, 2, 3, 1}, {0, 1, 2, 3}) ); @@ -2789,7 +2790,7 @@ INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_onednn_ndims, ::testing::ValuesIn(std::v class gemm_onednn: public ::testing::Test { public: - void test_impl_replacement_with_cldnn() { + void test_impl_replacement_with_cldnn(bool is_caching_test) { auto& engine = get_test_engine(); if (!engine.get_device_info().supports_immad) @@ -2828,16 +2829,34 @@ class gemm_onednn: public ::testing::Test { ov::intel_gpu::optimize_data(true), ov::intel_gpu::allow_new_shape_infer(true) }; - network network(engine, topology, cfg); - network.set_input_data("input1", input1); - network.set_input_data("input2", input2); + cldnn::network::ptr network; + if (is_caching_test) { + membuf mem_buf; + { + std::ostream out_mem(&mem_buf); + BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); + ob.set_stream(get_test_stream_ptr().get()); + program::build_program(engine, topology, cfg)->save(ob); + } + { + std::istream in_mem(&mem_buf); + BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine); + auto imported_prog = std::make_shared(engine, cfg); + imported_prog->load(ib); + network = std::make_shared(imported_prog); + } + } else { + network = std::make_shared(engine, topology, cfg); + } + network->set_input_data("input1", input1); + network->set_input_data("input2", input2); - auto inst = network.get_primitive("gemm"); + auto inst = network->get_primitive("gemm"); auto impl = inst->get_impl(); ASSERT_TRUE(impl != nullptr); ASSERT_TRUE(impl->is_dynamic()); - auto outputs = network.execute(); + auto outputs = network->execute(); auto output = outputs.at("gemm").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -2847,12 +2866,15 @@ class gemm_onednn: public ::testing::Test { ASSERT_FLOAT_EQ(output_ptr[i], out_data[i]); } - // WA: Call wait_all() to wait for all queued kernels compilation finish - network.get_program()->get_compilation_context().wait_all(); + // Call wait_all() to wait for all queued kernels compilation finish + network->get_program()->get_compilation_context().wait_all(); + + auto& lo = network->get_program()->get_layout_optimizer(); + ASSERT_TRUE(lo.get_optimization_attributes().use_onednn_impls); // Check if OneDNN's impl is used for the next execute() call - network.execute(); - inst = network.get_primitive("gemm"); + network->execute(); + inst = network->get_primitive("gemm"); impl = inst->get_impl(); ASSERT_TRUE(impl != nullptr); ASSERT_FALSE(impl->is_dynamic()); @@ -3214,7 +3236,10 @@ class gemm_onednn: public ::testing::Test { }; TEST_F(gemm_onednn, impl_replacement_with_cldnn) { - this->test_impl_replacement_with_cldnn(); + this->test_impl_replacement_with_cldnn(false); +} +TEST_F(gemm_onednn, impl_replacement_with_cldnn_cached) { + this->test_impl_replacement_with_cldnn(true); } // Check gemm_onednn transpose_format() can accept transpose white list format (byfx/bxfy) From 4bf52c13d005649e9583b426624613eadebf89cf Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 18 Oct 2024 09:16:43 +0400 Subject: [PATCH 059/112] [GPU] Fix USMHost tensor sharing between models from different Cores (#27105) ### Details: - Treat USMHost tensor from another context as non-sharable --- .../src/plugin/sync_infer_request.cpp | 4 +-- .../functional/behavior/infer_request.cpp | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git 
a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 88d69dcd3e47b3..58e99e037fb931 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -547,7 +547,7 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe bool can_share = !is_convert_required(user_tensor->get_element_type(), element_type) && can_use_usm_host(engine) && !generic_remote_tensor; - if (usm_host_tensor && can_share) { + if (usm_host_tensor && can_share && m_context == usm_host_tensor->get_impl()->get_context()) { return { usm_host_tensor->get_impl(), user_tensor_wrapper.owner }; } else if (usm_host_raw_ptr && can_share) { return { std::make_shared(m_context, @@ -727,7 +727,7 @@ std::vector SyncInferRequest::prepare_input(const std::string auto usm_host_ptr = std::dynamic_pointer_cast(user_tensor); bool is_generic_remote = iremote_tensor_ptr != nullptr && remote_tensor_impl_ptr == nullptr; bool is_remote_tensor_impl = remote_tensor_impl_ptr != nullptr; - bool is_usm_host_tensor = usm_host_ptr != nullptr; + bool is_usm_host_tensor = usm_host_ptr != nullptr && usm_host_ptr->get_impl()->get_context() == m_context; GPU_DEBUG_TRACE_DETAIL << "Prepare input for " << internal_name << " (is_remote_tensor_impl ? " << is_remote_tensor_impl diff --git a/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp b/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp index d82384f1eb8366..201c91fe9a60c3 100644 --- a/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp +++ b/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp @@ -6,7 +6,11 @@ #include "common_test_utils/test_common.hpp" #include "common_test_utils/common_utils.hpp" #include "common_test_utils/node_builders/activation.hpp" +#include "openvino/core/partial_shape.hpp" #include "openvino/core/preprocess/pre_post_process.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/relu.hpp" +#include "openvino/op/result.hpp" #include "openvino/runtime/core.hpp" #include "transformations/utils/utils.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" @@ -369,4 +373,26 @@ TEST(TensorTest, smoke_outputTensorShapesForDynamicInput) { OV_ASSERT_NO_THROW(inf_req.infer()); ASSERT_EQ(inf_req.get_output_tensor().get_shape(), output3_shape); } + +TEST(TensorTest, smoke_canShareTensorIfModelsFromDifferentCores) { + auto core1 = ov::Core(); + auto core2 = ov::Core(); + + auto param = std::make_shared(ov::element::f32, ov::PartialShape{4, 8}); + auto relu = std::make_shared(param); + auto result = std::make_shared(relu); + auto model = std::make_shared(ov::ResultVector{result}, ov::ParameterVector{param}); + + auto compiled_model1 = core1.compile_model(model, ov::test::utils::DEVICE_GPU); + auto compiled_model2 = core2.compile_model(model, ov::test::utils::DEVICE_GPU); + + auto request1 = compiled_model1.create_infer_request(); + auto request2 = compiled_model2.create_infer_request(); + + request2.set_input_tensor(request1.get_output_tensor()); + request2.set_output_tensor(request1.get_input_tensor()); + + OV_ASSERT_NO_THROW(request1.infer()); + OV_ASSERT_NO_THROW(request2.infer()); +} } // namespace From 939b35a96293bf9b02a4eb8732632c3700f46ce5 Mon Sep 17 00:00:00 2001 From: Dan Liu Date: Thu, 17 Oct 2024 22:24:18 -0700 Subject: [PATCH 060/112] [NPU]Change NPUBackend log (#27073) ### Details: [log.error of 
NPUBackend](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_npu/src/plugin/src/backends.cpp#L130) can confuse the user, though the NPU backend does not impact compilation and only impacts the inference stage. Now, [in the compiled model part of the inference stage](https://github.com/openvinotoolkit/openvino/pull/27073/files#diff-74bc81bb7b258118f04e81468e3ec3b05e65e714546d32246bae45eb892f6abcR125-R130), a log.error output is produced when no NPU device is detected. ### Tickets: - 153439 --- src/plugins/intel_npu/src/plugin/src/backends.cpp | 5 +++-- .../intel_npu/src/plugin/src/compiled_model.cpp | 11 ++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/backends.cpp b/src/plugins/intel_npu/src/plugin/src/backends.cpp index 1019cff3287995..9b090e4ec91529 100644 --- a/src/plugins/intel_npu/src/plugin/src/backends.cpp +++ b/src/plugins/intel_npu/src/plugin/src/backends.cpp @@ -111,7 +111,7 @@ NPUBackends::NPUBackends(const std::vector& backendRegistry, } catch (const std::exception& ex) { _logger.warning("Got an error during backend '%s' loading : %s", backendName.c_str(), ex.what()); } catch (...) { - _logger.error("Got an unknown error during backend '%s' loading", backendName.c_str()); + _logger.warning("Got an unknown error during backend '%s' loading", backendName.c_str()); } } @@ -127,7 +127,8 @@ NPUBackends::NPUBackends(const std::vector& backendRegistry, if (_backend != nullptr) { _logger.info("Use '%s' backend for inference", _backend->getName().c_str()); } else { - _logger.error("Cannot find backend for inference. Make sure the device is available."); + _logger.warning("None of the backends were initialized successfully." + "Only offline compilation can be done!"); } } diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp index 51ed0e2c5c4858..91aa19499d9de5 100644 --- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp @@ -24,6 +24,9 @@ namespace { constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE = + "Can't create infer request due to create executor failed! Only exports can be made."; + +constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE_NODEVICE = "Can't create infer request!\n" "Please make sure that the device is available. Only exports can be made."; @@ -118,8 +121,14 @@ std::shared_ptr CompiledModel::create_infer_request() co if (_executorPtr == nullptr && _device != nullptr) { _executorPtr = _device->createExecutor(_networkPtr, _config); } + if (_executorPtr == nullptr) { - OPENVINO_THROW(NO_EXECUTOR_FOR_INFERENCE); + if (_device != nullptr) { + OPENVINO_THROW(NO_EXECUTOR_FOR_INFERENCE); + } else { + _logger.error("Can not find device!"); + OPENVINO_THROW(NO_EXECUTOR_FOR_INFERENCE_NODEVICE); + } } const std::shared_ptr& syncInferRequest =
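To make the resulting behavior concrete, here is a hypothetical usage sketch (not code from this patch; it assumes a host where the NPU compiler is available but no NPU device is present, and the `offline_flow` name is illustrative): compilation and blob export succeed, while infer-request creation throws with the new dedicated message.

```cpp
#include <fstream>
#include "openvino/openvino.hpp"

// Hedged sketch of the flow this commit targets: on a machine without an NPU
// device, offline compilation and blob export are still possible; only the
// creation of an infer request fails.
void offline_flow(const std::shared_ptr<ov::Model>& model) {
    ov::Core core;
    auto compiled = core.compile_model(model, "NPU");  // backend is not required for compilation

    std::ofstream blob("model.blob", std::ios::binary);
    compiled.export_model(blob);                       // exports can still be made

    auto request = compiled.create_infer_request();    // throws: executor/device unavailable
}
```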
From a8293f3b0f94036d757050f6792a599a04be4f53 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 18 Oct 2024 07:04:38 +0000 Subject: [PATCH 061/112] Bump actions/upload-artifact from 4.4.0 to 4.4.3 (#27113) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.4.0 to 4.4.3.

Release notes, sourced from actions/upload-artifact's releases:

- v4.4.3: Full Changelog: https://github.com/actions/upload-artifact/compare/v4.4.2...v4.4.3
- v4.4.2: Full Changelog: https://github.com/actions/upload-artifact/compare/v4.4.1...v4.4.2
- v4.4.1: Full Changelog: https://github.com/actions/upload-artifact/compare/v4.4.0...v4.4.1

Commits:

- b4b15b8 Merge pull request #632 from actions/joshmgross/undo-dependency-changes
- 92b01eb Undo indirect dependency updates from #627
- 8448086 Merge pull request #627 from actions/robherley/v4.4.2
- b1d4642 add explicit relative and absolute symlinks to workflow
- d50e660 bump version
- aabe6f8 build with @actions/artifact v2.1.11
- 604373d Merge pull request #625 from actions/robherley/artifact-2.1.10
- 0150148 paste right core version
- a009b25 update licenses
- 9f6f6f4 update @actions/core and @actions/artifact to latest versions
- Additional commits are viewable in the compare view.
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/android_arm64.yml | 2 +- .github/workflows/android_x64.yml | 2 +- .github/workflows/build_doc.yml | 6 +++--- .github/workflows/coverity.yml | 4 ++-- .../workflows/dev_cpu_linux_snippets_libxsmm.yml | 8 ++++---- .github/workflows/job_build_linux.yml | 16 ++++++++-------- .github/workflows/job_build_windows.yml | 8 ++++---- .github/workflows/job_cpu_functional_tests.yml | 2 +- .github/workflows/job_cxx_unit_tests.yml | 2 +- .github/workflows/job_gpu_tests.yml | 2 +- .github/workflows/job_jax_models_tests.yml | 2 +- .github/workflows/job_onnx_models_tests.yml | 2 +- .github/workflows/job_python_unit_tests.yml | 2 +- .github/workflows/job_pytorch_layer_tests.yml | 2 +- .github/workflows/job_pytorch_models_tests.yml | 2 +- .github/workflows/job_tensorflow_layer_tests.yml | 2 +- .../workflows/job_tensorflow_models_tests.yml | 2 +- .github/workflows/job_tokenizers.yml | 2 +- .../workflows/linux_conditional_compilation.yml | 8 ++++---- .github/workflows/linux_sanitizers.yml | 6 +++--- .github/workflows/mac.yml | 8 ++++---- .github/workflows/mac_arm64.yml | 8 ++++---- .github/workflows/py_checks.yml | 6 +++--- .github/workflows/ubuntu_22.yml | 4 ++-- .../windows_conditional_compilation.yml | 6 +++--- .github/workflows/windows_vs2019_release.yml | 4 ++-- 26 files changed, 59 insertions(+), 59 deletions(-) diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 6a67e8f6793ec9..15094a84ee8f5f 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -178,7 +178,7 @@ jobs: # Upload build logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs diff --git a/.github/workflows/android_x64.yml b/.github/workflows/android_x64.yml index cab5239b4c45c0..cebaa9177b69b9 100644 --- a/.github/workflows/android_x64.yml +++ b/.github/workflows/android_x64.yml @@ -152,7 +152,7 @@ jobs: # Upload build logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 53f3eba9a749bf..535be1e4e70457 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -77,13 +77,13 @@ jobs: echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV - name: 'Upload sphinx.log' - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: sphinx_build_log_${{ env.PR_NUMBER }}.log path: build/docs/sphinx.log - name: 'Upload docs html' - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_docs_html_${{ env.PR_NUMBER }}.zip path: build/docs/openvino_docs_html.zip @@ -100,7 +100,7 @@ jobs: - name: 'Upload test results' if: failure() - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_docs_pytest path: build/docs/_artifacts/ diff 
--git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 6a163fb5e50043..8a2338554faae3 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -144,7 +144,7 @@ jobs: run: ${COVERITY_TOOL_DIR}/cov-analysis*/bin/cov-configure -c ${COVERITY_TOOL_DIR}/cov-analysis-linux64-2023.6.2/config/coverity_config.xml -lscc text - name: Upload Coverity build log - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: coverity_logs @@ -152,7 +152,7 @@ jobs: if-no-files-found: 'error' - name: Upload Coverity build archive - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: coverity_archive diff --git a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml index 83770900559bab..26e8400c22a04f 100644 --- a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml +++ b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml @@ -169,7 +169,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs @@ -178,7 +178,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -186,7 +186,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -325,7 +325,7 @@ jobs: timeout-minutes: 25 - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: test-results-functional-cpu diff --git a/.github/workflows/job_build_linux.yml b/.github/workflows/job_build_linux.yml index b8eea4375e7e58..86545b6e9e7a43 100644 --- a/.github/workflows/job_build_linux.yml +++ b/.github/workflows/job_build_linux.yml @@ -249,7 +249,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs @@ -258,7 +258,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -266,7 +266,7 @@ jobs: - name: Upload openvino wheels if: ${{ inputs.os != 'debian_10' && inputs.arch != 'arm' }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels 
path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl @@ -274,7 +274,7 @@ jobs: - name: Upload openvino js package if: ${{ fromJSON(inputs.affected-components).JS_API && inputs.build-js }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -282,7 +282,7 @@ jobs: - name: Upload openvino developer package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_developer_package path: ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz @@ -290,7 +290,7 @@ jobs: - name: Upload openvino RPM packages if: ${{ inputs.build-rpm-packages }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_rpm_packages path: ${{ env.BUILD_DIR }}/*.rpm @@ -298,7 +298,7 @@ jobs: - name: Upload openvino debian packages if: ${{ inputs.build-debian-packages }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_debian_packages path: ${{ env.BUILD_DIR }}/*.deb @@ -306,7 +306,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz diff --git a/.github/workflows/job_build_windows.yml b/.github/workflows/job_build_windows.yml index c8e249513a08f0..df2544d9d9e60a 100644 --- a/.github/workflows/job_build_windows.yml +++ b/.github/workflows/job_build_windows.yml @@ -218,21 +218,21 @@ jobs: # - name: Upload openvino package - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.zip if-no-files-found: 'error' - name: Upload openvino wheels - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels path: ${{ env.BUILD_DIR }}/wheels/*.whl if-no-files-found: 'error' - name: Upload openvino tests package - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.zip @@ -240,7 +240,7 @@ jobs: - name: Upload openvino js package if: ${{ fromJSON(inputs.affected-components).JS_API }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index 6848871df6e81e..e197d581f290a4 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -116,7 +116,7 @@ jobs: key: ${{ runner.os }}-${{ runner.arch 
}}-tests-functional-cpu-stamp-${{ github.sha }} - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: test-results-functional-cpu diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index 99c363d04d23a7..3f871151ccd282 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ b/.github/workflows/job_cxx_unit_tests.yml @@ -257,7 +257,7 @@ jobs: ${{ env.INSTALL_TEST_DIR }}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-cpp diff --git a/.github/workflows/job_gpu_tests.yml b/.github/workflows/job_gpu_tests.yml index 324e653c57ebab..b9862eac09cc05 100644 --- a/.github/workflows/job_gpu_tests.yml +++ b/.github/workflows/job_gpu_tests.yml @@ -128,7 +128,7 @@ jobs: - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: test-results-${{ inputs.test_type }}-${{ inputs.device }} diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index 9956a27f234b36..ea2669071386dd 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -100,7 +100,7 @@ jobs: TEST_DEVICE: CPU - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-jax-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml index 321aa88d614310..c879f0cb6a1efc 100644 --- a/.github/workflows/job_onnx_models_tests.yml +++ b/.github/workflows/job_onnx_models_tests.yml @@ -112,7 +112,7 @@ jobs: python3 -m pytest --backend="CPU" --model_zoo_dir="${MODELS_SHARE_PATH}" ${INSTALL_TEST_DIR}/onnx/tests/tests_python/test_zoo_models.py -v -n auto --forked -k 'not _cuda' --model_zoo_xfail - name: Upload logs from pytest - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: onnx_models_tests_logs diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index d63262c665d45c..1fafafd7623545 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -276,7 +276,7 @@ jobs: --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index 95074dc84f1ff9..abf614c70cff4e 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ 
b/.github/workflows/job_pytorch_layer_tests.yml @@ -147,7 +147,7 @@ jobs: PYTORCH_TRACING_MODE: TORCHFX - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python-pytorch-layers diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index a77c1318f3a0c8..74915f1d9b823f 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -171,7 +171,7 @@ jobs: df -h - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-torch-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index ae6e91a00d1497..977b2e4f96af73 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -150,7 +150,7 @@ jobs: TEST_PRECISION: FP16 - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python-tf-layers diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index db34ec7b793551..0990eae3de6e7e 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -107,7 +107,7 @@ jobs: TEST_DEVICE: CPU - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-tensorflow-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index 238dbfec3a34eb..f7388eb98a2f3c 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -133,7 +133,7 @@ jobs: - name: Upload openvino tokenizers wheel if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tokenizers_wheel path: ${{ env.EXTENSION_BUILD_DIR }}/*.whl diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index 7b5467b01ad73e..42d7810b9f1663 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -223,7 +223,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs @@ -232,7 +232,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ 
-240,7 +240,7 @@ jobs: - name: Upload selective build statistics package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_selective_build_stat path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.tar.gz @@ -248,7 +248,7 @@ jobs: - name: Upload OpenVINO tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index b23e67a0f2b30e..e1a71fe92dc1a3 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -188,7 +188,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package_${{ matrix.SANITIZER }} path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -196,7 +196,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests_${{ matrix.SANITIZER }} path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -465,7 +465,7 @@ jobs: ${INSTALL_TEST_DIR}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-cpp_${{ matrix.SANITIZER }} diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 6e3f344c6dd944..bddbaab134fa9c 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -205,14 +205,14 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' - name: Upload openvino wheels - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl @@ -220,7 +220,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -228,7 +228,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} diff --git 
a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 16658318de20d8..576eefde8c9b4a 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -206,14 +206,14 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' - name: Upload openvino wheels - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl @@ -221,7 +221,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -229,7 +229,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml index ae0625ce4a453c..2b0d3f2272787f 100644 --- a/.github/workflows/py_checks.yml +++ b/.github/workflows/py_checks.yml @@ -50,7 +50,7 @@ jobs: git diff > samples_diff.diff working-directory: samples/python - - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: failure() with: name: samples_diff @@ -68,7 +68,7 @@ jobs: git diff > pyopenvino_diff.diff working-directory: src/bindings/python/src/openvino - - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: failure() with: name: pyopenvino_diff @@ -86,7 +86,7 @@ jobs: git diff > wheel_diff.diff working-directory: src/bindings/python/wheel - - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: failure() with: name: wheel_diff diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 2ebca2b059fdd2..5e5ac3c3482624 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -227,7 +227,7 @@ jobs: - name: Upload Conformance Artifacts if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: conformance_artifacts_${{ matrix.TEST_TYPE }}-${{ env.TEST_DEVICE }} path: ${{ env.CONFORMANCE_ARTIFACTS_DIR }}/conformance_artifacts.tar.gz @@ -253,7 +253,7 @@ jobs: - name: Upload Conformance Artifacts if: ${{ matrix.TEST_TYPE == 'API' }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: conformance_artifacts_${{ matrix.TEST_TYPE }}-TEMPLATE path: ${{ 
env.CONFORMANCE_ARTIFACTS_DIR }}/conformance_artifacts.tar.gz diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 9c026f01e47233..30b2ce2f20df38 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -249,7 +249,7 @@ jobs: - name: Upload selective build statistics package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_selective_build_stat path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip @@ -257,7 +257,7 @@ jobs: - name: Upload OpenVINO tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.zip @@ -402,7 +402,7 @@ jobs: timeout-minutes: 60 - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-functional-cpu diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index 8cac2b88078d15..bce90165408815 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -391,7 +391,7 @@ jobs: run: python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python @@ -502,7 +502,7 @@ jobs: key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-functional-cpu From e582f61a65ba430ae005f927695e3fe68b4f4f20 Mon Sep 17 00:00:00 2001 From: Luwei Zhou Date: Fri, 18 Oct 2024 15:05:22 +0800 Subject: [PATCH 062/112] [Transformations] Fix exception when converting precision on Read_Value node without inputs. (#26829) ### Details: - *A ReadValue node without an input source would raise an exception during FP16 precision conversion. This PR fixes it.* ### Tickets: - *CVS-153067* --- .../src/transformations/convert_precision.cpp | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 54fb6a972b7387..6f5166dfd26760 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -29,7 +29,7 @@ bool fuse_type_to_parameter(const std::shared_ptr& node, bool convert_input_precision); // this function inserts Convert operations to 'data' input and outputs of `node` -// to execute 'node' with the original type. +// to execute 'node' with the original type. This function supports nodes with a single output.
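+// Nodes without inputs (e.g. a ReadValue with no initializing source) only get the output-side Convert; see the input/output size guards in the implementation below.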
bool wrap_into_original_type(const std::shared_ptr& node, const precisions_map& precisions); bool store_original_type_as_attribute(const std::shared_ptr& node, const precisions_map& precisions); @@ -622,17 +622,20 @@ bool wrap_into_original_type(const std::shared_ptr& node, const precis const auto& to = it->second; const auto& from = it->first; - - auto convert_before = std::make_shared(node->input_value(0), from); - node->input(0).replace_source_output(convert_before); - auto consumers = node->output(0).get_target_inputs(); - auto convert_after = std::make_shared(node, to); - for (auto& input : consumers) { - const auto consumer = input.get_node(); - if (ov::is_type(consumer) || ov::is_type(consumer)) { - continue; + if (node->get_input_size()) { + auto convert_before = std::make_shared(node->input_value(0), from); + node->input(0).replace_source_output(convert_before); + } + if (node->get_output_size() == 1) { + auto consumers = node->output(0).get_target_inputs(); + auto convert_after = std::make_shared(node, to); + for (auto& input : consumers) { + const auto consumer = input.get_node(); + if (ov::is_type(consumer) || ov::is_type(consumer)) { + continue; + } + input.replace_source_output(convert_after); } - input.replace_source_output(convert_after); } return true; From 5a65547fd398863d4bad41d66a2fa6838542734b Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Fri, 18 Oct 2024 16:05:29 +0200 Subject: [PATCH 063/112] [TESTS] Fix version for huggingface_hub (#27126) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- tests/requirements_pytorch | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch index 0bda286eb83252..40e1f6f66f52e8 100644 --- a/tests/requirements_pytorch +++ b/tests/requirements_pytorch @@ -38,6 +38,9 @@ torchaudio==2.2.2; platform_system == "Darwin" and platform_machine == "x86_64" wheel==0.44.0 PyYAML==6.0.2 kornia==0.7.3 +super-image==0.1.7 +# huggingface-hub required for super-image +huggingface-hub==0.25.2 # use latest released version once it's available git+https://github.com/huggingface/optimum-intel.git@main; python_version < "3.12" From 62183ab7a695a6939bb82d7d9243ef6db77ef944 Mon Sep 17 00:00:00 2001 From: Andrzej Kopytko Date: Fri, 18 Oct 2024 14:24:37 +0200 Subject: [PATCH 064/112] [DOCS] Moved versioning to repo (#27128) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- docs/sphinx_setup/assets/versions_raw.js | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/sphinx_setup/assets/versions_raw.js diff --git a/docs/sphinx_setup/assets/versions_raw.js b/docs/sphinx_setup/assets/versions_raw.js new file mode 100644 index 00000000000000..8045057450bf5f --- /dev/null +++ b/docs/sphinx_setup/assets/versions_raw.js @@ -0,0 +1 @@ +var data='[{"version": "2024"}, {"version": "2023.3"}, {"version": "2022.3"}, {"version": "nightly"}, {"version": "archives"}]'; \ No newline at end of file From 43df0b6fc1714779bad48736066e917a045de346 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Fri, 18 Oct 2024 05:51:24 -0700 Subject: [PATCH 065/112] TorchFX: GPTQ accuracy fix (#26294) ### Details: - Fix for the accuracy issues discovered in Llama2 GPTQ with aot_autograd ### Tickets: - [CVS-149032](https://jira.devtools.intel.com/browse/CVS-149032) --------- Co-authored-by: Maxim Vafin --- .../workflows/job_pytorch_models_tests.yml | 11 + .../torchfx_gptq_pattern_replacer.cpp | 188 +++++++++++++----- .../models/gptq-torchfx-models-precommit | 1 + .../test_gptq_torchfx_transformations.py | 102 
++++++++++ 4 files changed, 250 insertions(+), 52 deletions(-) create mode 100644 tests/model_hub_tests/transformation_tests/models/gptq-torchfx-models-precommit create mode 100644 tests/model_hub_tests/transformation_tests/test_gptq_torchfx_transformations.py diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index 74915f1d9b823f..8f3699f6ab42a2 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -160,6 +160,17 @@ jobs: TEST_DEVICE: CPU USE_SYSTEM_CACHE: False + - name: TorchFX GPTQ Pattern Test + if: ${{ inputs.model_scope == 'precommit' }} + # install torch 2.3.1 as newer is not yet supported by openvino backend + run: | + export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH + python3 -m pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --upgrade --index-url https://download.pytorch.org/whl/cpu + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_gptq_torchfx_transformations.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_gptqpattern_tests.html --self-contained-html -v --tb=short + env: + TEST_DEVICE: CPU + USE_SYSTEM_CACHE: False + - name: Reformat unsupported ops file if: ${{ inputs.model_scope != 'precommit' && !cancelled()}} run: | diff --git a/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp b/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp index a533739b16fea1..caeeb8c557b380 100644 --- a/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp @@ -40,18 +40,6 @@ uint32_t read_u4_data(const void* array, size_t index) { return val; }; -void write_u4_data(void* array, size_t index, uint32_t data) { - auto arr_u32 = reinterpret_cast(array); - size_t idx_u32 = index / 8; - size_t offset_u32 = index % 8; - uint32_t old_val = arr_u32[idx_u32]; - data = data << (offset_u32 * 4); - uint32_t mask = 15; - mask = ~(mask << (offset_u32 * 4)); - uint32_t new_val = (old_val & mask) | data; - arr_u32[idx_u32] = new_val; -}; - GPTQDecompressionReplacer::GPTQDecompressionReplacer() { const auto& const_1 = wrap_type(); const auto& const_2 = wrap_type(); @@ -73,61 +61,157 @@ GPTQDecompressionReplacer::GPTQDecompressionReplacer() { const auto& convert_2 = wrap_type({const_6}); const auto& bitwise_and = wrap_type({add_or_convert, convert_2}); - ov::matcher_pass_callback callback = [unsqueeze_1](Matcher& m) { + ov::matcher_pass_callback callback = [=](Matcher& m) { auto bitwise_and = m.get_match_root(); if (!bitwise_and) { return false; } const auto& pattern_map = m.get_pattern_value_map(); - const auto& input_node = pattern_map.at(unsqueeze_1).get_node_shared_ptr(); - auto weights_u32 = std::dynamic_pointer_cast(input_node->get_input_node_shared_ptr(0)); - auto axis = std::dynamic_pointer_cast(input_node->get_input_node_shared_ptr(1)); - auto axis_data = axis->get_data_ptr(); - - auto u8_shape = weights_u32->get_shape(); - auto src = weights_u32->get_data_ptr(); - - ov::Shape u4_shape; - bool dim_added = false; - size_t stride = 1; - size_t size_y = 1; - for (size_t i = 0; i < u8_shape.size(); i++) { - if (axis_data[0] == i) { - u4_shape.push_back(8); - dim_added = true; - } - if (axis_data[0] <= i) { - stride *= u8_shape[i]; - } else { - size_y *= u8_shape[i]; - } - u4_shape.push_back(u8_shape[i]); + auto unsqueeze_1_node = pattern_map.at(unsqueeze_1).get_node_shared_ptr(); + auto 
unsqueeze_1_in0_const = + std::dynamic_pointer_cast(unsqueeze_1_node->get_input_node_shared_ptr(0)); + auto unsqueeze_1_in1_const = + std::dynamic_pointer_cast(unsqueeze_1_node->get_input_node_shared_ptr(1)); + auto abs_node = pattern_map.at(abs).get_node_shared_ptr(); + auto abs_in_const = std::dynamic_pointer_cast(abs_node->get_input_node_shared_ptr(0)); + auto broadcast_node = pattern_map.at(broadcast).get_node_shared_ptr(); + auto unsqueeze_2_node = pattern_map.at(unsqueeze_2).get_node_shared_ptr(); + auto unsqueeze_2_in0_const = + std::dynamic_pointer_cast(unsqueeze_2_node->get_input_node_shared_ptr(0)); + auto unsqueeze_2_in1_const = + std::dynamic_pointer_cast(unsqueeze_2_node->get_input_node_shared_ptr(1)); + + OutputVector outputs_1(unsqueeze_1_node->get_output_size()); + OutputVector unsqueeze_1_inputs(2); + unsqueeze_1_inputs[0] = unsqueeze_1_in0_const->outputs()[0]; + unsqueeze_1_inputs[1] = unsqueeze_1_in1_const->outputs()[0]; + if (!unsqueeze_1_node->constant_fold(outputs_1, unsqueeze_1_inputs)) { + return false; } - if (!dim_added) { - u4_shape.push_back(8); + + OutputVector outputs_2(abs_node->get_output_size()); + if (!abs_node->constant_fold(outputs_2, abs_in_const->outputs())) { + return false; } - auto new_const = std::make_shared(element::u4, u4_shape); - auto dst = const_cast(reinterpret_cast(new_const->get_data_ptr())); + OutputVector outputs_3(broadcast_node->get_output_size()); + OutputVector broadcast_inputs(2); + broadcast_inputs[0] = outputs_1[0]; + broadcast_inputs[1] = outputs_2[0]; + if (!broadcast_node->constant_fold(outputs_3, broadcast_inputs)) { + return false; + } + + OutputVector outputs_4(unsqueeze_2_node->get_output_size()); + OutputVector unsqueeze_2_inputs(2); + unsqueeze_2_inputs[0] = unsqueeze_2_in0_const->outputs()[0]; + unsqueeze_2_inputs[1] = unsqueeze_2_in1_const->outputs()[0]; + if (!unsqueeze_2_node->constant_fold(outputs_4, unsqueeze_2_inputs)) { + return false; + } + const int32_t* rs_in0 = + std::dynamic_pointer_cast(outputs_3[0].get_node_shared_ptr())->get_data_ptr(); + const int32_t* rs_in1 = + std::dynamic_pointer_cast(outputs_4[0].get_node_shared_ptr())->get_data_ptr(); + auto shifted_const = std::make_shared(element::i32, outputs_3[0].get_shape()); + auto dst = const_cast(reinterpret_cast(shifted_const->get_data_ptr())); if (!dst) return false; - size_t in_idx = 0; - for (size_t y = 0; y < size_y; y++) { - size_t offset = y * stride * 8; - for (size_t x = 0; x < stride; x++) { - for (size_t z = 0; z < 8; z++) { - uint32_t val = read_u4_data(src, in_idx); - write_u4_data(dst, (offset + x + stride * z), val); - in_idx++; - } + // TODO: Bitwise right shift operation below might need to be + // optimized to reduce FIL. 
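+        // NB: The loop below performs a broadcasted element-wise right shift: 'rs_in1' holds the shift values and is broadcast against 'rs_in0' along the single non-unit dimension 'shift_dim' found above.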
+ size_t rs_in0_shape_size = shape_size(outputs_3[0].get_shape()); + const auto& rs_in0_shape = outputs_3[0].get_shape(); + const auto& rs_in1_shape = outputs_4[0].get_shape(); + int shift_dim = -1; + size_t shift_offset = 1; + for (size_t i = 0; i < rs_in1_shape.size(); ++i) { + size_t dim = rs_in1_shape[i]; + if (dim != 1 && dim != rs_in0_shape[i]) { + return false; + } + if (shift_dim != -1) { + shift_offset *= rs_in0_shape[i]; + } + if (dim == rs_in0_shape[i]) { + shift_dim = static_cast(i); + } + } + if (shift_dim == -1) + return false; + for (size_t k = 0; k < rs_in0_shape_size; ++k) { + size_t shift_idx = (k / shift_offset) % rs_in1_shape[shift_dim]; + int32_t shift_val = rs_in1[shift_idx]; + dst[k] = (rs_in0[k] >> shift_val); + } + + std::shared_ptr convert_1_node = nullptr; + OutputVector outputs_7; + if (pattern_map.find(convert_1) != pattern_map.end()) { + convert_1_node = pattern_map.at(convert_1).get_node_shared_ptr(); + outputs_7.resize(convert_1_node->get_output_size()); + if (!convert_1_node->constant_fold(outputs_7, shifted_const->outputs())) { + return false; + } + } else { + auto convert_3_node = pattern_map.at(convert_3).get_node_shared_ptr(); + auto convert_4_node = pattern_map.at(convert_4).get_node_shared_ptr(); + auto convert_4_in_const = + std::dynamic_pointer_cast(convert_4_node->get_input_node_shared_ptr(0)); + auto add_node = pattern_map.at(add).get_node_shared_ptr(); + OutputVector outputs_5(convert_3_node->get_output_size()); + if (!convert_3_node->constant_fold(outputs_5, shifted_const->outputs())) { + return false; + } + OutputVector outputs_6(convert_4_node->get_output_size()); + if (!convert_4_node->constant_fold(outputs_6, convert_4_in_const->outputs())) { + return false; + } + outputs_7.resize(add_node->get_output_size()); + OutputVector add_inputs(2); + add_inputs[0] = outputs_5[0]; + add_inputs[1] = outputs_6[0]; + if (!add_node->constant_fold(outputs_7, add_inputs)) { + return false; } } - copy_runtime_info_and_name(weights_u32, {new_const}, {weights_u32, bitwise_and}); + auto convert_2_node = pattern_map.at(convert_2).get_node_shared_ptr(); + auto convert_2_in_const = std::dynamic_pointer_cast(convert_2_node->get_input_node_shared_ptr(0)); + + OutputVector outputs_8(convert_2_node->get_output_size()); + if (!convert_2_node->constant_fold(outputs_8, convert_2_in_const->outputs())) { + return false; + } + + OutputVector outputs_9(bitwise_and->get_output_size()); + + const int8_t* and_in0 = + std::dynamic_pointer_cast(outputs_7[0].get_node_shared_ptr())->get_data_ptr(); + const int8_t* and_in1 = + std::dynamic_pointer_cast(outputs_8[0].get_node_shared_ptr())->get_data_ptr(); + auto masked_const = std::make_shared(element::i8, outputs_7[0].get_shape()); + auto masked_dst = const_cast(reinterpret_cast(masked_const->get_data_ptr())); + if (!masked_dst) + return false; + + size_t and_in0_shape_size = shape_size(outputs_7[0].get_shape()); + // TODO: Bitwise and operation below might need to be + // optimized to reduce FIL. 
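+        // The mask is expected to be a splat constant (e.g. 0x0F for 4-bit values), so reading the first element of 'and_in1' once is sufficient for the whole loop.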
+ int8_t mask = and_in1[0]; + for (size_t k = 0; k < and_in0_shape_size; ++k) { + masked_dst[k] = (and_in0[k] & mask); + } + + auto convert_to_u4 = std::make_shared(masked_const, element::u4); + OutputVector outputs_10(convert_to_u4->get_output_size()); + if (!convert_to_u4->constant_fold(outputs_10, masked_const->outputs())) { + return false; + } - auto new_convert = std::make_shared(new_const, bitwise_and->get_output_element_type(0)); - copy_runtime_info_and_name(bitwise_and, {new_convert}, {input_node}); + auto new_convert = + std::make_shared(outputs_10[0].get_node_shared_ptr(), bitwise_and->get_output_element_type(0)); + copy_runtime_info_and_name(bitwise_and, {new_convert}, {unsqueeze_1_node}); replace_node(bitwise_and, new_convert); return true; }; diff --git a/tests/model_hub_tests/transformation_tests/models/gptq-torchfx-models-precommit b/tests/model_hub_tests/transformation_tests/models/gptq-torchfx-models-precommit new file mode 100644 index 00000000000000..b796dd2bf13b5a --- /dev/null +++ b/tests/model_hub_tests/transformation_tests/models/gptq-torchfx-models-precommit @@ -0,0 +1 @@ +atorsvn/TinyLlama-1.1B-Chat-v0.3-gptq-4bit,https://huggingface.co/atorsvn/TinyLlama-1.1B-Chat-v0.3-gptq-4bit diff --git a/tests/model_hub_tests/transformation_tests/test_gptq_torchfx_transformations.py b/tests/model_hub_tests/transformation_tests/test_gptq_torchfx_transformations.py new file mode 100644 index 00000000000000..dc57c02285e448 --- /dev/null +++ b/tests/model_hub_tests/transformation_tests/test_gptq_torchfx_transformations.py @@ -0,0 +1,102 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, pipeline +import torch +import hashlib +from openvino.frontend.pytorch.torchdynamo.execute import compiled_cache +import models_hub_common.utils as utils +import pytest +import os + +def patch_gptq(config): + do_gptq_patching = False + config_dict = config.to_dict() + quantization_config = config_dict.get("quantization_config", None) + do_gptq_patching = quantization_config and quantization_config["quant_method"] == "gptq" + orig_cuda_check = torch.cuda.is_available + orig_post_init_model = None + if do_gptq_patching: + torch.set_default_dtype(torch.float32) + torch.cuda.is_available = lambda: False + + from optimum.gptq import GPTQQuantizer + + orig_post_init_model = GPTQQuantizer.post_init_model + + def post_init_model(self, model): + from auto_gptq import exllama_set_max_input_length + + class StoreAttr(object): + pass + + model.quantize_config = StoreAttr() + model.quantize_config.desc_act = self.desc_act + if self.desc_act and not self.disable_exllama and self.max_input_length is not None: + model = exllama_set_max_input_length(model, self.max_input_length) + return model + + GPTQQuantizer.post_init_model = post_init_model + return orig_cuda_check, orig_post_init_model + +def run_gptq_torchfx(tmp_path, model_id, model_link, prompt_result_pair): + config = AutoConfig.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float32) + cuda, post_init = patch_gptq(config) + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float32) + model = AutoModelForCausalLM.from_pretrained( + model_id, + trust_remote_code=True, + config=config, + device_map='cpu', + torch_dtype=torch.float32 + ) + + pipe = pipeline( + "text-generation", + model=model, + tokenizer=tokenizer, + max_new_tokens=4, + do_sample=True, + temperature=0.01, + top_p=0.01, 
+ top_k=1, + repetition_penalty=1.1, + num_beams=1, + ) + + prompt = prompt_result_pair["prompt"] + expected_md5 = prompt_result_pair["result_md5"] + + model.model.forward = torch.compile(model.model.forward, backend="openvino", dynamic=True, fullgraph=True, options={'aot_autograd': True}) + + result_ov = pipe(prompt) + md5_ov = hashlib.new("md5", result_ov[0]['generated_text'].encode(), usedforsecurity=False).hexdigest() + + u4_ops = ["FullyConnected",] + num_u4_ops = 0 + num_u4_ops_supported = 0 + for pid in compiled_cache: + for op in compiled_cache[pid].get_runtime_model().get_ordered_ops(): + if (str(op.get_rt_info()["layerType"].get()) in u4_ops): + u4_exec = (str(op.get_rt_info()["runtimePrecision"].get()) == "u4") + if u4_exec: + num_u4_ops_supported += 1 + num_u4_ops += 1 + + assert(expected_md5 == md5_ov), "Output does not match with the expected output" + assert((num_u4_ops > 0) and (num_u4_ops == num_u4_ops_supported)), "Runtime precision is not u4" + +@pytest.mark.precommit +@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "gptq-torchfx-models-precommit"))) +@pytest.mark.parametrize('prompt_result_pair', ([ + {"prompt" : "Tell me about AI", "result_md5" : "4385ccbce14627ae91f846b4c8a3f145"}, +])) +def test_gptq_torchfx_precommit(tmp_path, model_name, model_link, mark, reason, prompt_result_pair, ie_device): + assert mark is None or mark == 'skip' or mark == 'xfail', \ + "Incorrect test case: {}, {}".format(model_name, model_link) + if mark == 'skip': + pytest.skip(reason) + elif mark == 'xfail': + pytest.xfail(reason) + run_gptq_torchfx(tmp_path, model_name, model_link, prompt_result_pair) + From 56fe26f6fb3c0617d39fd96f666231088cb8f5dc Mon Sep 17 00:00:00 2001 From: Maksim Doronin Date: Fri, 18 Oct 2024 14:29:02 +0100 Subject: [PATCH 066/112] Introduce protopipe (#27087) ### Details: - Publishing protopipe to open-source ### Tickets: - E-143100 --- .gitmodules | 3 + scripts/CMakeLists.txt | 1 + src/plugins/intel_npu/cmake/features.cmake | 2 + .../intel_npu/thirdparty/CMakeLists.txt | 12 + src/plugins/intel_npu/thirdparty/yaml-cpp | 1 + src/plugins/intel_npu/tools/CMakeLists.txt | 4 + .../intel_npu/tools/protopipe/CMakeLists.txt | 72 ++ .../intel_npu/tools/protopipe/README.md | 608 ++++++++++++ .../tools/protopipe/cmake/standalone.cmake | 63 ++ .../intel_npu/tools/protopipe/main.cpp | 266 ++++++ .../intel_npu/tools/protopipe/src/graph.cpp | 140 +++ .../intel_npu/tools/protopipe/src/graph.hpp | 168 ++++ .../tools/protopipe/src/parser/config.cpp | 872 ++++++++++++++++++ .../tools/protopipe/src/parser/config.hpp | 12 + .../tools/protopipe/src/parser/parser.cpp | 20 + .../tools/protopipe/src/parser/parser.hpp | 61 ++ .../intel_npu/tools/protopipe/src/result.cpp | 22 + .../intel_npu/tools/protopipe/src/result.hpp | 30 + .../src/scenario/accuracy_metrics.cpp | 121 +++ .../src/scenario/accuracy_metrics.hpp | 52 ++ .../protopipe/src/scenario/criterion.cpp | 72 ++ .../protopipe/src/scenario/criterion.hpp | 58 ++ .../protopipe/src/scenario/inference.cpp | 17 + .../protopipe/src/scenario/inference.hpp | 111 +++ .../protopipe/src/scenario/scenario_graph.cpp | 40 + .../protopipe/src/scenario/scenario_graph.hpp | 102 ++ .../protopipe/src/simulation/computation.cpp | 42 + .../protopipe/src/simulation/computation.hpp | 36 + .../src/simulation/computation_builder.cpp | 462 ++++++++++ .../src/simulation/computation_builder.hpp | 74 ++ .../protopipe/src/simulation/dummy_source.cpp | 89 ++ 
.../protopipe/src/simulation/dummy_source.hpp | 37 + .../protopipe/src/simulation/executor.cpp | 66 ++ .../protopipe/src/simulation/executor.hpp | 42 + .../protopipe/src/simulation/layers_data.cpp | 155 ++++ .../protopipe/src/simulation/layers_data.hpp | 57 ++ .../src/simulation/layers_reader.cpp | 46 + .../src/simulation/layers_reader.hpp | 27 + .../protopipe/src/simulation/operations.cpp | 131 +++ .../protopipe/src/simulation/operations.hpp | 77 ++ .../src/simulation/ov_layers_reader.cpp | 215 +++++ .../src/simulation/performance_mode.cpp | 337 +++++++ .../src/simulation/performance_mode.hpp | 41 + .../src/simulation/reference_mode.cpp | 361 ++++++++ .../src/simulation/reference_mode.hpp | 35 + .../protopipe/src/simulation/simulation.cpp | 131 +++ .../protopipe/src/simulation/simulation.hpp | 57 ++ .../src/simulation/validation_mode.cpp | 363 ++++++++ .../src/simulation/validation_mode.hpp | 34 + .../protopipe/src/utils/data_providers.cpp | 64 ++ .../protopipe/src/utils/data_providers.hpp | 70 ++ .../tools/protopipe/src/utils/error.hpp | 39 + .../tools/protopipe/src/utils/logger.cpp | 32 + .../tools/protopipe/src/utils/logger.hpp | 29 + .../tools/protopipe/src/utils/timer.cpp | 73 ++ .../tools/protopipe/src/utils/timer.hpp | 25 + .../tools/protopipe/src/utils/utils.cpp | 84 ++ .../tools/protopipe/src/utils/utils.hpp | 65 ++ .../tools/single-image-test/CMakeLists.txt | 2 +- 59 files changed, 6327 insertions(+), 1 deletion(-) create mode 160000 src/plugins/intel_npu/thirdparty/yaml-cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/CMakeLists.txt create mode 100644 src/plugins/intel_npu/tools/protopipe/README.md create mode 100644 src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake create mode 100644 src/plugins/intel_npu/tools/protopipe/main.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/graph.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/graph.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/parser/config.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/parser/parser.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/parser/parser.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/result.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/result.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/scenario/inference.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/computation.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/computation.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.cpp 
create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/executor.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/executor.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/operations.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/operations.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/utils/error.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/utils/logger.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/utils/logger.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/utils/timer.hpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/utils/utils.cpp create mode 100644 src/plugins/intel_npu/tools/protopipe/src/utils/utils.hpp diff --git a/.gitmodules b/.gitmodules index a9cad1dee5f494..5feb7458da1801 100644 --- a/.gitmodules +++ b/.gitmodules @@ -78,6 +78,9 @@ [submodule "src/plugins/intel_npu/thirdparty/level-zero-ext"] path = src/plugins/intel_npu/thirdparty/level-zero-ext url = https://github.com/intel/level-zero-npu-extensions.git +[submodule "src/plugins/intel_npu/thirdparty/yaml-cpp"] + path = src/plugins/intel_npu/thirdparty/yaml-cpp + url = https://github.com/jbeder/yaml-cpp.git [submodule "thirdparty/telemetry"] path = thirdparty/telemetry url = https://github.com/openvinotoolkit/telemetry.git diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 73cdd57e508bdb..69ad9f460e357a 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -12,6 +12,7 @@ set(shellcheck_skip_list "${OpenVINO_SOURCE_DIR}/thirdparty" "${OpenVINO_SOURCE_DIR}/src/plugins/intel_cpu/thirdparty" "${OpenVINO_SOURCE_DIR}/src/plugins/intel_gpu/thirdparty" + "${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/thirdparty" "${OpenVINO_SOURCE_DIR}/src/bindings/python/thirdparty/pybind11" "${TEMP}") diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 07efefd4452403..8a9dce04f071b9 100644 --- 
a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -20,3 +20,5 @@ if(NOT BUILD_SHARED_LIBS AND NOT ENABLE_MLIR_COMPILER AND NOT ENABLE_DRIVER_COMP endif() ov_dependent_option(ENABLE_IMD_BACKEND "Enable InferenceManagerDemo based NPU AL backend" OFF "NOT WIN32;NOT CMAKE_CROSSCOMPILING" OFF) + +ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) diff --git a/src/plugins/intel_npu/thirdparty/CMakeLists.txt b/src/plugins/intel_npu/thirdparty/CMakeLists.txt index 4d0c66beeb7520..b064b5c7b9acd5 100644 --- a/src/plugins/intel_npu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_npu/thirdparty/CMakeLists.txt @@ -12,3 +12,15 @@ if(ENABLE_ZEROAPI_BACKEND) add_library(LevelZero::NPUExt ALIAS level-zero-ext) install(TARGETS level-zero-ext EXPORT "${PROJECT_NAME}Targets") endif() + +# +# yaml-cpp +# + +if(ENABLE_INTEL_NPU_PROTOPIPE) + add_subdirectory(yaml-cpp EXCLUDE_FROM_ALL) + # NB: Suppress warnings in yaml-cpp + if(SUGGEST_OVERRIDE_SUPPORTED) + target_compile_options(yaml-cpp PRIVATE -Wno-suggest-override) + endif() +endif() diff --git a/src/plugins/intel_npu/thirdparty/yaml-cpp b/src/plugins/intel_npu/thirdparty/yaml-cpp new file mode 160000 index 00000000000000..da82fd982c260e --- /dev/null +++ b/src/plugins/intel_npu/thirdparty/yaml-cpp @@ -0,0 +1 @@ +Subproject commit da82fd982c260e7f335ce5acbceff24b270544d1 diff --git a/src/plugins/intel_npu/tools/CMakeLists.txt b/src/plugins/intel_npu/tools/CMakeLists.txt index c0e620981952e1..ac1a51f74519c8 100644 --- a/src/plugins/intel_npu/tools/CMakeLists.txt +++ b/src/plugins/intel_npu/tools/CMakeLists.txt @@ -6,3 +6,7 @@ add_subdirectory(common) add_subdirectory(compile_tool) add_subdirectory(single-image-test) + +if (ENABLE_INTEL_NPU_PROTOPIPE) + add_subdirectory(protopipe) +endif() diff --git a/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt b/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt new file mode 100644 index 00000000000000..9ba76d89ca8445 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt @@ -0,0 +1,72 @@ +# +# Copyright (C) 2023-2024 Intel Corporation. +# SPDX-License-Identifier: Apache 2.0 +# + +set(TARGET_NAME protopipe) + +if (NOT DEFINED PROJECT_NAME) + cmake_minimum_required(VERSION 3.13 FATAL_ERROR) + project(protopipe_standalone) + include("cmake/standalone.cmake") + return() +endif() + +# +# Dependencies +# + +find_package(OpenCV QUIET COMPONENTS gapi) +if(OpenCV_VERSION VERSION_LESS 4.9) + message(STATUS "NPU ${TARGET_NAME} tool is disabled due to missing dependencies: gapi from OpenCV >= 4.9.") + return() +endif() + +if (WIN32) + # WA: add_tool_target expects to have all dependencies as cmake targets. 
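+    # The INTERFACE target below simply forwards the system winmm.lib link flag to consumers.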
+ add_library(winmm INTERFACE) + target_link_libraries(winmm INTERFACE "winmm.lib") +endif() + +# +# Define the target +# + +set(PROTOPIPE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) + +ov_add_target(ADD_CPPLINT + TYPE EXECUTABLE + NAME ${TARGET_NAME} + ROOT ${CMAKE_CURRENT_SOURCE_DIR} + ADDITIONAL_SOURCE_DIRS ${PROTOPIPE_SOURCE_DIR} + INCLUDES ${PROTOPIPE_SOURCE_DIR} + LINK_LIBRARIES + PRIVATE + Threads::Threads + gflags + yaml-cpp + openvino::runtime + opencv_gapi + winmm) + + + +set_target_properties(${TARGET_NAME} PROPERTIES + FOLDER ${CMAKE_CURRENT_SOURCE_DIR} + CXX_STANDARD 17) + +# +# Install +# + +install(TARGETS ${TARGET_NAME} + RUNTIME DESTINATION "tools/${TARGET_NAME}" + COMPONENT ${NPU_INTERNAL_COMPONENT} + ${OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL}) + +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/README.md") + install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md" + DESTINATION "tools/${TARGET_NAME}" + COMPONENT ${NPU_INTERNAL_COMPONENT} + ${OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL}) +endif() diff --git a/src/plugins/intel_npu/tools/protopipe/README.md b/src/plugins/intel_npu/tools/protopipe/README.md new file mode 100644 index 00000000000000..afe6e8cffbc8c3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/README.md @@ -0,0 +1,608 @@ +# Protopipe +Protopipe is a C++ tool for simulating the performance and validating the accuracy of various AI scenarios. + +Protopipe is built on top of [OpenCV G-API](https://github.com/opencv/opencv/wiki/Graph-API) and supports running inference through the [OpenVINO](https://github.com/openvinotoolkit/openvino) and [ONNXRuntime](https://github.com/microsoft/onnxruntime) frameworks. + +## Table of Contents +* [Quick start](#quick-start) +* [How to configure](#how-to-configure) + * [Global parameters](#global-parameters) + * [Model parameters](#model-parameters) + * [Graph structure](#graph-structure) + * [Dependency Graph](#dependency-graph) + * [Network sequence](#network-sequence) + * [Scenario parameters](#scenario-parameters) + * [Config example](#config-example) +* [How to run](#how-to-run) +* [Use cases](#use-cases) + * [Measure Performance](#measure-performance) + * [Generate Reference](#generate-reference) + * [Validate Accuracy](#validate-accuracy) +* [How to build](#how-to-build) + +## Quick start +Consider the following [Config example](#config-example) to start using Protopipe. + +Learn more about the available config parameters (see: [How to configure](#how-to-configure)) and explore the different execution modes (see: [Use-cases](#use-cases)) for more advanced usage. + +## How to configure +Protopipe uses a **YAML**-format file to describe the AI scenario structure and its parameters. + +### Global parameters +The **YAML** config starts by specifying several global parameters: +- `model_dir` - **Optional**. Path to the models location. (**Default**: ".") +- `blob_dir` - **Optional**. Path to the blobs location. (**Default**: ".") +- `device_name` - **Optional**. OpenVINO device name: _CPU_, _GPU_, etc. (**Default**: _NPU_) +- `compiler_type` - **Optional**. NPU compiler type: _DRIVER_, _MLIR_. (**Default**: _DRIVER_) +- `log_level` - **Optional**. Log level: _NONE_, _INFO_, _DEBUG_. (**Default**: _NONE_) +- `disable_high_resolution_waitable_timer` - **Optional**. Disables the high resolution timer used to perform delays on Windows.
(**Default**: false) + +Example: +``` +model_dir: + local: C:\workspace\models +device_name: NPU +compiler_type: MLIR +log_level: INFO +``` +### Model parameters +#### Common parameters +- `name` or `path` - **Required**. Path to the model file. +- `framework` - **Optional**. Framework to use for inference: *onnxrt*, *openvino*. (**Default**: *openvino*) +- `input_data`, `output_data`, `metric`, `random` - **Optional**. Follow [Use-cases](#use-cases) to learn the details. +#### OpenVINO parameters +- `priority` - **Optional**. Model priority: _HIGH_, _MEDIUM_, _LOW_. (Default: _MEDIUM_) +- `config` - **Optional**. OpenVINO Plugin specific parameters. +- `device` - **Optional**. OpenVINO device name. +- `ip` - **Optional**. Input layer precision: _FP16_, _FP32_, _U8_, _I32_. +- `op` - **Optional**. Output layer precision: _FP16_, _FP32_, _U8_, _I32_. +- `il` - **Optional**. Input layer layout. +- `ol` - **Optional**. Output layer layout. +- `iml` - **Optional**. Input model layout. +- `oml` - **Optional**. Output model layout. + +Examples: +``` +- { name: model.xml, ip: FP16, iml: NHWC, il: NCHW } +- { name: model.xml, ip: { data: FP16 }, priority: HIGH } +- { name: model.xml, device: NPU, config: { PERFORMANCE_HINT: THROUGHPUT } } +``` +#### ONNXRT parameters +- `ep` - **Optional**. Specifies the parameters for a particular execution provider. +- `session_options` - **Optional**. Set various session options for the ONNX Runtime. + +##### Supported Execution Providers +- [OpenVINO Execution Provider](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html) + - `name: OV` - **Required**. Enables OpenVINO Execution Provider. + - `device_type` - **Optional**. The device type: _NPU_U8_, _CPU_FP32_, etc. + - `params` - **Optional**. Accepts a map of options and their corresponding values that can be passed to OV EP. + +**Note**: If none of the supported execution providers are specified, the default `MLAS` will be used. + +Examples: +``` +- { name: model.onnx, framework: onnxrt } # Default (MLAS) EP will be used +- { name: model.onnx, framework: onnxrt, session_options: { session.disable_cpu_ep_fallback: 1 } } # Default (MLAS) EP with the session options will be used +- { name: model.onnx, framework: onnxrt, ep: { name: OV, device_type: NPU_U8, params: { enable_qdq_optimizer: False, model_priority: LOW } } } # OpenVINO EP will be used +``` + +### Graph structure +There are two ways to describe the execution graph structure in Protopipe: +1. Using [Dependency Graph](#dependency-graph) (preferable) +2. Using [Network Sequence](#network-sequence) (old) + +#### Dependency Graph +The dependency graph in Protopipe is specified by: +- `op_desc` - The list of operations; every operation has the following parameters: + - `tag` - **Required**. The unique name of the operation. + - `type` - **Optional**. The operation type: _Infer_, _CPU_, _Compound_ (**Default**: _Infer_) + - `repeat_count` - **Optional**. Runs the operation for the specified number of iterations. +- `connections` - The list of connections between operations. + +Supported operation types: +1. `Infer` - Performs model inference. Follow [Model parameters](#model-parameters) for the details. +2. `CPU` - Simulates CPU load by busy-waiting for `time_in_us` microseconds. +3. 
`Compound` - Defines a subgraph that consists of `Infer` and `CPU` node types + +``` +op_desc: + - { tag: A, path: Model-A.xml, ip: FP16, op: FP16 } + - { tag: B, path: Model-B.onnx, framework: onnxrt, ep: { name: OV, device_type: CPU_FP32 } } + - { tag: C, type: CPU, time_in_us: 5000 } + - { tag: D, path: Model-D.onnx, framework: onnxrt } + - { tag: E, path: Model-E.xml, il: NCHW, device: NPU, config: { PERFORMANCE_HINT: LATENCY } } + - { tag: F, path: Model-F.xml } +connections: + - [A, C, E, F] + - [A, B, D, F] + - [B, F] +``` +```mermaid + graph LR; + A-->B + A-->C + B-->D + B-->F + C-->E + E-->F + D-->F +``` + +The source **is not** reflected in the graph structure; assume that all operations without input connections are implicitly linked with the source, e.g. for the graph above: +```mermaid + graph LR; + Source-->A + A-->B + A-->C + B-->D + B-->F + C-->E + E-->F + D-->F +``` +**Note:** It is also possible that none of the nodes has input connections, consider: +``` +op_desc: + - { tag: A, path: Model-A.xml } + - { tag: B, path: Model-B.xml } + - { tag: C, path: Model-C.xml } +``` + +```mermaid + graph LR; + Source-->A + Source-->B + Source-->C +``` +In this case the section `connections` **can be omitted**. + +**Note:** The graph must remain a `DAG`, so any loops in the graph are prohibited, including self-loops as well as double edges. These are examples of incorrect graphs: +``` +#1: Invalid - The list must contain at least two operations to connect +- [A] +#2: Invalid - Self-loop is prohibited +- [A, A] +#3: Invalid - Loop is prohibited +- [A, B, C, A] +#4: Invalid - Double edge [B->C] is prohibited +- [A, B, C] +- [B, C] +``` +**Example of repeat_count usage** +``` +- op_desc: + - { tag: A, path: Model_A.xml, ... } + - { tag: B, path: Model_B.xml, repeat_count: 20 } + - { tag: C, path: Model_C.xml, ... } + connections: + - [A, B, C] +``` +This defines the following pipeline: +```mermaid +graph LR; + A-->B + B-->C + B--->|20 iterations|B + +``` +**Example of "Compound" type operation**. +``` +op_desc: + - { tag: A, path: Model-A.xml } + - tag: B, + type: Compound, + repeat_count: 10, + op_desc: + - { tag: D, path: Model-D.xml } + - { tag: E, path: Model-E.xml } + - { tag: F, path: Model-F.xml } + connections: + - [D, E] + - [D, F] + - { tag: C, path: Model-C.xml } +connections: + - [A, B, C] +``` +This defines the following pipeline: +```mermaid +graph LR; + A[Model-A.xml] + C[Model-C.xml] + + subgraph B[Repeats 10 iterations] + direction LR + D[Model-D.xml] + E[Model-E.xml] + F[Model-F.xml] + + D --> E + D --> F + + end + + A --> B + B --> C +``` + +#### Network Sequence +There is also a way to describe the graph by using a chain-like structure: +`network` - **Required**. List or list of lists of model parameters. Follow [Model Parameters](#model-parameters) for the details. +`delay_in_us` - **Optional**. Delay between models in microseconds. + +``` +input_stream_list: +- network: + - { name: A.xml, ip: FP16, il: NCHW, device: CPU } + - [{ name: B.xml, ip: FP16, op: FP16 }, { name: C.xml, ip: FP16, op: FP16 }] + - { name: D.xml, ip: FP16, op: FP16, config: { PERFORMANCE_HINT: LATENCY } } + delay_in_us: 5000 +``` + +```mermaid + graph LR; + A-->Delay1; + Delay1-->B; + Delay1-->C; + B-->Delay2; + C-->Delay2; + Delay2-->D +``` + +### Scenario parameters +The list of scenarios is specified by using the `multi_inference` parameter; every scenario has the following parameters: +- `name` - **Optional**. The name of the execution scenario. +- `input_stream_list` - **Required**. 
The list of the streams that will be run in parallel. + +Every stream has the following execution parameters: +- `name` - **Optional**. The name of the stream. +- `iteration_count` - **Optional**. Number of iterations to execute. +- `exec_time_in_secs` - **Optional**. Execute until the specified timeout. +- `frames_interval_in_ms` - **Optional**. Execution frequency of the stream (**Default**: 0 - Unbounded) +- `target_fps` - **Optional**. Execution frequency of the stream. `target_fps = 1000 / frames_interval_in_ms`. `target_fps` and `frames_interval_in_ms` are mutually exclusive and cannot be provided together. +- `target_latency_in_ms` - **Optional**. When an iteration isn't finished within the specified interval, the next frame will be dropped from execution. (**Default**: Disabled) +- `op_desc`/`connections` or `network` - **Required**. Execution graph structure. Follow [Graph structure](#graph-structure) for the details. + +### Config example +Consider the following scenario that consists of two parallel streams specified in `config.yaml`: +``` +model_dir: + local: C:\workspace\models +device_name: NPU +compiler_type: MLIR +log_level: INFO + +multi_inference: +- input_stream_list: + - network: + - { name: A.xml, ip: FP16, il: NCHW, device: CPU } + - [{ name: B.xml, ip: FP16, op: FP16 }, { name: C.xml, ip: FP16, op: FP16 }] + - { name: D.xml, ip: FP16, op: FP16, config: { PERFORMANCE_HINT: LATENCY } } + target_fps: 30 + exec_time_in_secs: 15 + - op_desc: + - { tag: E, path: E.onnx, framework: onnxrt, ep: { name: OV, device_type: NPU_U8 } } + - { tag: F, type: CPU, time_in_us: 5000 } + - { tag: G, path: G.xml, ip: FP16, op: FP16, priority: HIGH } + connections: + - [E, F, G] + target_fps: 100 + exec_time_in_secs: 15 +``` +- The first `stream` is defined by using the [Network sequence](#network-sequence) syntax and will execute the following graph with a `30` FPS cadence: + ```mermaid + graph LR; + A-->B; + A-->C; + B-->D; + C-->D; + ``` +- The second `stream` is defined by using the [Dependency graph](#dependency-graph) syntax and will execute the following graph with a `100` FPS cadence. + ```mermaid + graph LR; + E-->F; + F-->G; + ``` + +Run: +``` +./protopipe -cfg config.yaml --drop_frames +``` +Both streams will be executed simultaneously in different threads for `15` seconds. + +Output format: +``` +stream 0: throughput: <value> FPS, latency: min: <value> ms, avg: <value> ms, max: <value> ms, frames dropped: <dropped>/<total> +stream 1: throughput: <value> FPS, latency: min: <value> ms, avg: <value> ms, max: <value> ms, frames dropped: <dropped>/<total> +``` + +## How to run +Protopipe has the following `CLI` options to configure the execution behaviour: + +`--cfg <path>` - Path to the configuration file. +`--drop_frames` - **Optional**. Drop frames if they come earlier than the stream iteration is completed. E.g. if a `stream` works with `target_fps: 10` (~`100ms` latency) but a stream iteration takes `150ms`, the next iteration will be triggered only in `50ms` if the option is enabled. +`--pipeline` - **Optional**. Enables pipelined execution for all scenarios/streams. +`--niter <value>` - **Optional**. Number of iterations. If specified, overwrites the termination criterion specified in the configuration file for all scenarios/streams. +`-t <value>` - **Optional**. Time in seconds. If specified, overwrites the termination criterion specified in the configuration file for all scenarios/streams. +`--mode <value>` - **Optional**. Execution mode: *performance*, *reference*, *validation* (**Default**: *performance*) +`--exec_filter <value>` - **Optional**. Run only the scenarios that match the provided string pattern. +`--inference_only` - **Optional**. 
Run only inference execution for every model excluding i/o data transfer (**Default**: true) + +### Filtering +Sometimes it's necessary to run only a particular set of the scenarios specified in the config file rather than all of them. +For example, consider the following config file with three scenarios specified in `scenarios.yaml`: +``` +model_dir: + local: /models/ +device_name: CPU +multi_inference: +- input_stream_list: + - network: + - { name: A.xml } +- input_stream_list: + - network: + - { name: B.xml } +- input_stream_list: + - network: + - { name: C.xml } +``` +By default all scenarios are assigned unique names according to the `multi_inference_<N>` pattern. +E.g. the scenario with model `A.xml` has the default name `multi_inference_0`. +Use the `-exec_filter <pattern>` CLI option to control which scenarios from the config should be executed: +``` +./protopipe -cfg scenarios.yaml -niter 100 -exec_filter=".*[0-1]" +``` +Only the `multi_inference_0` and `multi_inference_1` scenarios will be executed. + +It's also possible to overwrite the default names in the config file: +``` +model_dir: + local: /models/ +device_name: CPU +multi_inference: +- name: Model-A-Scenario + input_stream_list: + - network: + - { name: A.xml } +- name: Model-B-Scenario + input_stream_list: + - network: + - { name: B.xml } +- name: Model-C-Scenario + input_stream_list: + - network: + - { name: C.xml } +``` +and use them for filtering: +``` +./protopipe --cfg scenarios.yaml --niter 100 --exec_filter ".*-[AB].*" +``` +Only the `Model-A-Scenario` and `Model-B-Scenario` scenarios will be executed. + +**Note**: Protopipe uses [std::regex](https://en.cppreference.com/w/cpp/regex) rules for pattern matching. + +## Use cases +Once the scenario configuration is defined (see: [How to configure](#how-to-configure)), it can be used for various use cases. +### Measure performance +`Protopipe` can report performance statistics; consider the following run example: +``` +./protopipe --cfg config.yaml --drop_frames -t 30 +``` +Example of output: +``` +stream 0: throughput: 7.62659 FPS, latency: min: 93.804 ms, avg: 111.31 ms, max: 145.178 ms, frames dropped: 290/390 +``` +It might also be interesting to play with the following `CLI` options: +- `--drop_frames=false` - Disables frame drop. By default, if an iteration doesn't fit into the 1000 / `target_fps` latency interval, the next iteration will be skipped. +- `--inference_only=false` - Enables i/o data transfer for inference. By default, only inference time is captured in the performance statistics. +- `--pipeline` - Enables ***pipelined*** execution. + +### Generate reference +As a prerequisite for accuracy validation it's useful to have a mechanism for generating the reference output data to compare with. In Protopipe this can be done by using the `reference` mode. +Use additional parameters to configure `reference` mode: +- `input_data` - **Required**. Path that contains input data for the model; if the entity under the path is empty, input data will be generated randomly and dumped into the specified path. +- `output_data` - **Required**. Path where the reference output data is dumped. +- `random` - **Optional**. Initializer to generate input data randomly. 
(Default: ` { dist: uniform, low: 0.0, high: 255 }`) + +Examples: +``` +random: { dist: uniform, low: -1.0, high: 1.0 } # specified globally for all models +multi_inference: +- input_stream_list: + - network: + - { name: A.xml, ip: FP16, input_data: A-inputs/, output_data: B-inputs/ } + # overwrites global initializer for the model B.xml + - { name: B.xml, ip: FP16, input_data: B-inputs/, output_data: B-outputs/, random: { dist: uniform, low: 0, high: 255.0 } } +``` + +Run `Protopipe` in `reference` mode: +``` +./protopipe -cfg config.yaml -mode reference -niter 10 +``` +Output: +``` +stream 0: Reference data has been generated for 10 iteration(s) +``` + +### Validate accuracy +Protopipe has the dedicated `validation` mode to perform accuracy validation. An existing configuration file can simply be extended to perform accuracy validation: + +- `save_validation_outputs` - **Optional**. Accepts the path where actual execution outputs are dumped. (Default: disabled) +- `metric` - **Optional**. Accuracy metric to compare actual vs reference outputs. (Default: `{ name: norm, tolerance: 0.0 }`) +- `input_data` - **Required**. Path that contains input data for the model. +- `output_data` - **Required**. Path that contains **reference** data to compare with. + +**Note**: If a folder is provided either for **input_data** or **output_data**, it must be in the following format: +``` +input_data/ + <model_name>/ + input_0.bin + input_1.bin + ... + input_N.bin + +output_data/ + <model_name>/ + output_0.bin + output_1.bin + ... + output_N.bin +``` +**Note**: input and output data can be generated automatically by using `Protopipe` in **reference** mode. (see: [Generate reference](#generate-reference)) + +Examples: +``` +- { name: model.xml, ip: FP16, input_data: input_data/, output_data: output_data/ } +- { name: model.xml, ip: FP16, input_data: input.bin, output_data: output.bin } +- { name: model.xml, ip: FP16, input_data: { data: input.bin }, output_data: { result: output.bin } } +``` + +### Supported metrics +1. L2 Norm: $$\text{Norm}(\mathbf{A}, \mathbf{B}) = \sqrt{\sum_{i,j} (A_{i,j} - B_{i,j})^2}$$ +Parameters: + - `name: norm` - **Required**. Enables the L2 Norm metric. + - `tolerance` - **Required**. If the value of the metric is greater than **tolerance**, it will be treated as **FAIL**. +2. Cosine similarity: $$\text{Cosine}(\mathbf{A}, \mathbf{B}) = \frac{\mathbf{A} \cdot \mathbf{B}}{\| \mathbf{A} \|_2 \| \mathbf{B} \|_2}$$ +Parameters: + - `name: cosine` - **Required**. Enables the cosine similarity metric. + - `threshold` - **Required**. If the value of the metric is lower than **threshold**, it will be treated as **FAIL**. +3. NRMSE: $$\text{NRMSE}(\mathbf{A}, \mathbf{B}) = \frac{1}{D}\sqrt{\frac{1}{N}\sum_{i=1}^N(A_i - B_i)^2}$$ +Where, +$$D = \text{max}(0.001, \text{max}(A_{max}-A_{min}\text{, } B_{max}-B_{min}))$$ +Parameters: + - `name: nrmse` - **Required**. Enables the NRMSE metric. + - `tolerance` - **Required**. If the value of the metric is greater than **tolerance**, it will be treated as **FAIL**. 
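+
+As a quick worked illustration, the three formulas above can be reproduced with a minimal NumPy sketch (an editor's sketch, not part of the Protopipe sources; the helper names are arbitrary, and the 0.001 floor in NRMSE follows the definition above):
+```
+import numpy as np
+
+def l2_norm(a, b):
+    # Square root of the sum of squared element-wise differences
+    return float(np.sqrt(np.sum((a - b) ** 2)))
+
+def cosine(a, b):
+    # Dot product of the flattened tensors over the product of their L2 norms
+    a, b = a.ravel(), b.ravel()
+    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
+
+def nrmse(a, b):
+    # RMSE normalized by the larger of the two value ranges, floored at 0.001
+    rmse = np.sqrt(np.mean((a - b) ** 2))
+    d = max(0.001, max(a.max() - a.min(), b.max() - b.min()))
+    return float(rmse / d)
+
+# A check like nrmse(actual, reference) <= tolerance then corresponds to PASS.
+```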
+ +### Example +Consider the following `config.yaml`: +``` +model_dir: + local: C:\workspace\models +device_name: NPU +compiler_type: MLIR +log_level: INFO + +save_validation_outputs: actual-outputs/ +metric: { name: norm, tolerance: 0.01 } + +multi_inference: +- input_stream_list: + - network: + - { name: A.xml, ip: FP16, input_data: A-inputs/, output_data: A-outputs/ } + # overwrites the global metric for the model B.xml + - { name: B.xml, ip: FP16, input_data: B-inputs/, output_data: B-outputs/, metric: { name: norm, tolerance: 0.0 } } +``` + +Use `reference` mode to generate random input data for every model and calculate the reference outputs. +**Note**: If the reference device is different, it can be changed in the config file (`device_name`) accordingly. +``` +./protopipe --cfg config.yaml --mode reference -niter 10 +``` +Use `validation` mode to perform accuracy validation: +``` +./protopipe --cfg config.yaml --mode validation -t 15 +``` +Example of successful validation: +``` +stream 0: Validation has passed for <N> iteration(s) +``` +In case of accuracy issues the output will be the following: +``` +stream 0: Accuracy check failed on <N> iteration(s) (first 10): +Iteration <K>: + Model: A, Layer: <layer_name>, Metric: Norm{tolerance: 0.01}, Reason: <value> > 0.01; +``` + +## How to build +### Prerequisites +1. Clone the `npu-plugin` repository +2. Build OpenCV G-API with OpenVINO/ONNXRT support +#### Build OpenCV G-API with OpenVINO/ONNXRT support +1. Clone OpenCV repo: + ``` + git clone https://github.com/opencv/opencv + cd opencv && git checkout 78195bc3df + ``` +2. Build OpenCV G-API: + ``` + mkdir -p build && cd build + cmake ../ -DBUILD_LIST=gapi \ + -DCMAKE_BUILD_TYPE=Release \ + -DWITH_OPENVINO=ON \ + -DOpenVINO_DIR=<path-to-OpenVINO-install-dir> \ + -DWITH_ONNX=ON \ + -DORT_INSTALL_DIR=<path-to-ONNXRT-install-dir> + cmake --build . --config Release --target opencv_gapi --parallel + ``` +### In-plugin build + +1. Clone and build [OpenVINO](https://github.com/openvinotoolkit/openvino) from sources +2. Build OpenCV G-API with OpenVINO / ONNXRT support +3. Clone the `npu-plugin` repository + ``` + git clone https://github.com/openvinotoolkit/npu_plugin + git submodule update --init --recursive + ``` +4. Build `Protopipe` as part of the `npu-plugin` build: + ``` + mkdir build && cd build + cmake ../ -DOpenCV_DIR=<path-to-OpenCV-build-dir> -DOpenVINODeveloperPackage_DIR=<path-to-OpenVINO-build-dir> + cmake --build . --config Release --target protopipe --parallel + ``` + +### Standalone build +1. Build `yaml-cpp` + ``` + mkdir -p yaml-cpp_build && cd yaml-cpp_build + cmake ../<npu-plugin>/thirdparty/yaml-cpp -DCMAKE_INSTALL_PREFIX=install + cmake --build . --config Release --target install --parallel + ``` +2. Build `gflags` + ``` + git clone https://github.com/gflags/gflags + cd gflags + mkdir -p gflags_build && cd gflags_build + cmake ../ -DCMAKE_INSTALL_PREFIX=install + cmake --build . --config Release --target install --parallel + ``` +3. Build `Protopipe` + ``` + mkdir -p protopipe_build && cd protopipe_build + cmake <npu-plugin>/tools/protopipe/ \ + -DOpenCV_DIR=<path-to-OpenCV-build-dir> \ + -Dgflags_DIR=<path-to-gflags-install-dir> \ + -DOpenVINO_DIR=<path-to-OpenVINO-install-dir> + + cmake --build . --config Release --target protopipe --parallel + ``` +### Verify the installation +**Note**: Make sure the `opencv_*` libraries are visible in the environment: +- Windows: + ``` + set PATH=<opencv-dir>\build\bin\Release\;%PATH% + ``` +- Linux: + ``` + export LD_LIBRARY_PATH=<opencv-dir>/build/lib/:$LD_LIBRARY_PATH + ``` +**Note**: If `OpenCV` has been built with `ONNXRT` support, all `ONNXRT` related libraries must be located in the same folder as the `protopipe` executable. 
+
+Run `Protopipe` with the `-h` flag to verify the installation:
+```
+> protopipe.exe -h
+```
+A successful build will show the information about the `Protopipe` CLI options:
+```
+protopipe [OPTIONS]
+
+ Common options:
+    -h                     Optional. Print the usage message.
+    -cfg                   Path to the configuration file.
+    -pipeline              Optional. Enable pipelined execution.
+    -drop_frames           Optional. Drop frames if they come earlier than pipeline is completed.
+    -mode                  Optional. Simulation mode: performance (default), reference, validation.
+    -niter                 Optional. Number of iterations. If specified overwrites termination criterion for all scenarios in configuration file.
+    -t                     Optional. Time in seconds. If specified overwrites termination criterion for all scenarios in configuration file.
+    -inference_only        Optional. Run only inference execution for every model excluding i/o data transfer. Applicable only for "performance" mode. (default: true).
+    -exec_filter           Optional. Run the scenarios that match provided string pattern.
+```
diff --git a/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake b/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake
new file mode 100644
index 00000000000000..090756f86c44c0
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake
@@ -0,0 +1,63 @@
+#
+# Copyright (C) 2024 Intel Corporation.
+# SPDX-License-Identifier: Apache 2.0
+#
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+if("${CMAKE_BUILD_TYPE}" STREQUAL "")
+    set(CMAKE_BUILD_TYPE "Release")
+endif()
+
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+find_package(Threads REQUIRED)
+find_package(OpenCV 4.9.0 REQUIRED COMPONENTS gapi)
+
+find_package(yaml-cpp QUIET)
+find_package(gflags QUIET)
+
+if (NOT yaml-cpp_FOUND)
+    set(YAML_CPP_SOURCES_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/yaml-cpp")
+    message(STATUS "yaml-cpp package was not found. Trying to find source package in ${YAML_CPP_SOURCES_PATH}.")
+    if(EXISTS ${YAML_CPP_SOURCES_PATH})
+        message(STATUS "yaml-cpp source package found. yaml-cpp will be built from sources.")
+        add_subdirectory(${YAML_CPP_SOURCES_PATH} yaml-cpp EXCLUDE_FROM_ALL)
+    else()
+        message(FATAL_ERROR "yaml-cpp package and sources were not found. CMake will exit." )
+    endif()
+endif()
+
+if (NOT gflags_FOUND)
+    set(GFLAGS_SOURCES_PATH "${PACKAGE_PREFIX_DIR}/samples/cpp/thirdparty/gflags")
+    message(STATUS "gflags package was not found. Trying to find source package in ${GFLAGS_SOURCES_PATH}.")
+    if(EXISTS ${GFLAGS_SOURCES_PATH})
+        message(STATUS "gflags source package found. gflags will be built from sources.")
+        add_subdirectory(${GFLAGS_SOURCES_PATH} gflags EXCLUDE_FROM_ALL)
+    else()
+        message(FATAL_ERROR "gflags was not found. CMake will exit."
) + endif() +endif() + +set(DEPENDENCIES + Threads::Threads + gflags + yaml-cpp + openvino::runtime + opencv_gapi +) + +if (WIN32) + list(APPEND DEPENDENCIES "winmm.lib") +endif() + +file(GLOB_RECURSE SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp") +list(APPEND SOURCES main.cpp) + +add_executable(${TARGET_NAME} ${SOURCES}) +target_link_libraries(${TARGET_NAME} PRIVATE ${DEPENDENCIES}) +target_include_directories(${TARGET_NAME} PUBLIC "${PROJECT_SOURCE_DIR}/src/") + +install(TARGETS ${TARGET_NAME} + DESTINATION "tools/${TARGET_NAME}" + COMPONENT npu_tools) diff --git a/src/plugins/intel_npu/tools/protopipe/main.cpp b/src/plugins/intel_npu/tools/protopipe/main.cpp new file mode 100644 index 00000000000000..8596ba864335ca --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/main.cpp @@ -0,0 +1,266 @@ +// +// Copyright (C) 2023-2024 Intel Corporation. +// SPDX-License-Identifier: Apache 2.0 +// + +#include +#include +#include + +#include + +#include "parser/parser.hpp" +#include "scenario/scenario_graph.hpp" +#include "simulation/performance_mode.hpp" +#include "simulation/reference_mode.hpp" +#include "simulation/validation_mode.hpp" + +#include "utils/error.hpp" +#include "utils/logger.hpp" + +static constexpr char help_message[] = "Optional. Print the usage message."; +static constexpr char cfg_message[] = "Path to the configuration file."; +static constexpr char device_message[] = + "Optional. Device name. If specified overwrites device specified in config file."; +static constexpr char pipeline_message[] = "Optional. Enable pipelined execution."; +static constexpr char drop_message[] = "Optional. Drop frames if they come earlier than pipeline is completed."; +static constexpr char mode_message[] = "Optional. Simulation mode: performance (default), reference, validation."; +static constexpr char niter_message[] = "Optional. Number of iterations. If specified overwrites termination criterion" + " for all scenarios in configuration file."; +static constexpr char exec_time_message[] = "Optional. Time in seconds. If specified overwrites termination criterion" + " for all scenarios in configuration file."; +static constexpr char inference_only_message[] = + "Optional. Run only inference execution for every model excluding i/o data transfer." + " Applicable only for \"performance\" mode. (default: true)."; + +static constexpr char exec_filter_msg[] = "Optional. 
Run the scenarios that match provided string pattern."; + +DEFINE_bool(h, false, help_message); +DEFINE_string(cfg, "", cfg_message); +DEFINE_string(d, "", device_message); +DEFINE_bool(pipeline, false, pipeline_message); +DEFINE_bool(drop_frames, false, drop_message); +DEFINE_string(mode, "performance", mode_message); +DEFINE_uint64(niter, 0, niter_message); +DEFINE_uint64(t, 0, exec_time_message); +DEFINE_bool(inference_only, true, inference_only_message); +DEFINE_string(exec_filter, ".*", exec_filter_msg); + +static void showUsage() { + std::cout << "protopipe [OPTIONS]" << std::endl; + std::cout << std::endl; + std::cout << " Common options: " << std::endl; + std::cout << " -h " << help_message << std::endl; + std::cout << " -cfg " << cfg_message << std::endl; + std::cout << " -pipeline " << pipeline_message << std::endl; + std::cout << " -drop_frames " << drop_message << std::endl; + std::cout << " -d " << device_message << std::endl; + std::cout << " -mode " << mode_message << std::endl; + std::cout << " -niter " << niter_message << std::endl; + std::cout << " -t " << exec_time_message << std::endl; + std::cout << " -inference_only " << inference_only_message << std::endl; + std::cout << " -exec_filter " << exec_filter_msg << std::endl; + std::cout << std::endl; +} + +bool parseCommandLine(int* argc, char*** argv) { + gflags::ParseCommandLineNonHelpFlags(argc, argv, true); + + if (FLAGS_h) { + showUsage(); + return false; + } + + if (FLAGS_cfg.empty()) { + throw std::invalid_argument("Path to config file is required"); + } + + std::cout << "Parameters:" << std::endl; + std::cout << " Config file: " << FLAGS_cfg << std::endl; + std::cout << " Pipelining is enabled: " << std::boolalpha << FLAGS_pipeline << std::endl; + std::cout << " Simulation mode: " << FLAGS_mode << std::endl; + std::cout << " Inference only: " << std::boolalpha << FLAGS_inference_only << std::endl; + std::cout << " Device: " << FLAGS_d << std::endl; + return true; +} + +static ICompiled::Ptr compileSimulation(Simulation::Ptr simulation, const bool pipelined, const bool drop_frames) { + LOG_INFO() << "Compile simulation" << std::endl; + if (pipelined) { + return simulation->compilePipelined(drop_frames); + } + return simulation->compileSync(drop_frames); +}; + +class ThreadRunner { +public: + using F = std::function; + void add(F&& func) { + m_funcs.push_back(std::move(func)); + } + void run(); + +private: + std::vector m_funcs; +}; + +void ThreadRunner::run() { + std::vector> futures; + futures.reserve(m_funcs.size()); + for (auto&& func : m_funcs) { + futures.push_back(std::async(std::launch::async, std::move(func))); + } + for (auto& future : futures) { + future.get(); + }; +}; + +class Task { +public: + Task(ICompiled::Ptr&& compiled, std::string&& name, ITermCriterion::Ptr&& criterion); + + void operator()(); + const Result& result() const; + const std::string& name() const; + +private: + ICompiled::Ptr m_compiled; + std::string m_name; + ITermCriterion::Ptr m_criterion; + + Result m_result; +}; + +Task::Task(ICompiled::Ptr&& compiled, std::string&& name, ITermCriterion::Ptr&& criterion) + : m_compiled(std::move(compiled)), m_name(std::move(name)), m_criterion(std::move(criterion)) { +} + +void Task::operator()() { + try { + m_result = m_compiled->run(m_criterion); + } catch (const std::exception& e) { + m_result = Error{e.what()}; + } +} + +const Result& Task::result() const { + return m_result; +} + +const std::string& Task::name() const { + return m_name; +} + +static Simulation::Ptr createSimulation(const 
std::string& mode, StreamDesc&& stream, const bool inference_only, + const Config& config) { + Simulation::Ptr simulation; + // NB: Common parameters for all simulations + Simulation::Config cfg{stream.name, stream.frames_interval_in_us, config.disable_high_resolution_timer, + std::move(stream.graph), std::move(stream.infer_params_map)}; + if (mode == "performance") { + PerformanceSimulation::Options opts{config.initializer, std::move(stream.initializers_map), + std::move(stream.input_data_map), inference_only, + std::move(stream.target_latency)}; + simulation = std::make_shared(std::move(cfg), std::move(opts)); + } else if (mode == "reference") { + CalcRefSimulation::Options opts{config.initializer, std::move(stream.initializers_map), + std::move(stream.input_data_map), std::move(stream.output_data_map)}; + simulation = std::make_shared(std::move(cfg), std::move(opts)); + } else if (mode == "validation") { + ValSimulation::Options opts{config.metric, std::move(stream.metrics_map), std::move(stream.input_data_map), + std::move(stream.output_data_map), std::move(stream.per_iter_outputs_path)}; + simulation = std::make_shared(std::move(cfg), std::move(opts)); + } else { + throw std::logic_error("Unsupported simulation mode: " + mode); + } + ASSERT(simulation); + return simulation; +} + +int main(int argc, char* argv[]) { + // NB: Intentionally wrapped into try-catch to display exceptions occur on windows. + try { + if (!parseCommandLine(&argc, &argv)) { + return 0; + } + ReplaceBy replace_by{FLAGS_d}; + + auto parser = std::make_shared(FLAGS_cfg); + + LOG_INFO() << "Parse scenarios from " << FLAGS_cfg << " config file" << std::endl; + auto config = parser->parseScenarios(replace_by); + LOG_INFO() << "Found " << config.scenarios.size() << " scenario(s)" << std::endl; + + // NB: Overwrite termination criteria for all scenarios if specified via CLI + ITermCriterion::Ptr global_criterion; + if (FLAGS_niter != 0u) { + LOG_INFO() << "Termination criterion of " << FLAGS_niter << " iteration(s) will be used for all scenarios" + << std::endl; + global_criterion = std::make_shared(FLAGS_niter); + } + if (FLAGS_t != 0u) { + if (global_criterion) { + // TODO: In fact, it make sense to have them both enabled. 
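+                // NB: (descriptive note) CombinedCriterion from scenario/criterion.hpp could merge
+                // the two limits (stop on whichever fires first), if that is ever desired.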
+ THROW_ERROR("-niter and -t options can't be specified together!"); + } + LOG_INFO() << "Termination criterion of " << FLAGS_t << " second(s) will be used for all scenarios" + << std::endl; + // NB: TimeOut accepts microseconds + global_criterion = std::make_shared(FLAGS_t * 1'000'000); + } + + std::regex filter_regex{FLAGS_exec_filter}; + bool any_scenario_failed = false; + for (auto&& scenario : config.scenarios) { + // NB: Skip the scenarios that don't match provided filter pattern + if (!std::regex_match(scenario.name, filter_regex)) { + LOG_INFO() << "Skip the scenario " << scenario.name << " as it doesn't match the -exec_filter=\"" + << FLAGS_exec_filter << "\" pattern" << std::endl; + continue; + } + LOG_INFO() << "Start processing " << scenario.name << std::endl; + + ThreadRunner runner; + std::vector tasks; + tasks.reserve(scenario.streams.size()); + for (auto&& stream : scenario.streams) { + auto criterion = stream.criterion; + auto stream_name = stream.name; + if (global_criterion) { + if (criterion) { + LOG_INFO() << "Stream: " << stream_name + << " termination criterion is overwritten by CLI parameter" << std::endl; + } + criterion = global_criterion->clone(); + } + auto simulation = createSimulation(FLAGS_mode, std::move(stream), FLAGS_inference_only, config); + auto compiled = compileSimulation(simulation, FLAGS_pipeline, FLAGS_drop_frames); + tasks.emplace_back(std::move(compiled), std::move(stream_name), std::move(criterion)); + runner.add(std::ref(tasks.back())); + } + + LOG_INFO() << "Run " << tasks.size() << " stream(s) asynchronously" << std::endl; + runner.run(); + LOG_INFO() << "Execution has finished" << std::endl; + + for (const auto& task : tasks) { + if (!task.result()) { + // NB: Scenario failed if any of the streams failed + any_scenario_failed = true; + } + std::cout << "stream " << task.name() << ": " << task.result().str() << std::endl; + } + std::cout << "\n"; + } + if (any_scenario_failed) { + return EXIT_FAILURE; + } + } catch (const std::exception& e) { + std::cout << e.what() << std::endl; + throw; + } catch (...) 
{ + std::cout << "Unknown error" << std::endl; + throw; + } + return 0; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/graph.cpp b/src/plugins/intel_npu/tools/protopipe/src/graph.cpp new file mode 100644 index 00000000000000..d13d2954a21b12 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/graph.cpp @@ -0,0 +1,140 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "graph.hpp" + +Nodes Node::srcNodes() const { + Nodes src_nodes; + src_nodes.reserve(m_src_edges.size()); + std::transform(m_src_edges.begin(), m_src_edges.end(), std::back_inserter(src_nodes), [](EdgeHandle edge) { + return edge->srcNode(); + }); + return src_nodes; +} + +Nodes Node::dstNodes() const { + Nodes dst_nodes; + dst_nodes.reserve(m_dst_edges.size()); + std::transform(m_dst_edges.begin(), m_dst_edges.end(), std::back_inserter(dst_nodes), [](EdgeHandle edge) { + return edge->dstNode(); + }); + return dst_nodes; +} + +Edges Node::srcEdges() const { + return {m_src_edges.begin(), m_src_edges.end()}; +} + +Edges Node::dstEdges() const { + return {m_dst_edges.begin(), m_dst_edges.end()}; +} + +NodeHandle Graph::create() { + auto node = std::make_shared(); + NodeHandle nh(node); + m_nodes.emplace(node.get(), MetaPtr{node, Meta{}}); + return nh; +} + +void Graph::remove(NodeHandle nh) { + auto src_edges = nh->srcEdges(); + for (size_t i = 0; i < src_edges.size(); ++i) { + remove(src_edges[i]); + } + auto dst_edges = nh->dstEdges(); + for (size_t i = 0; i < dst_edges.size(); ++i) { + remove(dst_edges[i]); + } + m_nodes.erase(nh.get()); +} + +void Graph::remove(EdgeHandle eh) { + auto src = eh->srcNode(); + auto dst = eh->dstNode(); + src->m_dst_edges.erase(eh); + dst->m_src_edges.erase(eh); + m_edges.erase(eh.get()); +}; + +EdgeHandle Graph::link(NodeHandle src, NodeHandle dst) { + auto edge = std::make_shared(src, dst); + EdgeHandle eh{edge}; + m_edges.emplace(edge.get(), MetaPtr{edge, Meta{}}); + src->m_dst_edges.insert(eh); + dst->m_src_edges.insert(eh); + return eh; +} + +Meta& Graph::meta(NodeHandle handle) { + const auto it = m_nodes.find(handle.get()); + ASSERT(it != m_nodes.end()); + return it->second.meta; +} + +const Meta& Graph::meta(NodeHandle handle) const { + const auto it = m_nodes.find(handle.get()); + ASSERT(it != m_nodes.end()); + return it->second.meta; +} + +Meta& Graph::meta(EdgeHandle handle) { + const auto it = m_edges.find(handle.get()); + ASSERT(it != m_edges.end()); + return it->second.meta; +} + +const Meta& Graph::meta(EdgeHandle handle) const { + const auto it = m_edges.find(handle.get()); + ASSERT(it != m_edges.end()); + return it->second.meta; +} + +std::vector Graph::nodes() const { + std::vector ret; + std::transform(m_nodes.begin(), m_nodes.end(), std::back_inserter(ret), [](const auto& p) { + return NodeHandle{p.second.ptr}; + }); + return ret; +} + +static void dfs(NodeHandle& nh, std::unordered_set& visited, std::stack& stack) { + visited.insert(nh); + auto dst_nodes = nh->dstNodes(); + for (auto dst_nh : dst_nodes) { + auto it = visited.find(dst_nh); + if (it == visited.end()) { + dfs(dst_nh, visited, stack); + } + } + stack.push(nh); +}; + +std::vector Graph::sorted() const { + std::unordered_set visited; + std::stack stack; + const auto nodes = this->nodes(); + for (auto nh : nodes) { + auto it = visited.find(nh); + if (it == visited.end()) { + dfs(nh, visited, stack); + } + } + std::vector sorted; + while (!stack.empty()) { + sorted.push_back(stack.top()); + stack.pop(); + } + return 
sorted; +} + +Meta& Meta::operator+=(const Meta& other) { + for (const auto& p : other.store) { + ASSERT(store.emplace(p.first, p.second).second); + } + return *this; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/graph.hpp b/src/plugins/intel_npu/tools/protopipe/src/graph.hpp new file mode 100644 index 00000000000000..66aeccbe156d09 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/graph.hpp @@ -0,0 +1,168 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "utils/error.hpp" + +template +class WeakHandle { +public: + explicit WeakHandle(std::shared_ptr obj): m_obj(obj) { + } + T* get() const { + return m_obj.lock().get(); + } + T* operator->() const { + return get(); + } + bool operator==(const WeakHandle& other) const { + return get() == other.get(); + } + +private: + std::weak_ptr m_obj; +}; + +namespace std { +template +struct hash> { + uint64_t operator()(const WeakHandle& handle) const { + return std::hash()(handle.get()); + } +}; +} // namespace std + +class Graph; +class Node; +class Edge; + +using NodeHandle = WeakHandle; +using EdgeHandle = WeakHandle; +using Nodes = std::vector; +using Edges = std::vector; +using NodeSet = std::unordered_set; +using EdgeSet = std::unordered_set; + +class Node { + friend class Graph; + using Ptr = std::shared_ptr; + +public: + Nodes srcNodes() const; + Nodes dstNodes() const; + Edges srcEdges() const; + Edges dstEdges() const; + +private: + EdgeSet m_src_edges; + EdgeSet m_dst_edges; +}; + +class Edge { + friend class Graph; + using Ptr = std::shared_ptr; + +public: + Edge(NodeHandle src, NodeHandle dst): m_src(src), m_dst(dst) { + } + NodeHandle srcNode() const { + return m_src; + } + NodeHandle dstNode() const { + return m_dst; + } + +private: + NodeHandle m_src; + NodeHandle m_dst; +}; + +class Meta { +public: + template + void set(T&& meta); + template + const T& get() const; + template + T& get(); + template + bool has() const; + Meta& operator+=(const Meta& other); + +private: + using MetaStore = std::unordered_map; + MetaStore store; +}; + +template +void Meta::set(T&& meta) { + // NB: Check if there is no such meta yet. 
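+    // (std::unordered_map::emplace returns an {iterator, bool} pair; .second is false
+    // when a value of this type has already been stored, so the ASSERT fires on duplicates.)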
+ ASSERT(store.emplace(std::type_index(typeid(T)), std::forward(meta)).second); +} + +template +bool Meta::has() const { + auto it = store.find(std::type_index(typeid(T))); + return it != store.end(); +} + +template +const T& Meta::get() const { + const auto it = store.find(std::type_index(typeid(T))); + ASSERT(it != store.end()); + return *std::any_cast(&it->second); +} + +template +T& Meta::get() { + auto it = store.find(std::type_index(typeid(T))); + ASSERT(it != store.end()); + return *std::any_cast(&it->second); +} + +class Graph { +public: + NodeHandle create(); + void remove(NodeHandle nh); + void remove(EdgeHandle eh); + EdgeHandle link(NodeHandle src, NodeHandle dst); + + Meta& meta() { + return m_graph_meta; + } + const Meta& meta() const { + return m_graph_meta; + } + + Meta& meta(NodeHandle handle); + const Meta& meta(NodeHandle handle) const; + Meta& meta(EdgeHandle handle); + const Meta& meta(EdgeHandle handle) const; + + std::vector nodes() const; + std::vector sorted() const; + +private: + template + struct MetaPtr { + std::shared_ptr ptr; + Meta meta; + }; + template + using MetaMap = std::unordered_map>; + + Meta m_graph_meta; + MetaMap m_nodes; + MetaMap m_edges; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp new file mode 100644 index 00000000000000..34099d36a69fdb --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp @@ -0,0 +1,872 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "parser/config.hpp" + +#include "utils/error.hpp" +#include "utils/logger.hpp" + +#include +#include +#include +#include + +#include // depth + +namespace fs = std::filesystem; + +struct GlobalOptions { + std::string blob_dir = "."; + std::string model_dir = "."; + std::string device_name = "NPU"; + std::string log_level = "NONE"; + std::string compiler_type = "DRIVER"; + std::optional save_validation_outputs; +}; + +struct Network { + std::string tag; + InferenceParams params; + LayerVariantAttr input_data; + LayerVariantAttr output_data; + LayerVariantAttr initializers; + LayerVariantAttr accuracy_metrics; +}; + +struct InferOp { + InferenceParams params; + LayerVariantAttr input_data; + LayerVariantAttr output_data; + LayerVariantAttr initializers; + LayerVariantAttr accuracy_metrics; +}; + +struct CPUOp { + uint64_t time_in_us; +}; + +struct CompoundOp { + uint64_t repeat_count; + InferenceParamsMap params; + ScenarioGraph subgraph; +}; + +struct OpDesc { + std::string tag; + using OpType = std::variant; + OpType op; +}; + +// NB: Handles duplicating tags. 
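+// Duplicated names get numeric suffixes, e.g. adding "model" twice
+// yields the tags "model" and "model-2".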
+class TagsManager { +public: + std::string add(const std::string& tag); + +private: + std::unordered_multiset m_tags; +}; + +std::string TagsManager::add(const std::string& tag) { + std::string t = tag; + m_tags.insert(t); + const auto c = m_tags.count(t); + if (c > 1) { + t += "-" + std::to_string(c); + } + return t; +} + +static LogLevel toLogLevel(const std::string& lvl) { + if (lvl == "NONE") + return LogLevel::None; + if (lvl == "INFO") + return LogLevel::Info; + if (lvl == "DEBUG") + return LogLevel::Debug; + THROW_ERROR("Unsupported log level: " << lvl); +} + +static int toDepth(const std::string& prec) { + if (prec == "FP32") + return CV_32F; + if (prec == "FP16") + return CV_16F; + if (prec == "U8") + return CV_8U; + if (prec == "I32") + return CV_32S; + throw std::logic_error("Unsupported precision type: " + prec); +} + +static AttrMap toDepth(const AttrMap& attrmap) { + AttrMap depthmap; + for (const auto& [name, str_depth] : attrmap) { + depthmap.emplace(name, toDepth(str_depth)); + } + return depthmap; +} + +static LayerVariantAttr toDepth(const LayerVariantAttr& attr) { + LayerVariantAttr depthattr; + if (std::holds_alternative(attr)) { + depthattr = toDepth(std::get(attr)); + } else { + depthattr = toDepth(std::get>(attr)); + } + return depthattr; +} + +static std::string toPriority(const std::string& priority) { + if (priority == "LOW") { + return "LOW"; + } + if (priority == "NORMAL") { + return "MEDIUM"; + } + if (priority == "HIGH") { + return "HIGH"; + } + throw std::logic_error("Unsupported model priority: " + priority); +} + +static ScenarioGraph buildGraph(const std::vector& op_descs, + const std::vector>& connections); + +namespace YAML { + +template +struct convert> { + static bool decode(const Node& node, std::vector& vec) { + if (!node.IsSequence()) { + return false; + } + + for (auto& child : node) { + vec.push_back(child.as()); + } + return true; + } +}; + +template +struct convert> { + static bool decode(const Node& node, std::map& map) { + if (!node.IsMap()) { + return false; + } + for (const auto& itr : node) { + map.emplace(itr.first.as(), itr.second.as()); + } + return true; + } +}; + +template +struct convert> { + static bool decode(const Node& node, LayerVariantAttr& layer_attr) { + if (node.IsMap()) { + layer_attr = node.as>(); + } else { + layer_attr = node.as(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, UniformGenerator::Ptr& generator) { + if (!node["low"]) { + THROW_ERROR("Uniform distribution must have \"low\" attribute"); + } + if (!node["high"]) { + THROW_ERROR("Uniform distribution must have \"high\" attribute"); + } + generator = std::make_shared(node["low"].as(), node["high"].as()); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, IRandomGenerator::Ptr& generator) { + if (!node["dist"]) { + THROW_ERROR("\"random\" must have \"dist\" attribute!"); + } + const auto dist = node["dist"].as(); + if (dist == "uniform") { + generator = node.as(); + } else { + THROW_ERROR("Unsupported random distribution: \"" << dist << "\""); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, Norm::Ptr& metric) { + // NB: If bigger than tolerance - fail. 
+ if (!node["tolerance"]) { + THROW_ERROR("Metric \"norm\" must have \"tolerance\" attribute!"); + } + const auto tolerance = node["tolerance"].as(); + metric = std::make_shared(tolerance); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, Cosine::Ptr& metric) { + // NB: If lower than threshold - fail. + if (!node["threshold"]) { + THROW_ERROR("Metric \"cosine\" must have \"threshold\" attribute!"); + } + const auto threshold = node["threshold"].as(); + metric = std::make_shared(threshold); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, NRMSE::Ptr& metric) { + // NB: If bigger than tolerance - fail. + if (!node["tolerance"]) { + THROW_ERROR("Metric \"nrmse\" must have \"tolerance\" attribute!"); + } + const auto tolerance = node["tolerance"].as(); + metric = std::make_shared(tolerance); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, IAccuracyMetric::Ptr& metric) { + const auto type = node["name"].as(); + if (type == "norm") { + metric = node.as(); + } else if (type == "cosine") { + metric = node.as(); + } else if (type == "nrmse") { + metric = node.as(); + } else { + THROW_ERROR("Unsupported metric type: " << type); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, GlobalOptions& opts) { + if (node["model_dir"]) { + if (!node["model_dir"]["local"]) { + THROW_ERROR("\"model_dir\" must contain \"local\" key!"); + } + opts.model_dir = node["model_dir"]["local"].as(); + } + + if (node["blob_dir"]) { + if (!node["blob_dir"]["local"]) { + THROW_ERROR("\"blob_dir\" must contain \"local\" key!"); + } + opts.blob_dir = node["blob_dir"]["local"].as(); + } + + if (node["device_name"]) { + opts.device_name = node["device_name"].as(); + } + + if (node["log_level"]) { + opts.log_level = node["log_level"].as(); + } + + if (node["compiler_type"]) { + opts.compiler_type = node["compiler_type"].as(); + } + + if (node["save_validation_outputs"]) { + const auto path = node["save_validation_outputs"].as(); + opts.save_validation_outputs = std::make_optional(std::filesystem::path{path}); + } + + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, OpenVINOParams& params) { + // FIXME: Worth to separate these two + const auto name = node["name"] ? 
node["name"].as() : node["path"].as(); + fs::path path{name}; + if (path.extension() == ".xml") { + auto bin_path = path; + bin_path.replace_extension(".bin"); + params.path = OpenVINOParams::ModelPath{path.string(), bin_path.string()}; + } else if (path.extension() == ".blob") { + params.path = OpenVINOParams::BlobPath{path.string()}; + } else { + // NB: *.onnx, *.pdpd, and any other format supported in future + params.path = OpenVINOParams::ModelPath{path.string(), "" /*weights*/}; + } + // NB: If "device" isn't presented in config for network, + // the device specified globally will be substitued later on + if (node["device"]) { + params.device = node["device"].as(); + } + + if (node["ip"]) { + params.input_precision = toDepth(node["ip"].as>()); + } + + if (node["op"]) { + params.output_precision = toDepth(node["op"].as>()); + } + + if (node["il"]) { + params.input_layout = node["il"].as>(); + } + + if (node["ol"]) { + params.output_layout = node["ol"].as>(); + } + + if (node["iml"]) { + params.input_model_layout = node["iml"].as>(); + } + + if (node["oml"]) { + params.output_model_layout = node["oml"].as>(); + } + + if (node["config"]) { + params.config = node["config"].as>(); + } + + // NB: Note, it should be handled after "config" is set above + if (node["priority"]) { + params.config.emplace("MODEL_PRIORITY", toPriority(node["priority"].as())); + } + + if (node["nireq"]) { + params.nireq = node["nireq"].as(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, ONNXRTParams::OpenVINO& ov_ep) { + if (node["params"]) { + ov_ep.params_map = node["params"].as>(); + } + if (node["device_type"]) { + std::string device_type = node["device_type"].as(); + // Check if device_type already exists in params_map (collision check) + if (ov_ep.params_map.count("device_type") > 0) { + THROW_ERROR("Configuration error: 'device_type' has already been specified in the params."); + } else { + ov_ep.params_map["device_type"] = device_type; + } + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, ONNXRTParams::EP& ep) { + const auto ep_name = node["name"].as(); + if (ep_name == "OV") { + ep = node.as(); + } else { + THROW_ERROR("Unsupported \"ep name\" value: " << ep_name); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, ONNXRTParams& params) { + // FIXME: Worth to separate these two + params.model_path = node["name"] ? node["name"].as() : node["path"].as(); + if (node["session_options"]) { + params.session_options = node["session_options"].as>(); + } + if (node["ep"]) { + params.ep = node["ep"].as(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, Network& network) { + // NB: Take path stem as network tag + // Note that at this point, it's fine if names aren't unique + const auto name = node["name"].as(); + network.tag = std::filesystem::path{name}.stem().string(); + // NB: OpenVINO is default to keep back compatibility for config syntax + const auto framework = node["framework"] ? 
node["framework"].as() : "openvino"; + if (framework == "openvino") { + // NB: Parse OpenVINO model parameters such as path, device, precision, etc + network.params = node.as(); + } else if (framework == "onnxrt") { + network.params = node.as(); + } else { + THROW_ERROR("Unsupported \"framework:\" value: " << framework); + } + + if (node["random"]) { + network.initializers = node["random"].as>(); + } + if (node["metric"]) { + network.accuracy_metrics = node["metric"].as>(); + } + if (node["input_data"]) { + network.input_data = node["input_data"].as>(); + } + + if (node["output_data"]) { + network.output_data = node["output_data"].as>(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, CPUOp& op) { + // TODO: Assert there are no more options provided + op.time_in_us = node["time_in_us"] ? node["time_in_us"].as() : 0u; + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, InferOp& op) { + const auto framework = node["framework"] ? node["framework"].as() : "openvino"; + if (framework == "openvino") { + // NB: Parse OpenVINO model parameters such as path, device, precision, etc + op.params = node.as(); + } else if (framework == "onnxrt") { + op.params = node.as(); + } else { + THROW_ERROR("Unsupported \"framework:\" value: " << framework); + } + + if (node["random"]) { + op.initializers = node["random"].as>(); + } + if (node["metric"]) { + op.accuracy_metrics = node["metric"].as>(); + } + if (node["input_data"]) { + op.input_data = node["input_data"].as>(); + } + + if (node["output_data"]) { + op.output_data = node["output_data"].as>(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, OpDesc& opdesc) { + opdesc.tag = node["tag"].as(); + auto type = node["type"] ? node["type"].as() : "Infer"; + auto repeat_count = node["repeat_count"] ? node["repeat_count"].as() : 1u; + ASSERT(repeat_count > 0) + if (repeat_count > 1u) { + // NB: repeat_count > 1u assume that "Compound" operation will be used + type = "Compound"; + } + if (type == "Infer") { + opdesc.op = node.as(); + } else if (type == "CPU") { + opdesc.op = node.as(); + } else if (type == "Compound") { + std::vector> connections; + if (node["connections"]) { + connections = node["connections"].as>>(); + } + auto op_descs = node["op_desc"].as>(); + InferenceParamsMap inference_params; + for (const auto& op_desc : op_descs) { + if (std::holds_alternative(op_desc.op)) { + inference_params.emplace(op_desc.tag, std::get(op_desc.op).params); + } + } + opdesc.op = CompoundOp{repeat_count, std::move(inference_params), buildGraph(op_descs, connections)}; + } else { + THROW_ERROR("Unsupported operation type: \"" << type << "\"!"); + } + return true; + } +}; + +} // namespace YAML + +static std::vector> parseNetworks(const YAML::Node& node) { + ASSERT(node.IsSequence()); + TagsManager tgs_mngr; + std::vector> networks_list; + for (const auto& subnode : node) { + if (subnode.IsSequence()) { + networks_list.push_back(subnode.as>()); + } else { + networks_list.push_back({subnode.as()}); + } + // NB: Ensure all network tags are unique! 
+ for (auto& network : networks_list.back()) { + network.tag = tgs_mngr.add(network.tag); + } + } + return networks_list; +} + +static ScenarioGraph buildGraph(const std::vector>& networks_list, const uint32_t delay_in_us) { + ScenarioGraph graph; + auto src = graph.makeSource(); + std::vector producers = {src}; + for (uint32_t list_idx = 0; list_idx < networks_list.size(); ++list_idx) { + auto& networks = networks_list[list_idx]; + // NB: Delay if specified, will not be added to the beginning + // and end of the stream, ONLY between models + if (list_idx != 0u && delay_in_us != 0u) { + auto delay = graph.makeDelay(delay_in_us); + for (auto p : producers) { + graph.link(p, delay); + } + producers = {delay.out()}; + } + std::vector curr_outs; + curr_outs.reserve(networks.size()); + for (uint32_t net_idx = 0; net_idx < networks.size(); ++net_idx) { + auto infer = graph.makeInfer(networks[net_idx].tag); + for (auto p : producers) { + graph.link(p, infer); + } + curr_outs.push_back(infer.out()); + } + producers = std::move(curr_outs); + } + return graph; +} + +static InferenceParams adjustParams(OpenVINOParams&& params, const GlobalOptions& opts, const ReplaceBy& replace_by) { + // NB: Adjust the model path according to base directories provided for blobs & models + auto& path = params.path; + if (std::holds_alternative(path)) { + auto& model_path = std::get(path); + fs::path model_file_path{model_path.model}; + fs::path bin_file_path{model_path.bin}; + if (model_file_path.is_relative()) { + model_path.model = (opts.model_dir / model_file_path).string(); + } + if (!model_path.bin.empty() && bin_file_path.is_relative()) { + model_path.bin = (opts.model_dir / bin_file_path).string(); + } + } else { + ASSERT(std::holds_alternative(path)); + auto& blob_path = std::get(path); + fs::path blob_file_path{blob_path.blob}; + if (blob_file_path.is_relative()) { + blob_path.blob = (opts.blob_dir / blob_file_path).string(); + } + } + // NB: Adjust device property based on opts.device_name or replace_by + + if (!replace_by.device.empty()) { + // NB: ReplaceBy has priority - overwrite + params.device = replace_by.device; + } else if (params.device.empty()) { + // NB: Otherwise, if empty - take the value from global device name + params.device = opts.device_name; + } + + // NB: Compiler type is only relevant for NPU device + if (params.device == "NPU") { + // NB: Don't overwrite compiler type if it already has been + // specified explicitly for particular model + if (const auto it = params.config.find("NPU_COMPILER_TYPE"); it == params.config.end()) { + params.config.emplace("NPU_COMPILER_TYPE", opts.compiler_type); + } + } + return std::move(params); +} + +static InferenceParams adjustParams(ONNXRTParams&& params, const GlobalOptions& opts) { + fs::path model_file_path{params.model_path}; + if (model_file_path.is_relative()) { + params.model_path = (opts.model_dir / model_file_path).string(); + } + return std::move(params); +} + +static InferenceParams adjustParams(InferenceParams&& params, const GlobalOptions& opts, const ReplaceBy& replace_by) { + if (std::holds_alternative(params)) { + return adjustParams(std::get(std::move(params)), opts, replace_by); + } + ASSERT(std::holds_alternative(params)); + return adjustParams(std::get(std::move(params)), opts); +} + +static StreamDesc parseStream(const YAML::Node& node, const GlobalOptions& opts, const std::string& default_name, + const ReplaceBy& replace_by) { + StreamDesc stream; + + // FIXME: Create a function for the duplicate code below + stream.name = 
node["name"] ? node["name"].as() : default_name; + stream.frames_interval_in_us = 0u; + if (node["frames_interval_in_ms"]) { + stream.frames_interval_in_us = node["frames_interval_in_ms"].as() * 1000u; + if (node["target_fps"]) { + THROW_ERROR("Both \"target_fps\" and \"frames_interval_in_ms\" are defined for the stream: \"" + << stream.name << "\"! Please specify only one of them as they are mutually exclusive."); + } + } else if (node["target_fps"]) { + uint32_t target_fps = node["target_fps"].as(); + stream.frames_interval_in_us = (target_fps != 0) ? (1000u * 1000u / target_fps) : 0; + } + + if (node["target_latency_in_ms"]) { + stream.target_latency = std::make_optional(node["target_latency_in_ms"].as()); + if (stream.target_latency < 0) { + THROW_ERROR("\"target_latency_in_ms\" is negative for the stream: \"" << stream.name << "\"!"); + } + } + if (node["exec_time_in_secs"]) { + const auto exec_time_in_secs = node["exec_time_in_secs"].as(); + stream.criterion = std::make_shared(exec_time_in_secs * 1'000'000); + } + if (node["iteration_count"]) { + const auto iteration_count = node["iteration_count"].as(); + stream.criterion = std::make_shared(iteration_count); + } + + auto networks_list = parseNetworks(node["network"]); + const auto delay_in_us = node["delay_in_us"] ? node["delay_in_us"].as() : 0u; + stream.graph = buildGraph(networks_list, delay_in_us); + // NB: Collect network parameters + for (auto& networks : networks_list) { + for (auto& network : networks) { + stream.metrics_map.emplace(network.tag, std::move(network.accuracy_metrics)); + stream.initializers_map.emplace(network.tag, std::move(network.initializers)); + stream.input_data_map.emplace(network.tag, std::move(network.input_data)); + stream.output_data_map.emplace(network.tag, std::move(network.output_data)); + stream.infer_params_map.emplace(network.tag, adjustParams(std::move(network.params), opts, replace_by)); + } + } + return stream; +} + +using DependencyMap = std::unordered_map>; + +static ScenarioGraph buildGraph(const std::vector& op_descs, + const std::vector>& connections) { + // NB: Build the graph based on list of operations and connections between them + // + // The algorithm is straightforward: + // 1) For every operation create corresponding graph node + // 2) Go though connections and create the dependency map + // 3) Go through every operation and connect with its dependencies + // 3.1) If operation has no dependencies, connect it directly with the source + + // NB: For the fast access to operation node by name + std::unordered_map op_node_map; + // NB: To store the list of dependencies for every operation + std::unordered_map> dependency_map; + + // (1) For every operation create corresponding graph node + ScenarioGraph graph; + for (const auto& desc : op_descs) { + // NB: Initialize dependency list for every operation + dependency_map[desc.tag]; + // FIXME: Implement visitor + if (std::holds_alternative(desc.op)) { + op_node_map.emplace(desc.tag, graph.makeInfer(desc.tag)); + } else if (std::holds_alternative(desc.op)) { + const auto& compound = std::get(desc.op); + op_node_map.emplace( + desc.tag, graph.makeCompound(compound.repeat_count, compound.subgraph, compound.params, desc.tag)); + } else { + ASSERT(std::holds_alternative(desc.op)); + const auto& cpu = std::get(desc.op); + op_node_map.emplace(desc.tag, graph.makeDelay(cpu.time_in_us)); + } + } + + // (2) Go though connections and create the dependency map + for (const auto& tags : connections) { + if (tags.size() < 2) { + 
THROW_ERROR("Connections list must be at least size of 2!"); + } + for (uint32_t i = 1; i < tags.size(); ++i) { + // [A, B, C] - means B depends on A, and C depends on B + auto deps_it = dependency_map.find(tags[i]); + if (deps_it == dependency_map.end()) { + THROW_ERROR("Operation \"" << tags[i] << "\" hasn't been registered in op_desc list!"); + } + if (tags[i - 1] == tags[i]) { + THROW_ERROR("Operation \"" << tags[i] << "\" cannot be connected with itself!"); + } + auto& dep_set = deps_it->second; + // NB: Check if such connection already exists + auto is_inserted = deps_it->second.emplace(tags[i - 1]).second; + if (!is_inserted) { + THROW_ERROR("Connection between \"" << tags[i - 1] << "\" and \"" << tags[i] + << "\" operations already exists!"); + } + } + } + + // (3) Go through every operation and connect with its dependencies + auto src = graph.makeSource(); + for (const auto& [tag, deps] : dependency_map) { + auto op = op_node_map.at(tag); + // (3.1) If operation has no dependencies, connect it directly to the source + if (deps.empty()) { + graph.link(src, op); + } else { + for (auto dep_tag : deps) { + auto dep = op_node_map.at(dep_tag); + graph.link(dep.out(), op); + } + } + } + return graph; +} + +static StreamDesc parseAdvancedStream(const YAML::Node& node, const GlobalOptions& opts, + const std::string& default_name, const ReplaceBy& replace_by) { + StreamDesc stream; + + // FIXME: Create a function for the duplicate code below + stream.name = node["name"] ? node["name"].as() : default_name; + stream.frames_interval_in_us = 0u; + if (node["frames_interval_in_ms"]) { + stream.frames_interval_in_us = node["frames_interval_in_ms"].as() * 1000u; + if (node["target_fps"]) { + THROW_ERROR("Both \"target_fps\" and \"frames_interval_in_ms\" are defined for the stream: \"" + << stream.name << "\"! Please specify only one of them as they are mutually exclusive."); + } + } else if (node["target_fps"]) { + uint32_t target_fps = node["target_fps"].as(); + stream.frames_interval_in_us = (target_fps != 0) ? 
(1000u * 1000u / target_fps) : 0; + } + + if (node["target_latency_in_ms"]) { + stream.target_latency = std::make_optional(node["target_latency_in_ms"].as()); + if (stream.target_latency < 0) { + THROW_ERROR("\"target_latency_in_ms\" is negative for the stream: \"" << stream.name << "\"!"); + } + } + if (node["exec_time_in_secs"]) { + const auto exec_time_in_secs = node["exec_time_in_secs"].as(); + stream.criterion = std::make_shared(exec_time_in_secs * 1'000'000); + } + if (node["iteration_count"]) { + const auto iteration_count = node["iteration_count"].as(); + stream.criterion = std::make_shared(iteration_count); + } + + auto op_descs = node["op_desc"].as>(); + std::vector> connections; + if (node["connections"]) { + connections = node["connections"].as>>(); + } + + for (auto& desc : op_descs) { + if (std::holds_alternative(desc.op)) { + auto&& infer = std::get(desc.op); + stream.metrics_map.emplace(desc.tag, std::move(infer.accuracy_metrics)); + stream.initializers_map.emplace(desc.tag, std::move(infer.initializers)); + stream.input_data_map.emplace(desc.tag, std::move(infer.input_data)); + stream.output_data_map.emplace(desc.tag, std::move(infer.output_data)); + stream.infer_params_map.emplace(desc.tag, adjustParams(std::move(infer.params), opts, replace_by)); + } + if (std::holds_alternative(desc.op)) { + auto& compound = std::get(desc.op); + InferenceParamsMap& params_map = compound.params; + for (auto& pair : params_map) { + pair.second = adjustParams(std::move(pair.second), opts, replace_by); + } + } + } + + stream.graph = buildGraph(op_descs, connections); + return stream; +} + +static std::vector parseStreams(const YAML::Node& node, const GlobalOptions& opts, + const ReplaceBy& replace_by) { + std::vector streams; + uint32_t stream_idx = 0; + for (const auto& subnode : node) { + const auto default_name = std::to_string(stream_idx); + auto stream = subnode["op_desc"] ? parseAdvancedStream(subnode, opts, default_name, replace_by) + : parseStream(subnode, opts, default_name, replace_by); + streams.push_back(std::move(stream)); + ++stream_idx; + } + return streams; +} + +static std::vector parseScenarios(const YAML::Node& node, const GlobalOptions& opts, + const ReplaceBy& replace_by) { + std::vector scenarios; + for (const auto& subnode : node) { + ScenarioDesc scenario; + scenario.name = subnode["name"] ? subnode["name"].as() + : "multi_inference_" + std::to_string(scenarios.size()); + scenario.streams = parseStreams(subnode["input_stream_list"], opts, replace_by); + + if (opts.save_validation_outputs) { + for (auto& stream : scenario.streams) { + const auto& root_path = opts.save_validation_outputs.value(); + std::string stream_dir = "stream_" + stream.name; + std::filesystem::path stream_outputs_path = root_path / scenario.name / stream_dir; + stream.per_iter_outputs_path = std::make_optional(std::move(stream_outputs_path)); + } + } + scenarios.push_back(std::move(scenario)); + } + return scenarios; +} + +Config parseConfig(const YAML::Node& node, const ReplaceBy& replace_by) { + const auto global_opts = node.as(); + + // FIXME: Perhaps should be done somewhere else... 
+ Logger::global_lvl = toLogLevel(global_opts.log_level); + + Config config; + config.scenarios = parseScenarios(node["multi_inference"], global_opts, replace_by); + + ASSERT(!config.scenarios.empty()); + if (node["metric"]) { + config.metric = node["metric"].as(); + } + if (node["random"]) { + config.initializer = node["random"].as(); + } + + config.disable_high_resolution_timer = false; + if (node["disable_high_resolution_waitable_timer"]) { + config.disable_high_resolution_timer = node["disable_high_resolution_waitable_timer"].as(); + } + return config; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/config.hpp b/src/plugins/intel_npu/tools/protopipe/src/parser/config.hpp new file mode 100644 index 00000000000000..1dec64ece423b6 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/config.hpp @@ -0,0 +1,12 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "parser/parser.hpp" + +#include + +Config parseConfig(const YAML::Node& root, const ReplaceBy& replace_by); diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/parser.cpp b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.cpp new file mode 100644 index 00000000000000..b4f48b7415615c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.cpp @@ -0,0 +1,20 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "parser/parser.hpp" +#include "parser/config.hpp" + +#include "utils/error.hpp" + +#include + +ScenarioParser::ScenarioParser(const std::string& filepath): m_filepath(filepath) { +} + +Config ScenarioParser::parseScenarios(const ReplaceBy& replace_by) { + const auto root = YAML::LoadFile(m_filepath); + // TODO: Extend to any other config syntax + return parseConfig(root, replace_by); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/parser.hpp b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.hpp new file mode 100644 index 00000000000000..ec228ee8070fd3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.hpp @@ -0,0 +1,61 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "scenario/criterion.hpp" +#include "scenario/inference.hpp" +#include "scenario/scenario_graph.hpp" + +struct StreamDesc { + // NB: Commons parameters for all modes + std::string name; + uint64_t frames_interval_in_us; + ScenarioGraph graph; + InferenceParamsMap infer_params_map; + ITermCriterion::Ptr criterion; + // Mode specific params + ModelsAttrMap metrics_map; + ModelsAttrMap initializers_map; + ModelsAttrMap input_data_map; + ModelsAttrMap output_data_map; + std::optional target_latency; + std::optional per_iter_outputs_path; +}; + +struct ScenarioDesc { + std::string name; + std::vector streams; + bool disable_high_resolution_timer; +}; + +struct Config { + IRandomGenerator::Ptr initializer; + IAccuracyMetric::Ptr metric; + bool disable_high_resolution_timer; + std::vector scenarios; +}; + +struct ReplaceBy { + std::string device; +}; + +struct IScenarioParser { + virtual Config parseScenarios(const ReplaceBy& replace_by) = 0; + virtual ~IScenarioParser() = default; +}; + +class ScenarioParser : public IScenarioParser { +public: + ScenarioParser(const std::string& filepath); + Config parseScenarios(const ReplaceBy& replace_by) override; + +private: + std::string m_filepath; +}; diff --git 
a/src/plugins/intel_npu/tools/protopipe/src/result.cpp b/src/plugins/intel_npu/tools/protopipe/src/result.cpp new file mode 100644 index 00000000000000..23c6c315eaf123 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/result.cpp @@ -0,0 +1,22 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "result.hpp" +#include "utils/error.hpp" + +Result::Result(const Error& error): m_status(error){}; +Result::Result(const Success& success): m_status(success){}; + +Result::operator bool() const { + return std::holds_alternative(m_status); +} + +std::string Result::str() const { + if (std::holds_alternative(m_status)) { + return std::get(m_status).msg; + } + ASSERT(std::holds_alternative(m_status)); + return std::get(m_status).reason; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/result.hpp b/src/plugins/intel_npu/tools/protopipe/src/result.hpp new file mode 100644 index 00000000000000..08cbd7b06fc940 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/result.hpp @@ -0,0 +1,30 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +struct Success { + std::string msg; +}; +struct Error { + std::string reason; +}; + +class Result { +public: + Result() = default; // monostate (empty) + Result(const Error& error); + Result(const Success& success); + + operator bool() const; + std::string str() const; + +private: + using Status = std::variant; + Status m_status; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.cpp new file mode 100644 index 00000000000000..9f779b8dab8cfd --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.cpp @@ -0,0 +1,121 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "scenario/accuracy_metrics.hpp" + +#include "utils/error.hpp" + +Norm::Norm(const double tolerance): m_tolerance(tolerance){}; + +Result Norm::compare(const cv::Mat& lhs, const cv::Mat& rhs) { + cv::Mat lhsf32, rhsf32; + lhs.convertTo(lhsf32, CV_32F); + rhs.convertTo(rhsf32, CV_32F); + + ASSERT(lhsf32.total() == rhsf32.total()); + auto value = cv::norm(lhsf32, rhsf32); + + if (value > m_tolerance) { + std::stringstream ss; + ss << value << " > " << m_tolerance; + return Error{ss.str()}; + } + return Success{}; +} + +std::string Norm::str() { + std::stringstream ss; + ss << "Norm{tolerance: " << m_tolerance << "}"; + return ss.str(); +} + +Cosine::Cosine(const double threshold): m_threshold(threshold){}; + +Result Cosine::compare(const cv::Mat& lhs, const cv::Mat& rhs) { + cv::Mat lhsf32, rhsf32; + lhs.convertTo(lhsf32, CV_32F); + rhs.convertTo(rhsf32, CV_32F); + + ASSERT(lhsf32.total() == rhsf32.total()); + const auto* lhsptr = lhsf32.ptr(); + const auto* rhsptr = rhsf32.ptr(); + + double lhsdot = 0.0, rhsdot = 0.0, numr = 0.0; + for (size_t i = 0; i < lhsf32.total(); ++i) { + numr += lhsptr[i] * rhsptr[i]; + lhsdot += lhsptr[i] * lhsptr[i]; + rhsdot += rhsptr[i] * rhsptr[i]; + } + + const double eps = 1e-9; + if (lhsdot < eps || rhsdot < eps) { + return Error{"Division by zero!"}; + } + + const double similarity = numr / (std::sqrt(lhsdot) * std::sqrt(rhsdot)); + if (similarity > (1.0 + eps) || similarity < -(1.0 + eps)) { + std::stringstream ss; + ss << "Invalid result " << similarity << " (valid range [-1 : +1])"; + return Error{ss.str()}; + } + + 
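+    // A similarity of 1.0 means identical direction; values below the threshold are treated as FAIL.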
if (m_threshold - eps > similarity) { + std::stringstream ss; + ss << similarity << " < " << m_threshold; + return Error{ss.str()}; + } + return Success{}; +} + +std::string Cosine::str() { + std::stringstream ss; + ss << "Cosine{threshold: " << m_threshold << "}"; + return ss.str(); +} + +NRMSE::NRMSE(const double tolerance): m_tolerance(tolerance){}; + +Result NRMSE::compare(const cv::Mat& lhs, const cv::Mat& rhs) { + cv::Mat lhsf32, rhsf32; + lhs.convertTo(lhsf32, CV_32F); + rhs.convertTo(rhsf32, CV_32F); + + const auto size = lhsf32.total(); + if (size == 0) { + std::stringstream ss; + ss << "Empty output and reference tensors, nrmse loss set to 0" << std::endl; + return Success{}; + } + + const auto* lhsptr = lhsf32.ptr(); + const auto* rhsptr = rhsf32.ptr(); + + double error = 0.0; + float lhsmax = 0.0, rhsmax = 0.0, lhsmin = 0.0, rhsmin = 0.0; + + for (size_t i = 0; i < size; ++i) { + const auto diff = lhsptr[i] - rhsptr[i]; + error += diff * diff; + lhsmax = std::max(lhsptr[i], lhsmax); + rhsmax = std::max(rhsptr[i], rhsmax); + lhsmin = std::min(lhsptr[i], lhsmin); + rhsmin = std::min(rhsptr[i], rhsmin); + } + + double nrmse = sqrt(error / size) / std::max(0.001f, std::max(lhsmax - lhsmin, rhsmax - rhsmin)); + + if (m_tolerance < nrmse) { + std::stringstream ss; + ss << nrmse << " > " << m_tolerance; + return Error{ss.str()}; + } + return Success{}; +} + +std::string NRMSE::str() { + std::stringstream ss; + ss << "nrmse{tolerance: " << m_tolerance << "}"; + return ss.str(); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.hpp new file mode 100644 index 00000000000000..010039360ecb9b --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.hpp @@ -0,0 +1,52 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "result.hpp" + +struct IAccuracyMetric { + using Ptr = std::shared_ptr; + virtual Result compare(const cv::Mat& lhs, const cv::Mat& rhs) = 0; + virtual std::string str() = 0; + virtual ~IAccuracyMetric() = default; +}; + +class Norm : public IAccuracyMetric { +public: + using Ptr = std::shared_ptr; + explicit Norm(const double tolerance); + Result compare(const cv::Mat& lhs, const cv::Mat& rhs) override; + std::string str() override; + +private: + double m_tolerance; +}; + +class Cosine : public IAccuracyMetric { +public: + using Ptr = std::shared_ptr; + explicit Cosine(const double threshold); + Result compare(const cv::Mat& lhs, const cv::Mat& rhs) override; + std::string str() override; + +private: + double m_threshold; +}; + +class NRMSE : public IAccuracyMetric { +public: + using Ptr = std::shared_ptr; + explicit NRMSE(const double tolerance); + Result compare(const cv::Mat& lhs, const cv::Mat& rhs) override; + std::string str() override; + +private: + double m_tolerance; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.cpp new file mode 100644 index 00000000000000..b348fe92e811cb --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.cpp @@ -0,0 +1,72 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "criterion.hpp" + +#include + +#include "utils/utils.hpp" + +Iterations::Iterations(uint64_t num_iters): m_num_iters(num_iters), m_counter(0) { +} + +bool 
Iterations::check() const { + return m_counter != m_num_iters; +} + +void Iterations::update() { + ++m_counter; +} + +void Iterations::init() { + m_counter = 0; +} + +ITermCriterion::Ptr Iterations::clone() const { + return std::make_shared(*this); +} + +TimeOut::TimeOut(uint64_t time_in_us): m_time_in_us(time_in_us), m_start_ts(-1) { +} + +bool TimeOut::check() const { + return utils::timestamp() - m_start_ts < m_time_in_us; +} + +void TimeOut::update(){/* do nothing */}; + +void TimeOut::init() { + m_start_ts = utils::timestamp(); +} + +ITermCriterion::Ptr TimeOut::clone() const { + return std::make_shared(*this); +} + +CombinedCriterion::CombinedCriterion(ITermCriterion::Ptr lhs, ITermCriterion::Ptr rhs): m_lhs(lhs), m_rhs(rhs) { +} + +CombinedCriterion::CombinedCriterion(const CombinedCriterion& other) { + m_lhs = other.m_lhs->clone(); + m_rhs = other.m_rhs->clone(); +} + +bool CombinedCriterion::check() const { + return m_lhs->check() && m_rhs->check(); +} + +void CombinedCriterion::update() { + m_lhs->update(); + m_rhs->update(); +}; + +void CombinedCriterion::init() { + m_lhs->init(); + m_rhs->init(); +} + +ITermCriterion::Ptr CombinedCriterion::clone() const { + return std::make_shared(*this); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.hpp new file mode 100644 index 00000000000000..28b440a7b3b0a3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.hpp @@ -0,0 +1,58 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +struct ITermCriterion { + using Ptr = std::shared_ptr; + virtual void init() = 0; + virtual void update() = 0; + virtual bool check() const = 0; + virtual ITermCriterion::Ptr clone() const = 0; +}; + +class Iterations : public ITermCriterion { +public: + Iterations(uint64_t num_iters); + + void init() override; + void update() override; + bool check() const override; + ITermCriterion::Ptr clone() const override; + +private: + uint64_t m_num_iters; + uint64_t m_counter; +}; + +class TimeOut : public ITermCriterion { +public: + TimeOut(uint64_t time_in_us); + + void init() override; + void update() override; + bool check() const override; + ITermCriterion::Ptr clone() const override; + +private: + uint64_t m_time_in_us; + uint64_t m_start_ts; +}; + +class CombinedCriterion : public ITermCriterion { +public: + CombinedCriterion(ITermCriterion::Ptr lhs, ITermCriterion::Ptr rhs); + CombinedCriterion(const CombinedCriterion& other); + + void init() override; + void update() override; + bool check() const override; + ITermCriterion::Ptr clone() const override; + +private: + ITermCriterion::Ptr m_lhs, m_rhs; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.cpp new file mode 100644 index 00000000000000..c1648f3755cbfd --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.cpp @@ -0,0 +1,17 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "scenario/inference.hpp" + +#include +#include + +std::vector extractLayerNames(const std::vector& layers) { + std::vector names; + std::transform(layers.begin(), layers.end(), std::back_inserter(names), [](const auto& layer) { + return layer.name; + }); + return names; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp 
diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp
new file mode 100644
index 00000000000000..c4fd85aa26721a
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp
@@ -0,0 +1,111 @@
+//
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <variant>
+#include <vector>
+
+template <typename T>
+using AttrMap = std::map<std::string, T>;
+// NB: This type is supposed to be used to hold in/out layers
+// attributes such as precision, layout, shape etc.
+//
+// User can provide attributes either:
+// 1. std::monostate - No value specified explicitly.
+// 2. Attr - value specified explicitly that should be broadcasted to all layers.
+// 3. AttrMap[str->T] - map specifies value for particular layer.
+template <typename Attr>
+using LayerVariantAttr = std::variant<std::monostate, AttrMap<Attr>, Attr>;
+
+// NB: Map of model tag -> LayerVariantAttr
+template <typename T>
+using ModelsAttrMap = std::unordered_map<std::string, LayerVariantAttr<T>>;
+
+struct LayerInfo {
+    std::string name;
+    std::vector<int> dims;
+    int prec;
+};
+using LayersInfo = std::vector<LayerInfo>;
+
+std::vector<std::string> extractLayerNames(const std::vector<LayerInfo>& layers);
+
+template <typename K, typename V>
+std::optional<V> lookUp(const std::map<K, V>& map, const K& key) {
+    const auto it = map.find(key);
+    if (it == map.end()) {
+        return {};
+    }
+    return std::make_optional(std::move(it->second));
+}
+
+template <typename T>
+static AttrMap<T> unpackLayerAttr(const LayerVariantAttr<T>& attr, const std::vector<std::string>& layer_names,
+                                  const std::string& attrname) {
+    AttrMap<T> attrmap;
+    if (std::holds_alternative<T>(attr)) {
+        auto value = std::get<T>(attr);
+        for (const auto& name : layer_names) {
+            attrmap.emplace(name, value);
+        }
+    } else if (std::holds_alternative<AttrMap<T>>(attr)) {
+        attrmap = std::get<AttrMap<T>>(attr);
+        std::unordered_set<std::string> layers_set{layer_names.begin(), layer_names.end()};
+        for (const auto& [name, attr] : attrmap) {
+            const auto it = layers_set.find(name);
+            if (it == layers_set.end()) {
+                throw std::logic_error("Failed to find layer \"" + name + "\" to specify " + attrname);
+            }
+        }
+    }
+    return attrmap;
+}
+
+struct OpenVINOParams {
+    struct ModelPath {
+        std::string model;
+        std::string bin;
+    };
+    struct BlobPath {
+        std::string blob;
+    };
+    using Path = std::variant<ModelPath, BlobPath>;
+
+    // NB: Mandatory parameters
+    Path path;
+    std::string device;
+    // NB: Optional parameters
+    LayerVariantAttr<int> input_precision;
+    LayerVariantAttr<int> output_precision;
+    LayerVariantAttr<std::string> input_layout;
+    LayerVariantAttr<std::string> output_layout;
+    LayerVariantAttr<std::string> input_model_layout;
+    LayerVariantAttr<std::string> output_model_layout;
+    std::map<std::string, std::string> config;
+    size_t nireq = 1u;
+};
+
+struct ONNXRTParams {
+    std::string model_path;
+    std::map<std::string, std::string> session_options;
+    // TODO: Extend for other available ONNXRT EP (e.g DML, CoreML, TensorRT, etc)
+    struct OpenVINO {
+        std::map<std::string, std::string> params_map;
+    };
+    // NB: std::monostate stands for the default MLAS Execution provider
+    using EP = std::variant<std::monostate, OpenVINO>;
+    EP ep;
+};
+
+using InferenceParams = std::variant<std::monostate, OpenVINOParams, ONNXRTParams>;
+using InferenceParamsMap = std::unordered_map<std::string, InferenceParams>;
diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.cpp
new file mode 100644
index 00000000000000..96984966fbc6fc
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.cpp
@@ -0,0 +1,40 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "scenario/scenario_graph.hpp"
+
+DataNode::DataNode(Graph* graph, NodeHandle nh): m_nh(nh) {
+    graph->meta(nh).set(Data{});
+};
+
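+// NB: Illustration only (not part of the original change): with the API
+// defined in scenario_graph.hpp, a "source -> infer" chain is assembled as
+//     ScenarioGraph g;
+//     auto src = g.makeSource();
+//     auto op  = g.makeInfer("model-a");  // "model-a" is a hypothetical tag
+//     g.link(src, op);
+// and op.out() yields the DataNode holding the inference result.
+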
+OpNode::OpNode(NodeHandle nh, DataNode out_data): m_nh(nh), m_out_data(out_data) { +} + +DataNode OpNode::out() { + return m_out_data; +} + +DataNode ScenarioGraph::makeSource() { + NodeHandle nh = m_graph.create(); + m_graph.meta(nh).set(Source{}); + return DataNode(&m_graph, nh); +} + +void ScenarioGraph::link(DataNode data, OpNode op) { + m_graph.link(data.m_nh, op.m_nh); +} + +OpNode ScenarioGraph::makeInfer(const std::string& tag) { + return makeOp(Infer{tag}); +} + +OpNode ScenarioGraph::makeDelay(uint64_t time_in_us) { + return makeOp(Delay{time_in_us}); +} + +OpNode ScenarioGraph::makeCompound(uint64_t repeat_count, ScenarioGraph subgraph, InferenceParamsMap infer_params, + const std::string& tag) { + return makeOp(Compound{repeat_count, subgraph, infer_params, tag}); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.hpp new file mode 100644 index 00000000000000..a9b6523a6be52d --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.hpp @@ -0,0 +1,102 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "graph.hpp" +#include "scenario/accuracy_metrics.hpp" +#include "scenario/inference.hpp" +#include "utils/data_providers.hpp" + +struct Source {}; +struct Data {}; + +class DataNode { +public: + DataNode(Graph* graph, NodeHandle nh); + +private: + friend class ScenarioGraph; + NodeHandle m_nh; +}; + +class OpNode; +template <> +struct std::hash; + +class OpNode { +public: + OpNode(NodeHandle nh, DataNode out_data); + DataNode out(); + +private: + friend class ScenarioGraph; + friend struct std::hash; + NodeHandle m_nh; + DataNode m_out_data; +}; + +namespace std { +template <> +struct hash { + uint64_t operator()(const OpNode& op_node) const { + return std::hash()(op_node.m_nh); + } +}; +} // namespace std + +class ScenarioGraph { +public: + DataNode makeSource(); + OpNode makeInfer(const std::string& tag); + OpNode makeDelay(uint64_t time_in_us); + OpNode makeCompound(uint64_t repeat_count, ScenarioGraph subgraph, InferenceParamsMap infer_params, + const std::string& tag); + + void link(DataNode data, OpNode op); + + template + void pass(F&& f) { + f(m_graph); + } + +private: + template + OpNode makeOp(Kind&& kind); + +private: + Graph m_graph; +}; + +struct Infer { + std::string tag; +}; + +struct Delay { + uint64_t time_in_us; +}; + +struct Compound { + uint64_t repeat_count; + ScenarioGraph subgraph; + InferenceParamsMap infer_params; + std::string tag; +}; + +struct Op { + using Kind = std::variant; + Kind kind; +}; + +template +OpNode ScenarioGraph::makeOp(Kind&& kind) { + auto op_nh = m_graph.create(); + auto out_nh = m_graph.create(); + m_graph.meta(op_nh).set(Op{std::forward(kind)}); + m_graph.link(op_nh, out_nh); + return OpNode(op_nh, DataNode(&m_graph, out_nh)); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.cpp new file mode 100644 index 00000000000000..ad0abc7fe89f9b --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.cpp @@ -0,0 +1,42 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/computation.hpp" + +Computation::Computation(cv::GComputation&& comp, cv::GCompileArgs&& args, std::vector&& metas, GraphDesc&& desc) + : m_comp(std::move(comp)), + 
m_compile_args(std::move(args)), + m_out_meta(std::move(metas)), + m_desc(std::move(desc)) { +} + +uint32_t Computation::getMaxParallelBranches() const { + return m_desc.max_parallel_branches; +} + +const std::vector& Computation::getOutMeta() const { + return m_out_meta; +} + +cv::GCompiled Computation::compile(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args) { + auto compile_args = m_compile_args; + compile_args += std::move(args); + return m_comp.compile(std::move(in_meta), std::move(compile_args)); +} + +cv::GStreamingCompiled Computation::compileStreaming(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args) { + auto compile_args = m_compile_args; + compile_args += std::move(args); + return m_comp.compileStreaming(std::move(in_meta), std::move(compile_args)); +} + +cv::GMetaArgs descr_of(const std::vector& sources) { + cv::GMetaArgs meta; + meta.reserve(sources.size()); + for (auto src : sources) { + meta.push_back(src->descr_of()); + } + return meta; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.hpp new file mode 100644 index 00000000000000..f9eba3b8c95a5f --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.hpp @@ -0,0 +1,36 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "graph.hpp" +#include "simulation/dummy_source.hpp" + +#include +#include + +class Computation { +public: + // NB: Holds information about Graph structure + struct GraphDesc { + const uint32_t max_parallel_branches; + }; + + Computation(cv::GComputation&& comp, cv::GCompileArgs&& args, std::vector&& metas, GraphDesc&& desc); + + uint32_t getMaxParallelBranches() const; + const std::vector& getOutMeta() const; + + cv::GCompiled compile(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args = {}); + cv::GStreamingCompiled compileStreaming(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args = {}); + +private: + cv::GComputation m_comp; + cv::GCompileArgs m_compile_args; + std::vector m_out_meta; + GraphDesc m_desc; +}; + +cv::GMetaArgs descr_of(const std::vector& sources); diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.cpp new file mode 100644 index 00000000000000..d43a84ef5fe3a8 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.cpp @@ -0,0 +1,462 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/computation_builder.hpp" +#include "simulation/layers_reader.hpp" +#include "simulation/operations.hpp" +#include "simulation/performance_mode.hpp" +#include "simulation/simulation.hpp" + +#include "utils/error.hpp" + +#include + +struct OpBuilder { + void build(NodeHandle nh, const Infer& infer); + void build(NodeHandle nh, const Delay& delay); + void build(NodeHandle nh, const Compound& compound); + + Graph& graph; + IBuildStrategy::Ptr strategy; + const InferenceParamsMap& params_map; +}; + +void OpBuilder::build(NodeHandle nh, const Compound& compound) { + // Retrieving destination nodes of the current node nh + auto out_nhs = nh->dstNodes(); + + // NB: The Dummy node ensures proper handling of multiple inputs + auto dummy_nh = graph.create(); + auto provider = std::make_shared(utils::createRandom({1}, CV_8U)); + DummyCall dummy_call{{provider}, 0}; + 
graph.meta(dummy_nh).set(GOperation{std::move(dummy_call)}); + auto in_nhs = nh->srcNodes(); + + // removing input edges to go through dummy node and not to compound node + auto src_edges = nh->srcEdges(); + for (size_t i = 0; i < src_edges.size(); ++i) { + graph.remove(src_edges[i]); + } + + for (uint32_t i = 0; i < in_nhs.size(); ++i) { + graph.meta(graph.link(in_nhs[i], dummy_nh)).set(InputIdx{i}); // Linking in_nhs with dummy_nh + } + + auto dummy_out_nh = graph.create(); // Creating output dunmmy node + graph.meta(graph.link(dummy_nh, dummy_out_nh)) + .set(OutputIdx{0u}); // linking dummy node handle and output dummy node handle + graph.meta(dummy_out_nh).set(GData{}); + graph.meta(graph.link(dummy_out_nh, nh)).set(InputIdx{0u}); + + ASSERT(nh->dstEdges().size() == 1u); + auto dst_edge = nh->dstEdges().front(); + graph.meta(dst_edge).set(OutputIdx{0u}); + + graph.meta(graph.link(nh, out_nhs.front())).set(OutputIdx{0u}); + + ModelsAttrMap input_data_map; + ModelsAttrMap initializers_map; + + for (const auto& [tag, params] : compound.infer_params) { + input_data_map[tag]; + initializers_map[tag]; + } + + PerformanceSimulation::Options opts{ + nullptr, // global_initializer + initializers_map, + input_data_map, + true, // inference_only + {} // target latency + }; + + Simulation::Config cfg{compound.tag, + 0u, // frames_interval_in_ms + false, // disable_high_resolution_timer + compound.subgraph, compound.infer_params}; + + auto compiled = std::make_shared(std::move(cfg), std::move(opts)) + ->compileSync(false /*drop_frames*/); + auto term_criterion = std::make_shared(compound.repeat_count); + auto f = [compiled, term_criterion]() { + compiled->run(term_criterion); + }; + + CompoundCall compound_call{f}; + graph.meta(nh).set(GOperation{std::move(compound_call)}); +} + +void OpBuilder::build(NodeHandle nh, const Delay& delay) { + auto in_nhs = nh->srcNodes(); + auto out_nhs = nh->dstNodes(); + // FIXME: Once nh is removed, delay info is no longer alive!!! 
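+    // NB: Copy the POD field out of `delay` before the removal below;
+    // reading delay.time_in_us after graph.remove(nh) would be a
+    // use-after-free of the node's metadata.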
+ const auto time_in_us = delay.time_in_us; + graph.remove(nh); + + auto delay_nh = graph.create(); + auto provider = std::make_shared(utils::createRandom({1}, CV_8U)); + graph.meta(delay_nh).set(GOperation{DummyCall{{provider}, time_in_us}}); + + for (uint32_t i = 0; i < in_nhs.size(); ++i) { + graph.meta(graph.link(in_nhs[i], delay_nh)).set(InputIdx{i}); + } + graph.meta(graph.link(delay_nh, out_nhs.front())).set(OutputIdx{0u}); +} + +void OpBuilder::build(NodeHandle nh, const Infer& infer) { + const auto& params = params_map.at(infer.tag); + auto [in_layers, out_layers] = LayersReader::readLayers(params); + InferDesc desc{infer.tag, std::move(in_layers), std::move(out_layers)}; + + auto out_nhs = nh->dstNodes(); + ASSERT(out_nhs.size() == 1); + + auto [providers, in_meta, out_meta, disable_copy] = strategy->build(desc); + ASSERT(providers.size() == desc.input_layers.size()); + ASSERT(in_meta.size() == desc.input_layers.size()); + ASSERT(out_meta.size() == desc.output_layers.size()); + + // NB: Check if some of the Delay's was fused to this Infer + uint64_t delay_in_us = 0u; + if (graph.meta(nh).has()) { + delay_in_us = graph.meta(nh).get().time_in_us; + } + + auto dummy_nh = graph.create(); + DummyCall dummy_call{providers, delay_in_us, disable_copy}; + graph.meta(dummy_nh).set(GOperation{std::move(dummy_call)}); + auto in_nhs = nh->srcNodes(); + for (uint32_t i = 0; i < in_nhs.size(); ++i) { + graph.meta(graph.link(in_nhs[i], dummy_nh)).set(InputIdx{i}); + } + + graph.remove(nh); + + auto infer_nh = graph.create(); + for (uint32_t layer_idx = 0; layer_idx < desc.input_layers.size(); ++layer_idx) { + // NB: Create dummy out node and link with dummy. + auto dummy_out_nh = graph.create(); + graph.meta(dummy_out_nh) += std::move(in_meta[layer_idx]); + graph.meta(graph.link(dummy_nh, dummy_out_nh)).set(OutputIdx{layer_idx}); + graph.meta(dummy_out_nh).set(GData{}); + // NB: Finally link dummy out with infer + graph.meta(graph.link(dummy_out_nh, infer_nh)).set(InputIdx{layer_idx}); + } + + auto out_nh = out_nhs.front(); + graph.meta(graph.link(infer_nh, out_nh)).set(OutputIdx{0u}); + graph.meta(out_nh) += out_meta.front(); + for (uint32_t layer_idx = 1; layer_idx < desc.output_layers.size(); ++layer_idx) { + auto infer_out_nh = graph.create(); + graph.meta(infer_out_nh) = std::move(out_meta[layer_idx]); + graph.meta(infer_out_nh).set(GData{}); + graph.meta(graph.link(infer_nh, infer_out_nh)).set(OutputIdx{layer_idx}); + } + + InferCall infer_call{desc.tag, extractLayerNames(desc.input_layers), extractLayerNames(desc.output_layers)}; + graph.meta(infer_nh).set(GOperation{std::move(infer_call)}); +}; + +static bool fuseDelay(Graph& graph, NodeHandle nh, const Delay& delay) { + // NB: Current fusing is trivial and applied only for the following case: + // 1) Delay has only single Infer reader + // 2) Infer doesn't have any other writers except Delay + // e.g: [Delay] -> (out) -> [Infer] + + // NB: Access readers of delay output data node. + auto delay_out_nh = nh->dstNodes().front(); + auto out_edges = delay_out_nh->dstEdges(); + // NB: Don't fuse Delay either if it has multiple readers + // or doesn't have readers at all (1) + if (out_edges.size() != 1u) { + return false; + } + + auto out_edge = out_edges.front(); + auto op_nh = out_edge->dstNode(); + auto op = graph.meta(op_nh).get().kind; + // NB: Don't fuse Delay if reader either not an Infer (1) + // or it has other writers except Delay (2). 
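+    // E.g. a Delay followed by a non-Infer operation, or an Infer whose
+    // input is also written by another operation, is left as-is.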
+ if (!std::holds_alternative(op) || op_nh->srcEdges().size() != 1u) { + // TODO: Can be also fused to another "delay". + return false; + } + + // NB: Fuse the Delay into Infer: + // 1) Assign Delay meta directly to Infer + // 2) Remove Delay node + // 3) Redirect Delay writers to Infer + graph.meta(op_nh).set(delay); + for (auto in_nh : nh->srcNodes()) { + graph.link(in_nh, op_nh); + } + graph.remove(nh); + graph.remove(delay_out_nh); + + return true; +} + +struct Protocol { + cv::GProtoArgs graph_inputs; + cv::GProtoArgs graph_outputs; +}; + +enum class NodeState { EXPLORING, VISITED }; + +static void visit(NodeHandle nh, std::unordered_map& state) { + auto curr_node_it = state.emplace(nh, NodeState::EXPLORING).first; + for (const auto& dst_nh : nh->dstNodes()) { + const auto dst_it = state.find(dst_nh); + if (dst_it == state.end()) { + visit(dst_nh, state); + } else if (dst_it->second == NodeState::EXPLORING) { + THROW_ERROR("Scenario graph has a cycle!"); + } + } + curr_node_it->second = NodeState::VISITED; +}; + +namespace passes { + +// NB: Throw an exception if there is a cycle in graph +void throwIfCycle(Graph& graph) { + std::unordered_map state; + for (const auto& nh : graph.nodes()) { + if (state.find(nh) == state.end()) { + visit(nh, state); + } + } +} + +// NB: Determines what would be the computation graph +// inputs and outputs and marks intermediate data nodes +void init(Graph& graph) { + ASSERT(!graph.nodes().empty()); + uint32_t num_sources = 0; + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + ++num_sources; + graph.meta(nh).set(GraphInput{}); + } else { + // NB: Check that graph is connected + ASSERT(!nh->srcNodes().empty()); + } + if (nh->dstNodes().empty()) { + ASSERT(graph.meta(nh).has()); + graph.meta(nh).set(GraphOutput{}); + } + if (!graph.meta(nh).has()) { + ASSERT(graph.meta(nh).has()); + graph.meta(nh).set(GData{}); + } + } + ASSERT(num_sources != 0); +}; + +// NB: Fuses delay to the inference nodes as the delay can be performed +// as part of the model dummy preprocessing +void fuseDelays(Graph& graph) { + // NB: Iterate over graph nodes until all delays are fused. + while (true) { + bool is_fused = false; + for (auto nh : graph.nodes()) { + if (!graph.meta(nh).has()) { + continue; + } + auto op = graph.meta(nh).get().kind; + if (std::holds_alternative(op)) { + auto delay = std::get(op); + if (fuseDelay(graph, nh, delay)) { + is_fused = true; + break; + } + } + } + // NB: If delay was fused, some of the nodes were removed + // Iterate one more time... 
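+        // (Restart the scan: node handles cached by the loop above may be
+        // stale once fuseDelay() has removed nodes from the graph.)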
+ if (!is_fused) { + break; + } + } +}; + +// NB: Finds the maximum parallelism depth to tell concurrent executor +// how many threads should be used for execution +void findMaxParallelBranches(Graph& graph, uint32_t& max_parallel_branches) { + // NB: Basically the maximum parallelism in computational graph + // is the maximum width of its level in BFS traversal, taking into + // account that dependencies for the node are resolved + std::unordered_set curr_lvl; + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + for (auto op_nh : nh->dstNodes()) { + curr_lvl.emplace(op_nh); + } + } + } + + std::unordered_set visited; + + auto get_all_deps = [&](auto nh) { + std::unordered_set deps; + for (auto in_nhs : nh->srcNodes()) { + for (auto op_nhs : in_nhs->srcNodes()) { + deps.emplace(op_nhs); + } + } + return deps; + }; + + auto all_deps_resolved = [&](auto nh) { + auto deps = get_all_deps(nh); + return std::all_of(deps.begin(), deps.end(), [&](auto dep) { + return visited.find(dep) != visited.end(); + }); + }; + + max_parallel_branches = static_cast(curr_lvl.size()); + while (!curr_lvl.empty()) { + std::unordered_set next_lvl; + for (auto nh : curr_lvl) { + visited.emplace(nh); + ASSERT(nh->dstNodes().size() == 1u); + auto data_nh = nh->dstNodes().front(); + for (auto op_nh : data_nh->dstNodes()) { + if (all_deps_resolved(op_nh)) { + next_lvl.emplace(op_nh); + } + } + } + if (next_lvl.size() > max_parallel_branches) { + max_parallel_branches = static_cast(next_lvl.size()); + } + curr_lvl = std::move(next_lvl); + } +} + +// NB: Build "G" operations according to scenario graph nodes +void buildOperations(Graph& graph, IBuildStrategy::Ptr strategy, const InferenceParamsMap& params_map) { + OpBuilder builder{graph, strategy, params_map}; + for (auto nh : graph.nodes()) { + // NB: Skip data nodes + if (!graph.meta(nh).has()) { + continue; + } + std::visit( + [nh, &builder](const auto& op) { + builder.build(nh, op); + }, + graph.meta(nh).get().kind); + } + + for (auto nh : graph.nodes()) { + // NB: Make sure all data nodes that needs to be + // dumped or validated are graph outputs. + if (!graph.meta(nh).has() && (graph.meta(nh).has() || graph.meta(nh).has())) { + graph.meta(nh).set(GraphOutput{}); + } + } +}; + +void buildComputation(Graph& graph, Protocol& proto) { + cv::GProtoArgs graph_inputs; + cv::GProtoArgs graph_outputs; + + std::unordered_map all_data; + auto sorted = graph.sorted(); + + // NB: Initialize "G" inputs + for (auto nh : sorted) { + if (graph.meta(nh).has()) { + auto it = all_data.emplace(nh, cv::GProtoArg{cv::GMat()}).first; + graph_inputs.push_back(it->second); + } + } + // NB: Apply "G" operations in topological order + for (auto nh : sorted) { + if (graph.meta(nh).has()) { + const auto& operation = graph.meta(nh).get(); + // NB: Map input args to the correct input index. + std::unordered_map idx_to_arg; + auto in_ehs = nh->srcEdges(); + for (auto in_eh : in_ehs) { + ASSERT(graph.meta(in_eh).has()); + const uint32_t in_idx = graph.meta(in_eh).get().idx; + auto arg = all_data.at(in_eh->srcNode()); + idx_to_arg.emplace(in_idx, arg); + } + cv::GProtoArgs in_args; + for (uint32_t idx = 0; idx < idx_to_arg.size(); ++idx) { + in_args.push_back(idx_to_arg.at(idx)); + } + // NB: Link G-API operation with its io data. + auto out_args = operation.on(in_args); + // TODO: Validation in/out amount and types... + // NB: Map output args to the correct index. 
+ auto out_ehs = nh->dstEdges(); + for (auto out_eh : out_ehs) { + ASSERT(graph.meta(out_eh).has()); + const uint32_t out_idx = graph.meta(out_eh).get().idx; + auto out_nh = out_eh->dstNode(); + all_data.emplace(out_nh, out_args[out_idx]); + } + } + } + + // NB: Collect "G" outputs + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + graph_outputs.push_back(all_data.at(nh)); + } + } + + ASSERT(!graph_inputs.empty()) + ASSERT(!graph_outputs.empty()) + // NB: Finally save computation i/o to build GComputation later on + proto = Protocol{std::move(graph_inputs), std::move(graph_outputs)}; +} + +static void collectOutputMeta(Graph& graph, std::vector& out_meta) { + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + out_meta.push_back(graph.meta(nh)); + } + } +} + +} // namespace passes + +ComputationBuilder::ComputationBuilder(IBuildStrategy::Ptr strategy): m_strategy(strategy) { +} + +Computation ComputationBuilder::build(ScenarioGraph& graph, const InferenceParamsMap& infer_params, + const ComputationBuilder::Options& opts) { + uint32_t max_parallel_branches = 1u; + auto compile_args = cv::compile_args(cv::gapi::kernels()); + std::vector outputs_meta; + Protocol proto; + + using namespace std::placeholders; + graph.pass(passes::throwIfCycle); + graph.pass(passes::init); + graph.pass(passes::fuseDelays); + graph.pass(std::bind(passes::findMaxParallelBranches, _1, std::ref(max_parallel_branches))); + graph.pass(std::bind(passes::buildOperations, _1, m_strategy, std::cref(infer_params))); + graph.pass(std::bind(passes::buildComputation, _1, std::ref(proto))); + graph.pass(std::bind(passes::collectOutputMeta, _1, std::ref(outputs_meta))); + + if (opts.add_perf_meta) { + // FIXME: Must work with any G-Type! + ASSERT(cv::util::holds_alternative(proto.graph_outputs.front())); + cv::GMat g = cv::util::get(proto.graph_outputs.front()); + proto.graph_outputs.emplace_back(cv::gapi::streaming::timestamp(g).strip()); + proto.graph_outputs.emplace_back(cv::gapi::streaming::seq_id(g).strip()); + } + + cv::GComputation comp(cv::GProtoInputArgs{std::move(proto.graph_inputs)}, + cv::GProtoOutputArgs{std::move(proto.graph_outputs)}); + + return Computation{std::move(comp), std::move(compile_args), std::move(outputs_meta), {max_parallel_branches}}; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.hpp new file mode 100644 index 00000000000000..6a51b068065284 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.hpp @@ -0,0 +1,74 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "result.hpp" +#include "scenario/inference.hpp" +#include "scenario/scenario_graph.hpp" +#include "simulation/computation.hpp" +#include "utils/data_providers.hpp" + +#include +#include +#include + +struct InputIdx { + uint32_t idx; +}; + +struct OutputIdx { + uint32_t idx; +}; + +struct GraphInput {}; +struct GraphOutput {}; +struct GData {}; +struct GOperation { + using F = std::function; + F on; +}; + +struct Dump { + std::filesystem::path path; +}; + +struct Validate { + using F = std::function; + F validator; + std::vector reference; +}; + +struct InferDesc { + std::string tag; + LayersInfo input_layers; + LayersInfo output_layers; +}; + +struct IBuildStrategy { + using Ptr = std::shared_ptr; + struct InferBuildInfo { + std::vector providers; + std::vector inputs_meta; + std::vector 
outputs_meta; + const bool disable_copy; + }; + // NB: Extend for any further node types needed + virtual InferBuildInfo build(const InferDesc& infer) = 0; +}; + +class ComputationBuilder { +public: + explicit ComputationBuilder(IBuildStrategy::Ptr strategy); + + struct Options { + bool add_perf_meta; + }; + + Computation build(ScenarioGraph& graph, const InferenceParamsMap& infer_params, const Options& opts); + +private: + IBuildStrategy::Ptr m_strategy; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.cpp new file mode 100644 index 00000000000000..3b10767b34135f --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.cpp @@ -0,0 +1,89 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "dummy_source.hpp" + +#include + +#include "utils/utils.hpp" + +DummySource::DummySource(const uint64_t frames_interval_in_us, const bool drop_frames, + const bool disable_high_resolution_timer) + // NB: 0 is special value means no limit fps for source. + : m_latency_in_us(frames_interval_in_us), + m_drop_frames(drop_frames), + m_timer(SleepTimer::create(disable_high_resolution_timer)), + // NB: Used for simulation, just return 1 byte. + m_mat(utils::createRandom({1}, CV_8U)) { +} + +bool DummySource::pull(cv::gapi::wip::Data& data) { + using namespace std::chrono; + using namespace cv::gapi::streaming; + using ts_t = microseconds; + + // NB: Wait m_latency_in_us before return the first frame. + if (m_next_tick_ts == -1) { + m_next_tick_ts = utils::timestamp() + m_latency_in_us; + } + + int64_t curr_ts = utils::timestamp(); + if (curr_ts < m_next_tick_ts) { + /* + * curr_ts + * | + * ------|----*-----|-------> + * ^ + * m_next_tick_ts + * + * + * NB: New frame will be produced at the m_next_tick_ts point. + */ + m_timer->wait(ts_t{m_next_tick_ts - curr_ts}); + } else if (m_latency_in_us != 0) { + /* + * curr_ts + * +1 +2 | + * |----------|----------|----------|----*-----|-------> + * ^ ^ + * m_next_tick_ts -------------> + * + */ + + // NB: Count how many frames have been produced since last pull (m_next_tick_ts). + int64_t num_frames = static_cast((curr_ts - m_next_tick_ts) / m_latency_in_us); + // NB: Shift m_next_tick_ts to the nearest tick before curr_ts. + m_next_tick_ts += num_frames * m_latency_in_us; + // NB: if drop_frames is enabled, update current seq_id and wait for the next tick, otherwise + // return last written frame (+2 at the picture above) immediately. + if (m_drop_frames) { + // NB: Shift tick to the next frame. + m_next_tick_ts += m_latency_in_us; + // NB: Wait for the next frame. + m_timer->wait(ts_t{m_next_tick_ts - curr_ts}); + // NB: Drop already produced frames + update seq_id for the current. + m_curr_seq_id += num_frames + 1; + } + } + // NB: Just increase reference counter not to release mat memory + // after assigning it to the data. 
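+    // (cv::Mat copies are shallow: `mat` shares the pixel buffer with
+    // m_mat, so no per-frame allocation takes place here.)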
+ cv::Mat mat = m_mat; + + data.meta[meta_tag::timestamp] = utils::timestamp(); + data.meta[meta_tag::seq_id] = m_curr_seq_id++; + data = mat; + m_next_tick_ts += m_latency_in_us; + + return true; +} + +cv::GMetaArg DummySource::descr_of() const { + return cv::GMetaArg{cv::descr_of(m_mat)}; +} + +void DummySource::reset() { + m_next_tick_ts = -1; + m_curr_seq_id = 0; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.hpp new file mode 100644 index 00000000000000..304e4e7ef2f512 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.hpp @@ -0,0 +1,37 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include // cv::gapi::wip::IStreamSource + +#include "utils/timer.hpp" +#include "utils/utils.hpp" + +class DummySource final : public cv::gapi::wip::IStreamSource { +public: + using Ptr = std::shared_ptr; + + explicit DummySource(const uint64_t frames_interval_in_us, const bool drop_frames, + const bool disable_high_resolution_timer); + + bool pull(cv::gapi::wip::Data& data) override; + cv::GMetaArg descr_of() const override; + void reset(); + +private: + uint64_t m_latency_in_us; + bool m_drop_frames; + IWaitable::Ptr m_timer; + + cv::Mat m_mat; + int64_t m_next_tick_ts = -1; + int64_t m_curr_seq_id = 0; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.cpp new file mode 100644 index 00000000000000..4a0fa451dace91 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.cpp @@ -0,0 +1,66 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "executor.hpp" +#include "utils/error.hpp" + +#include + +PipelinedExecutor::PipelinedExecutor(cv::GStreamingCompiled&& compiled): m_compiled(std::move(compiled)) { +} + +PipelinedExecutor::Output PipelinedExecutor::runLoop(cv::GRunArgs&& inputs, Callback callback, + ITermCriterion::Ptr criterion) { + if (!criterion) { + THROW_ERROR("Termination criterion hasn't been specified!"); + } + + using namespace std::chrono; + using clock_t = high_resolution_clock; + + m_compiled.setSource(std::move(inputs)); + criterion->init(); + + const auto start_tick = clock_t::now(); + m_compiled.start(); + while (criterion->check()) { + if (!callback(m_compiled)) { + break; + } + criterion->update(); + } + const auto end_tick = clock_t::now(); + // NB: Some frames might be in queue just wait until they processed. + // They shouldn't be taken into account since execution is over. 
+ m_compiled.stop(); + return Output{static_cast(duration_cast(end_tick - start_tick).count())}; +} + +SyncExecutor::SyncExecutor(cv::GCompiled&& compiled): m_compiled(std::move(compiled)) { +} + +SyncExecutor::Output SyncExecutor::runLoop(Callback callback, ITermCriterion::Ptr criterion) { + if (!criterion) { + THROW_ERROR("Termination criterion hasn't been specified!"); + } + + using namespace std::chrono; + using clock_t = high_resolution_clock; + + const auto start_tick = clock_t::now(); + criterion->init(); + while (criterion->check()) { + if (!callback(m_compiled)) { + break; + } + criterion->update(); + } + const auto end_tick = clock_t::now(); + return Output{static_cast(duration_cast(end_tick - start_tick).count())}; +} + +void SyncExecutor::reset() { + m_compiled.prepareForNewStream(); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.hpp new file mode 100644 index 00000000000000..17d32937b8ba54 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.hpp @@ -0,0 +1,42 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include // cv::GCompiled +#include // cv::GStreamingCompiled + +#include "scenario/criterion.hpp" + +class PipelinedExecutor { +public: + explicit PipelinedExecutor(cv::GStreamingCompiled&& compiled); + + struct Output { + uint64_t elapsed_us; + }; + using Callback = std::function; + + Output runLoop(cv::GRunArgs&& inputs, Callback callback, ITermCriterion::Ptr criterion); + +private: + cv::GStreamingCompiled m_compiled; +}; + +class SyncExecutor { +public: + explicit SyncExecutor(cv::GCompiled&& compiled); + + struct Output { + uint64_t elapsed_us; + }; + using Callback = std::function; + + Output runLoop(Callback callback, ITermCriterion::Ptr criterion); + void reset(); + +private: + cv::GCompiled m_compiled; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.cpp new file mode 100644 index 00000000000000..f3b621c68e8f99 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.cpp @@ -0,0 +1,155 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layers_data.hpp" + +#include +#include + +#include "utils/error.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +std::string normalizeLayerName(const std::string& layer_name) { + std::string normalized = layer_name; + std::unordered_set prohibited = {'\\', '/', ':', '*', '?', '"', '<', '>'}; + std::replace_if( + normalized.begin(), normalized.end(), + [&prohibited](char ch) { + return prohibited.find(ch) != prohibited.end(); + }, + '_'); + return normalized; +}; + +std::vector uploadLayerData(const std::filesystem::path& path, const std::string& tag, + const LayerInfo& layer) { + if (!std::filesystem::exists(path) || !std::filesystem::is_directory(path)) { + THROW_ERROR("Failed to find data folder: " << path << " for model: " << tag << ", layer: " << layer.name); + } + std::string iter_file_pattern = "iter_(\\d+)\\.bin"; + std::regex regex(iter_file_pattern); + std::unordered_map iter_files_map; + for (const auto& entry : std::filesystem::directory_iterator{path}) { + std::smatch match; + const auto& filename = entry.path().filename().string(); + if (std::regex_match(filename, match, regex)) { + const auto iter_idx = 
std::stoi(match[1].str()); + iter_files_map.emplace(iter_idx, entry); + } + } + std::vector out_mats; + for (int i = 0; i < iter_files_map.size(); ++i) { + if (auto it = iter_files_map.find(i); it != iter_files_map.end()) { + cv::Mat mat; + utils::createNDMat(mat, layer.dims, layer.prec); + utils::readFromBinFile(it->second.string(), mat); + out_mats.push_back(std::move(mat)); + } else { + THROW_ERROR("Failed to find data for iteration: " << i << ", model: " << tag << ", layer: " << layer.name); + } + } + return out_mats; +} + +using LayersDataMap = std::unordered_map>; +LayersDataMap uploadFromDirectory(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers) { + LayersDataMap layers_data; + for (const auto& layer : layers) { + auto normalized = normalizeLayerName(layer.name); + auto data = uploadLayerData(path / normalized, tag, layer); + if (data.empty()) { + THROW_ERROR("No iterations data found for model: " << tag << ", layer: " << layer.name); + } + LOG_INFO() << " - Found " << data.size() << " iteration(s) for layer: " << layer.name << std::endl; + layers_data.emplace(layer.name, std::move(data)); + } + return layers_data; +} + +LayersDataMap uploadData(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers, + LayersType type) { + ASSERT(!layers.empty()); + const std::string kLayersTypeStr = type == LayersType::INPUT ? "input" : "output"; + if (!std::filesystem::exists(path)) { + THROW_ERROR("" << path << " must exist to upload layers data!") + } + LayersDataMap layers_data; + if (std::filesystem::is_directory(path)) { + layers_data = uploadFromDirectory(path, tag, layers); + } else { + if (layers.size() > 1u) { + THROW_ERROR("Model: " << tag << " must have exactly one " << kLayersTypeStr + << " layer in order to upload data from: " << path); + } + const auto& layer = layers.front(); + cv::Mat mat; + utils::createNDMat(mat, layer.dims, layer.prec); + utils::readFromBinFile(path.string(), mat); + LOG_INFO() << " - Found single iteration data for model: " << tag << ", layer: " << layer.name << std::endl; + layers_data = {{layer.name, std::vector{mat}}}; + } + // NB: layers_data can't be empty as long as layers vector is non-empty. + const auto kNumPerLayerIterations = layers_data.begin()->second.size(); + // NB: All i/o layers for model must have the equal amount of data. 
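+    // (E.g. one input layer providing 8 iteration files and another
+    // providing 10 is rejected here.)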
+ for (const auto& [layer_name, data_vec] : layers_data) { + if (data_vec.size() != kNumPerLayerIterations) { + THROW_ERROR("Model: " << tag << " has different amount of data for " << kLayersTypeStr + << " layer: " << layer_name << "(" << data_vec.size() << ") and layer: " + << layers_data.begin()->first << "(" << kNumPerLayerIterations << ")"); + } + } + return layers_data; +} + +bool isDirectory(const std::filesystem::path& path) { + if (std::filesystem::exists(path)) { + return std::filesystem::is_directory(path); + } + return path.extension().empty(); +} + +std::vector createConstantProviders(LayersDataMap&& layers_data, + const std::vector& layer_names) { + std::vector providers; + for (const auto& layer_name : layer_names) { + auto layer_data = layers_data.at(layer_name); + providers.push_back(std::make_shared(std::move(layer_data))); + } + return providers; +} + +std::vector createRandomProviders(const LayersInfo& layers, + const std::map& generators) { + std::vector providers; + for (const auto& layer : layers) { + auto generator = generators.at(layer.name); + auto provider = std::make_shared(generator, layer.dims, layer.prec); + LOG_INFO() << " - Random generator: " << generator->str() << " will be used for layer: " << layer.name + << std::endl; + providers.push_back(std::move(provider)); + } + return providers; +} + +std::vector createDirectoryLayout(const std::filesystem::path& path, + const std::vector& layer_names) { + std::vector dirs_path; + std::filesystem::create_directories(path); + for (const auto& layer_name : layer_names) { + // NB: Use normalized layer name to create dir + // to store reference data for particular layer. + std::filesystem::path curr_dir = path / normalizeLayerName(layer_name); + dirs_path.push_back(curr_dir); + std::filesystem::create_directory(curr_dir); + { + // NB: Save the original layer name; + std::ofstream file{curr_dir / "layer_name.txt"}; + ASSERT(file.is_open()); + file << layer_name; + } + } + return dirs_path; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.hpp new file mode 100644 index 00000000000000..6d2b9bc6716212 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.hpp @@ -0,0 +1,57 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "scenario/inference.hpp" +#include "utils/data_providers.hpp" + +std::string normalizeLayerName(const std::string& layer_name); +std::vector uploadLayerData(const std::filesystem::path& path, const std::string& tag, const LayerInfo& layer); + +enum class LayersType { INPUT = 0, OUTPUT }; +using LayersDataMap = std::unordered_map>; +LayersDataMap uploadFromDirectory(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers); + +LayersDataMap uploadData(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers, + LayersType type); + +bool isDirectory(const std::filesystem::path& path); + +std::vector createConstantProviders(LayersDataMap&& layers_data, + const std::vector& layer_names); + +std::vector createRandomProviders(const LayersInfo& layers, + const std::map& generators); + +std::vector createDirectoryLayout(const std::filesystem::path& path, + const std::vector& layer_names); +template +std::map unpackWithDefault(const LayerVariantAttr& attr, const std::vector& layer_names, + const T& def_value) { + std::map result; + if 
(std::holds_alternative(attr)) { + for (const auto& layer_name : layer_names) { + result.emplace(layer_name, def_value); + } + } else if (std::holds_alternative(attr)) { + auto val = std::get(attr); + for (const auto& layer_name : layer_names) { + result.emplace(layer_name, val); + } + } else { + auto map = std::get>(attr); + for (const auto& layer_name : layer_names) { + if (auto it = map.find(layer_name); it != map.end()) { + result.emplace(layer_name, it->second); + } else { + result.emplace(layer_name, def_value); + } + } + } + return result; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.cpp new file mode 100644 index 00000000000000..72c1e9539773e3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.cpp @@ -0,0 +1,46 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/layers_reader.hpp" +#include "scenario/inference.hpp" +#include "utils/error.hpp" +#include "utils/logger.hpp" + +OpenVINOLayersReader& getOVReader() { + static OpenVINOLayersReader reader; + return reader; +} + +static std::string getModelFileName(const InferenceParams& params) { + if (std::holds_alternative(params)) { + const auto& ov_params = std::get(params); + if (std::holds_alternative(ov_params.path)) { + return std::get(ov_params.path).model; + } else { + ASSERT(std::holds_alternative(ov_params.path)); + return std::get(ov_params.path).blob; + } + } else if (std::holds_alternative(params)) { + return std::get(params).model_path; + } else { + THROW_ERROR("Unsupported model parameters type!"); + } + // NB: Unreachable + ASSERT(false); +} + +InOutLayers LayersReader::readLayers(const InferenceParams& params) { + LOG_INFO() << "Reading model " << getModelFileName(params) << std::endl; + if (std::holds_alternative(params)) { + const auto& ov = std::get(params); + return getOVReader().readLayers(ov); + } + ASSERT(std::holds_alternative(params)); + const auto& ort = std::get(params); + // NB: Using OpenVINO to read the i/o layers information for *.onnx model + OpenVINOParams ov; + ov.path = OpenVINOParams::ModelPath{ort.model_path, ""}; + return getOVReader().readLayers(ov, true /* use_results_names */); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.hpp new file mode 100644 index 00000000000000..1d701272255fb0 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.hpp @@ -0,0 +1,27 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "scenario/inference.hpp" + +#include + +struct InOutLayers { + LayersInfo in_layers; + LayersInfo out_layers; +}; + +class OpenVINOLayersReader { +public: + OpenVINOLayersReader(); + InOutLayers readLayers(const OpenVINOParams& params, const bool use_results_names = false); + +private: + class Impl; + std::shared_ptr m_impl; +}; + +namespace LayersReader { +InOutLayers readLayers(const InferenceParams& params); +} // namespace LayersReader diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.cpp new file mode 100644 index 00000000000000..1b353dbf6e7288 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.cpp @@ -0,0 +1,131 @@ +// +// Copyright (C) 2023-2024 Intel Corporation 
+// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/operations.hpp" +#include "utils/error.hpp" + +cv::GProtoArgs InferCall::operator()(const cv::GProtoArgs& inputs) { + cv::GInferInputs infer_inputs; + for (int i = 0; i < inputs.size(); ++i) { + auto gmat = cv::util::get(inputs[i]); + infer_inputs[input_names[i]] = gmat; + } + auto infer_outputs = cv::gapi::infer(tag, infer_inputs); + cv::GProtoArgs outputs; + for (int i = 0; i < output_names.size(); ++i) { + outputs.emplace_back(infer_outputs.at(output_names[i])); + } + return outputs; +} + +std::vector GDummyM::on(const std::vector& ins, const uint64_t delay_in_us, + const std::vector& providers, const bool disable_copy) { + std::vector shapes; + std::vector op_kinds; + std::vector host_ctors; + std::vector gargs; + std::vector out_kinds; + + gargs.emplace_back(providers); + gargs.emplace_back(delay_in_us); + gargs.emplace_back(disable_copy); + + for (int i = 0; i < ins.size(); ++i) { + auto shape = cv::detail::GTypeTraits::shape; + shapes.push_back(shape); + auto op_kind = cv::detail::GTypeTraits::op_kind; + op_kinds.push_back(op_kind); + host_ctors.push_back(cv::detail::GObtainCtor::get()); + gargs.emplace_back(ins[i]); + } + + const size_t num_outputs = providers.size(); + for (int i = 0; i < num_outputs; ++i) { + auto op_kind = cv::detail::GTypeTraits::op_kind; + out_kinds.push_back(op_kind); + } + + using namespace std::placeholders; + cv::GKernel k{GDummyM::id(), + "", + std::bind(&GDummyM::getOutMeta, _1, _2), + std::move(shapes), + std::move(op_kinds), + std::move(host_ctors), + std::move(out_kinds)}; + + cv::GCall call(std::move(k)); + call.setArgs(std::move(gargs)); + + std::vector outs; + outs.reserve(num_outputs); + for (int i = 0; i < num_outputs; ++i) { + outs.push_back(call.yield(i)); + } + + return outs; +} + +cv::GMetaArgs GDummyM::getOutMeta(const cv::GMetaArgs&, const cv::GArgs& args) { + const auto& providers = args.front().get>(); + cv::GMetaArgs out_metas; + out_metas.reserve(providers.size()); + for (auto provider : providers) { + out_metas.emplace_back(provider->desc()); + } + return out_metas; +} + +cv::gapi::GBackend GCPUDummyM::backend() { + return cv::gapi::cpu::backend(); +} + +cv::GCPUKernel GCPUDummyM::kernel() { + return cv::GCPUKernel(&GCPUDummyM::call, &GCPUDummyM::setup); +} + +void GCPUDummyM::setup(const cv::GMetaArgs& metas, cv::GArgs gargs, cv::GArg& state, const cv::GCompileArgs& args) { + state = cv::GArg(std::make_shared()); + auto providers = gargs.front().get>(); + for (auto& provider : providers) { + provider->reset(); + } +} + +void GCPUDummyM::call(cv::GCPUContext& ctx) { + using namespace std::chrono; + const bool disable_copy = ctx.inArg(2u); + uint64_t elapsed = disable_copy ? 
0u : utils::measure([&]() { + auto& providers = ctx.inArg>(0u); + for (size_t i = 0; i < providers.size(); ++i) { + providers[i]->pull(ctx.outMatR(static_cast(i))); + } + }); + const auto delay_in_us = ctx.inArg(1u); + utils::busyWait(microseconds{std::max(delay_in_us - elapsed, uint64_t{0})}); +} + +cv::GProtoArgs DummyCall::operator()(const cv::GProtoArgs& inputs) { + std::vector gmats; + gmats.reserve(inputs.size()); + for (auto& in : inputs) { + gmats.emplace_back(cv::util::get(in)); + } + auto outputs = GDummyM::on(gmats, delay_in_us, providers, disable_copy); + cv::GProtoArgs proto_outputs; + for (auto& out : outputs) { + proto_outputs.emplace_back(cv::GProtoArg{out}); + } + return proto_outputs; +} + +cv::GProtoArgs CompoundCall::operator()(const cv::GProtoArgs& inputs) { + ASSERT(inputs.size() == 1) + cv::GMat in = cv::util::get(inputs[0]); + + cv::GProtoArgs proto_outputs; + proto_outputs.emplace_back(GCompound::on(in, function)); + return proto_outputs; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.hpp new file mode 100644 index 00000000000000..cce38c9d83d07f --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.hpp @@ -0,0 +1,77 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include // GAPI_OCV_KERNEL +#include // G_API_OP +#include + +#include "utils/data_providers.hpp" +#include "utils/utils.hpp" + +// clang-format off +struct InferCall { + cv::GProtoArgs operator()(const cv::GProtoArgs& inputs); + + std::string tag; + std::vector input_names; + std::vector output_names; +}; + +struct DummyState { }; +struct GDummyM { + static const char *id() { return "custom.dummym"; } + static std::vector on(const std::vector &ins, + const uint64_t delay_in_us, + const std::vector &providers, + const bool disable_copy); + static cv::GMetaArgs getOutMeta(const cv::GMetaArgs&, const cv::GArgs &args); +}; + +struct GCPUDummyM: public cv::detail::KernelTag { + using API = GDummyM; + using State = DummyState; + + static cv::gapi::GBackend backend(); + static cv::GCPUKernel kernel(); + static void setup(const cv::GMetaArgs &metas, + cv::GArgs gargs, + cv::GArg &state, + const cv::GCompileArgs &args); + static void call(cv::GCPUContext &ctx); +}; + +struct DummyCall { + std::vector providers; + uint64_t delay_in_us; + // NB: Don't pull data from providers if enabled + bool disable_copy = false; + cv::GProtoArgs operator()(const cv::GProtoArgs& inputs); +}; + +using F = std::function; + +G_TYPED_KERNEL(GCompound, , "custom.compound") +{ + static cv::GMatDesc outMeta(cv::GMatDesc in, F){ + return in; + } +}; + +GAPI_OCV_KERNEL(GCPUCompound, GCompound) +{ + static void run(const cv::Mat& in, + F function, + cv::Mat& out) + { + function(); + } +}; + +struct CompoundCall { + cv::GProtoArgs operator()(const cv::GProtoArgs& inputs); + F function; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp new file mode 100644 index 00000000000000..57527cef0cc4aa --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp @@ -0,0 +1,215 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/layers_reader.hpp" + +#include // CV_* +#include + +#include "utils/error.hpp" + +#include + +class 
OpenVINOLayersReader::Impl { +public: + InOutLayers readLayers(const OpenVINOParams& params, const bool use_results_names); + +private: + InOutLayers readFromBlob(const std::string& blob, const std::string& device, + const std::map& config); + + InOutLayers readFromModel(const std::string& xml, const std::string& bin, const OpenVINOParams& params, + const bool use_results_names); + +private: + ov::Core m_core; +}; + +OpenVINOLayersReader::OpenVINOLayersReader(): m_impl(new OpenVINOLayersReader::Impl{}) { +} + +static ov::element::Type toElementType(int cvdepth) { + switch (cvdepth) { + case CV_8U: + return ov::element::u8; + case CV_32S: + return ov::element::i32; + case CV_32F: + return ov::element::f32; + case CV_16F: + return ov::element::f16; + } + throw std::logic_error("Failed to convert opencv depth to ov::element::Type"); +} + +static std::vector toDims(const std::vector& sz_vec) { + std::vector result; + result.reserve(sz_vec.size()); + for (auto sz : sz_vec) { + // FIXME: Probably requires some check... + result.push_back(static_cast(sz)); + } + return result; +} + +static int toPrecision(ov::element::Type prec) { + switch (prec) { + case ov::element::u8: + return CV_8U; + case ov::element::i32: + return CV_32S; + case ov::element::f32: + return CV_32F; + case ov::element::f16: + return CV_16F; + case ov::element::i64: + return CV_32S; + } + throw std::logic_error("Unsupported OV precision"); +} + +template +std::vector ovToLayersInfo(const InfoVec& vec) { + std::vector layers; + layers.reserve(vec.size()); + std::transform(vec.begin(), vec.end(), std::back_inserter(layers), [](const auto& node) { + return LayerInfo{node.get_any_name(), toDims(node.get_shape()), toPrecision(node.get_element_type())}; + }); + return layers; +}; + +static void cfgInputPreproc(ov::preprocess::PrePostProcessor& ppp, const std::shared_ptr& model, + const AttrMap& input_precision, const AttrMap& input_layout, + const AttrMap& input_model_layout) { + for (const auto& input : model->inputs()) { + const auto& name = input.get_any_name(); + auto& ii = ppp.input(name); + + const auto ip = lookUp(input_precision, name); + if (ip.has_value()) { + ii.tensor().set_element_type(toElementType(*ip)); + } + + const auto il = lookUp(input_layout, name); + if (il.has_value()) { + ii.tensor().set_layout(ov::Layout(*il)); + } + + const auto iml = lookUp(input_model_layout, name); + if (iml.has_value()) { + ii.model().set_layout(ov::Layout(*iml)); + } + } +} + +static void cfgOutputPostproc(ov::preprocess::PrePostProcessor& ppp, const std::shared_ptr& model, + const AttrMap& output_precision, const AttrMap& output_layout, + const AttrMap output_model_layout) { + for (const auto& output : model->outputs()) { + const auto& name = output.get_any_name(); + auto& oi = ppp.output(name); + + const auto op = lookUp(output_precision, name); + if (op.has_value()) { + oi.tensor().set_element_type(toElementType(*op)); + } + + const auto ol = lookUp(output_layout, name); + if (ol.has_value()) { + oi.tensor().set_layout(ov::Layout(*ol)); + } + + const auto oml = lookUp(output_model_layout, name); + if (oml.has_value()) { + oi.model().set_layout(ov::Layout(*oml)); + } + } +} + +static std::vector extractLayerNames(const std::vector>& nodes) { + std::vector names; + std::transform(nodes.begin(), nodes.end(), std::back_inserter(names), [](const auto& node) { + return node.get_any_name(); + }); + return names; +} + +InOutLayers OpenVINOLayersReader::Impl::readFromModel(const std::string& model_path, const std::string& bin_path, + const 
OpenVINOParams& params, const bool use_results_names) { + auto model = m_core.read_model(model_path, bin_path); + { + ov::preprocess::PrePostProcessor ppp(model); + + const auto& input_names = extractLayerNames(model->inputs()); + const auto ip_map = unpackLayerAttr(params.input_precision, input_names, "input precision"); + const auto il_map = unpackLayerAttr(params.input_layout, input_names, "input layout"); + const auto iml_map = unpackLayerAttr(params.input_model_layout, input_names, "input model layout"); + cfgInputPreproc(ppp, model, ip_map, il_map, iml_map); + + const auto& output_names = extractLayerNames(model->outputs()); + const auto op_map = unpackLayerAttr(params.output_precision, output_names, "output precision"); + const auto ol_map = unpackLayerAttr(params.output_layout, output_names, "output layout"); + const auto oml_map = unpackLayerAttr(params.output_model_layout, output_names, "output model layout"); + cfgOutputPostproc(ppp, model, op_map, ol_map, oml_map); + + model = ppp.build(); + } + + auto input_layers = ovToLayersInfo(model->inputs()); + auto output_layers = ovToLayersInfo(model->outputs()); + + // FIXME: UGLY WA in order to use layer names obtained by OV reader in ONNXRT. + // Ideally there should be corresponding ONNXRT reader instead!!! + // Result nodes friendly names preserve the names from original model, + // so the could be used in different framework (not only OpenVINO) + if (use_results_names) { + const auto& results = model->get_results(); + for (int i = 0; i < results.size(); ++i) { + auto result_name = results[i]->get_friendly_name(); + // This suffix is hardcoded at the OpenVINO side + const std::string suffix = "/sink_port_0"; + const auto kSuffixStartPos = result_name.length() - suffix.length(); + // Check that suffix is still presented at the OpenVINO side + ASSERT(result_name.substr(kSuffixStartPos) == suffix); + // Drop the suffix as it's not needed and update the name + result_name = result_name.substr(0, kSuffixStartPos); + output_layers[i].name = result_name; + } + } + + return {std::move(input_layers), std::move(output_layers)}; +} + +InOutLayers OpenVINOLayersReader::Impl::readFromBlob(const std::string& blob, const std::string& device, + const std::map& config) { + std::ifstream file(blob, std::ios_base::in | std::ios_base::binary); + if (!file.is_open()) { + THROW_ERROR("Failed to import model from: " << blob); + } + + auto compiled_model = m_core.import_model(file, device, {config.begin(), config.end()}); + + auto input_layers = ovToLayersInfo(compiled_model.inputs()); + auto output_layers = ovToLayersInfo(compiled_model.outputs()); + + return {std::move(input_layers), std::move(output_layers)}; +} + +InOutLayers OpenVINOLayersReader::Impl::readLayers(const OpenVINOParams& params, const bool use_results_names) { + if (std::holds_alternative(params.path)) { + const auto& path = std::get(params.path); + return readFromModel(path.model, path.bin, params, use_results_names); + } + ASSERT(std::holds_alternative(params.path)); + // NB: use_results_names is WA for reading layer names for the further usage in ONNXRT + // since ONNXRT is always ModelPath case (*.onnx format), no need to handle this for *.blob's + ASSERT(!use_results_names); + const auto& path = std::get(params.path); + return readFromBlob(path.blob, params.device, params.config); +} + +InOutLayers OpenVINOLayersReader::readLayers(const OpenVINOParams& params, const bool use_results_names) { + return m_impl->readLayers(params, use_results_names); +} diff --git 
a/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.cpp new file mode 100644 index 00000000000000..4e47b34e3d2d35 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.cpp @@ -0,0 +1,337 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "performance_mode.hpp" + +#include "simulation/computation_builder.hpp" +#include "simulation/executor.hpp" +#include "simulation/layers_data.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +#include // cv::GCompileArgs +#include // ov::benchmark_mode{} + +#include + +class PerformanceMetrics { +public: + PerformanceMetrics(const uint64_t elapsed, const std::vector latency, const std::vector seq_ids); + friend std::ostream& operator<<(std::ostream& os, const PerformanceMetrics& metrics); + +private: + // TODO: avg, min, max statistics can be encapsulated. + double avg_latency_ms; + double min_latency_ms; + double max_latency_ms; + int64_t total_frames; + double fps; + int64_t dropped; +}; + +PerformanceMetrics::PerformanceMetrics(const uint64_t elapsed_us, const std::vector latency_us, + const std::vector seq_ids) { + avg_latency_ms = utils::avg(latency_us) / 1000.0; + min_latency_ms = utils::min(latency_us) / 1000.0; + max_latency_ms = utils::max(latency_us) / 1000.0; + double elapsed_ms = static_cast(elapsed_us / 1000.0); + fps = latency_us.size() / elapsed_ms * 1000; + + dropped = 0; + int64_t prev_seq_id = seq_ids[0]; + for (size_t i = 1; i < seq_ids.size(); ++i) { + dropped += seq_ids[i] - prev_seq_id - 1; + prev_seq_id = seq_ids[i]; + } + total_frames = seq_ids.back() + 1; +} + +std::ostream& operator<<(std::ostream& os, const PerformanceMetrics& metrics) { + os << "throughput: " << metrics.fps << " FPS, latency: min: " << metrics.min_latency_ms + << " ms, avg: " << metrics.avg_latency_ms << " ms, max: " << metrics.max_latency_ms + << " ms, frames dropped: " << metrics.dropped << "/" << metrics.total_frames; + return os; +} + +namespace { + +struct InputDataVisitor { + InputDataVisitor(const InferDesc& _infer, const PerformanceSimulation::Options& _opts) + : infer(_infer), opts(_opts), providers(infer.input_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + const InferDesc& infer; + const PerformanceSimulation::Options& opts; + std::vector providers; +}; + +void InputDataVisitor::operator()(std::monostate) { + LOG_INFO() << "Input data path for model: " << infer.tag << " hasn't been provided. Will be generated randomly" + << std::endl; + auto initializers = opts.initializers_map.at(infer.tag); + auto default_initialzer = + opts.global_initializer ? 
opts.global_initializer : std::make_shared(0.0, 255.0); + auto per_layer_initializers = + unpackWithDefault(initializers, extractLayerNames(infer.input_layers), default_initialzer); + providers = createRandomProviders(infer.input_layers, per_layer_initializers); +}; + +void InputDataVisitor::operator()(const std::string& path_str) { + const std::filesystem::path path{path_str}; + if (std::filesystem::exists(path)) { + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag << " exists - data will be uploaded" + << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.input_layers, LayersType::INPUT); + providers = createConstantProviders(std::move(layers_data), extractLayerNames(infer.input_layers)); + } else { + auto initializers = opts.initializers_map.at(infer.tag); + auto default_initialzer = + opts.global_initializer ? opts.global_initializer : std::make_shared(0.0, 255.0); + auto per_layer_initializers = + unpackWithDefault(initializers, extractLayerNames(infer.input_layers), default_initialzer); + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag + << " provided but doesn't exist - will be generated randomly" << std::endl; + providers = createRandomProviders(infer.input_layers, per_layer_initializers); + } +} + +void InputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Performance mode supports input data in form of either directory or single file!"); +}; + +} // anonymous namespace + +PerformanceStrategy::PerformanceStrategy(const PerformanceSimulation::Options& _opts): opts(_opts){}; + +IBuildStrategy::InferBuildInfo PerformanceStrategy::build(const InferDesc& infer) { + const auto& input_data = opts.input_data_map.at(infer.tag); + InputDataVisitor in_data_visitor{infer, opts}; + std::visit(in_data_visitor, input_data); + // NB: No special I/O meta for this mode + std::vector inputs_meta(infer.input_layers.size(), Meta{}); + std::vector outputs_meta(infer.output_layers.size(), Meta{}); + return {std::move(in_data_visitor.providers), std::move(inputs_meta), std::move(outputs_meta), opts.inference_only}; +} + +namespace { + +class SyncSimulation : public SyncCompiled { +public: + struct Options { + uint32_t after_iter_delay_in_us = 0u; + }; + + SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, const size_t num_outputs, + const Options& options); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + void reset(); + bool process(cv::GCompiled& pipeline); + + SyncExecutor m_exec; + std::vector m_sources; + std::vector m_out_mats; + int64_t m_ts, m_seq_id; + + std::vector m_per_iter_latency; + std::vector m_per_iter_seq_ids; + + Options m_opts; +}; + +class PipelinedSimulation : public PipelinedCompiled { +public: + PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + const size_t num_outputs); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GStreamingCompiled& pipeline); + + PipelinedExecutor m_exec; + std::vector m_sources; + cv::optional m_ts, m_seq_id; + std::vector> m_opt_mats; + + std::vector m_per_iter_latency; + std::vector m_per_iter_seq_ids; +}; + +//////////////////////////////// SyncSimulation /////////////////////////////// +SyncSimulation::SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, + const size_t num_outputs, const SyncSimulation::Options& options) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_mats(num_outputs), + m_ts(-1), + m_seq_id(-1), + m_opts(options) { + 
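// NB: The warm-up run() below performs exactly one iteration and discards its +    // metrics (per-iteration stats are cleared on every run()), so one-time costs +    // such as the very first inference do not skew the user-visible numbers. + 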
LOG_DEBUG() << "Run warm-up iteration" << std::endl; + this->run(std::make_shared(1u)); + LOG_DEBUG() << "Warm-up has finished successfully." << std::endl; +} + +void SyncSimulation::reset() { + for (auto src : m_sources) { + src->reset(); + } + m_exec.reset(); +}; + +Result SyncSimulation::run(ITermCriterion::Ptr criterion) { + using namespace std::placeholders; + auto cb = std::bind(&SyncSimulation::process, this, _1); + auto out = m_exec.runLoop(cb, criterion); + PerformanceMetrics metrics(out.elapsed_us, m_per_iter_latency, m_per_iter_seq_ids); + m_per_iter_latency.clear(); + m_per_iter_seq_ids.clear(); + std::stringstream ss; + ss << metrics; + this->reset(); + return Success{ss.str()}; +}; + +bool SyncSimulation::process(cv::GCompiled& pipeline) { + using ts_t = std::chrono::microseconds; + auto pipeline_outputs = cv::gout(); + // NB: Reference is mandatory there since copying empty + // Mat may lead to weird side effects. + for (auto& out_mat : m_out_mats) { + pipeline_outputs += cv::gout(out_mat); + } + pipeline_outputs += cv::gout(m_ts); + pipeline_outputs += cv::gout(m_seq_id); + + cv::GRunArgs pipeline_inputs; + pipeline_inputs.reserve(m_sources.size()); + for (auto src : m_sources) { + cv::gapi::wip::Data data; + src->pull(data); + pipeline_inputs.push_back(std::move(data)); + } + pipeline(std::move(pipeline_inputs), std::move(pipeline_outputs)); + const auto curr_ts = utils::timestamp(); + m_per_iter_latency.push_back(curr_ts - m_ts); + m_per_iter_seq_ids.push_back(m_seq_id); + + // NB: Do extra busy wait to simulate the user's post processing after stream. + if (m_opts.after_iter_delay_in_us != 0) { + utils::busyWait(std::chrono::microseconds{m_opts.after_iter_delay_in_us}); + } + return true; +} + +//////////////////////////////// PipelinedSimulation /////////////////////////////// +PipelinedSimulation::PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + const size_t num_outputs) + : m_exec(std::move(compiled)), m_sources(std::move(sources)), m_opt_mats(num_outputs) { + LOG_DEBUG() << "Run warm-up iteration" << std::endl; + this->run(std::make_shared(1u)); + LOG_DEBUG() << "Warm-up has finished successfully." << std::endl; +} + +Result PipelinedSimulation::run(ITermCriterion::Ptr criterion) { + auto pipeline_inputs = cv::gin(); + for (auto source : m_sources) { + pipeline_inputs += cv::gin(static_cast(source)); + } + + using namespace std::placeholders; + auto cb = std::bind(&PipelinedSimulation::process, this, _1); + auto out = m_exec.runLoop(std::move(pipeline_inputs), cb, criterion); + PerformanceMetrics metrics(out.elapsed_us, m_per_iter_latency, m_per_iter_seq_ids); + m_per_iter_latency.clear(); + m_per_iter_seq_ids.clear(); + + std::stringstream ss; + ss << metrics; + + // NB: Reset sources since they may have their state changed. 
+ for (auto src : m_sources) { + src->reset(); + } + return Success{ss.str()}; +}; + +bool PipelinedSimulation::process(cv::GStreamingCompiled& pipeline) { + using ts_t = std::chrono::microseconds; + cv::GOptRunArgsP pipeline_outputs; + for (auto& opt_mat : m_opt_mats) { + pipeline_outputs.emplace_back(cv::gout(opt_mat)[0]); + } + pipeline_outputs.emplace_back(cv::gout(m_ts)[0]); + pipeline_outputs.emplace_back(cv::gout(m_seq_id)[0]); + const bool has_data = pipeline.pull(std::move(pipeline_outputs)); + const auto curr_ts = utils::timestamp(); + ASSERT(m_ts.has_value()); + ASSERT(m_seq_id.has_value()); + m_per_iter_latency.push_back(curr_ts - *m_ts); + m_per_iter_seq_ids.push_back(*m_seq_id); + return has_data; +} + +} // anonymous namespace + +PerformanceSimulation::PerformanceSimulation(Simulation::Config&& cfg, PerformanceSimulation::Options&& opts) + : Simulation(std::move(cfg)), + m_opts(std::move(opts)), + m_strategy(std::make_shared(m_opts)), + m_comp(ComputationBuilder{m_strategy}.build(m_cfg.graph, m_cfg.params, {true /* add performance meta */})) { +} + +std::shared_ptr PerformanceSimulation::compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) { + if (m_opts.inference_only) { + // TODO: Extend also for ONNXRT backend + compile_args += cv::compile_args(cv::gapi::wip::ov::benchmark_mode{}); + } + auto compiled = m_comp.compileStreaming(descr_of(sources), std::move(compile_args)); + return std::make_shared(std::move(compiled), std::move(sources), m_comp.getOutMeta().size()); +} + +std::shared_ptr PerformanceSimulation::compileSync(const bool drop_frames) { + auto compile_args = cv::compile_args(getNetworksPackage()); + if (m_opts.inference_only) { + // TODO: Extend also for ONNXRT backend + compile_args += cv::compile_args(cv::gapi::wip::ov::benchmark_mode{}); + } + + const uint32_t max_parallel_branches = m_comp.getMaxParallelBranches(); + if (max_parallel_branches > 1u) { + LOG_INFO() << "Found at most " << max_parallel_branches + << " parallel branches in graph," + " so threaded executor will be used" + << std::endl; + ; + compile_args += cv::compile_args(cv::use_threaded_executor{max_parallel_branches}); + } + + auto sources = createSources(drop_frames); + SyncSimulation::Options options{0u}; + if (m_opts.target_latency.has_value()) { + if (!drop_frames) { + THROW_ERROR("Target latency for the stream is only supported when frames drop is enabled!"); + } + // NB: There is no way to specify more than one source currently so assert if it happened. 
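+        // NB: Thanks to the single-source guarantee above, the delay below can be +        // derived directly from the stream frame interval (m_cfg.frames_interval_in_us).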
+ ASSERT(sources.size() == 1u); + const double target_latency_in_ms = m_opts.target_latency.value(); + const uint64_t source_latency_in_ms = m_cfg.frames_interval_in_us / 1000u; + if (target_latency_in_ms > source_latency_in_ms) { + THROW_ERROR("Target latency must be less or equal than source latency!"); + } + options.after_iter_delay_in_us = static_cast(source_latency_in_ms - target_latency_in_ms) * 1000u; + } + + auto compiled = m_comp.compile(descr_of(sources), std::move(compile_args)); + return std::make_shared(std::move(compiled), std::move(sources), m_comp.getOutMeta().size(), + options); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.hpp new file mode 100644 index 00000000000000..16eff684c4e2de --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.hpp @@ -0,0 +1,41 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "simulation/computation.hpp" +#include "simulation/computation_builder.hpp" +#include "simulation/simulation.hpp" + +struct PerformanceStrategy; +class PerformanceSimulation : public Simulation { +public: + struct Options { + IRandomGenerator::Ptr global_initializer; + ModelsAttrMap initializers_map; + ModelsAttrMap input_data_map; + const bool inference_only; + std::optional target_latency; + }; + explicit PerformanceSimulation(Simulation::Config&& cfg, Options&& opts); + + std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compiler_args) override; + std::shared_ptr compileSync(const bool drop_frames) override; + +private: + Options m_opts; + std::shared_ptr m_strategy; + Computation m_comp; +}; + +struct PerformanceStrategy : public IBuildStrategy { + explicit PerformanceStrategy(const PerformanceSimulation::Options& opts); + IBuildStrategy::InferBuildInfo build(const InferDesc& infer) override; + + const PerformanceSimulation::Options& opts; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.cpp new file mode 100644 index 00000000000000..6eb55ee11fcc30 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.cpp @@ -0,0 +1,361 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reference_mode.hpp" + +#include + +#include "simulation/computation_builder.hpp" +#include "simulation/executor.hpp" +#include "simulation/layers_data.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +#include // cv::GCompileArgs + +namespace { + +struct InputDataVisitor { + InputDataVisitor(const InferDesc& _infer, const CalcRefSimulation::Options& _opts) + : infer(_infer), opts(_opts), providers(infer.input_layers.size()), metas(infer.input_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const CalcRefSimulation::Options& opts; + // NB: Relevant when input reference data already exists and need to + // generate exactly the same amount of output data. + // Note that this value must be the same for all models within stream. 
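+    // E.g. when 5 input iterations are uploaded from disk, exactly 5 reference +    // output iterations will be produced for that model.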
+ cv::util::optional model_required_iterations; + std::vector providers; + std::vector metas; +}; + +void InputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const std::string& path_str) { + // NB: Single path provided - either single file or directory. + const auto input_names = extractLayerNames(infer.input_layers); + const auto& initializers = opts.initializers_map.at(infer.tag); + + std::filesystem::path path{path_str}; + if (std::filesystem::exists(path)) { + // NB: Provided path exists - upload input data from there. + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag << " exists - data will be uploaded" + << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.input_layers, LayersType::INPUT); + // NB: The Number of iterations for every layer is ALWAYS the same. + model_required_iterations = cv::util::make_optional(layers_data.begin()->second.size()); + providers = createConstantProviders(std::move(layers_data), input_names); + } else { + // NB: Provided path doesn't exist - generate data and dump. + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag + << " doesn't exist - input data will be generated and dumped" << std::endl; + std::vector dump_path_vec; + if (isDirectory(path)) { + // NB: When the directory is provided, the number of input iterations to be generated aren't + // bounded so the "random" providers will generate input data on every iteration that will + // be dumped on the disk afterwards. + dump_path_vec = createDirectoryLayout(path, input_names); + } else { + // NB: When the single file is provided, the execution must be limited to perform + // only 1 iteration. + model_required_iterations = cv::util::optional(1ul); + if (infer.input_layers.size() > 1) { + THROW_ERROR("Model: " << infer.tag + << " must have exactly one input layer in order to dump input data to file: " + << path); + } + // NB: In case directories in that path don't exist. + std::filesystem::create_directories(path.parent_path()); + dump_path_vec = {path}; + } + auto default_initialzer = + opts.global_initializer ? 
opts.global_initializer : std::make_shared(0.0, 255.0); + auto layer_initializers = unpackWithDefault(initializers, input_names, default_initialzer); + providers = createRandomProviders(infer.input_layers, std::move(layer_initializers)); + for (uint32_t i = 0; i < infer.input_layers.size(); ++i) { + metas[i].set(Dump{dump_path_vec[i]}); + } + } +} + +struct OutputDataVisitor { + OutputDataVisitor(const InferDesc& _infer, const CalcRefSimulation::Options& _opts) + : infer(_infer), opts(_opts), metas(infer.output_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const CalcRefSimulation::Options& opts; + std::vector metas; +}; + +void OutputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const std::string& path_str) { + std::filesystem::path path{path_str}; + // NB: It doesn't matter if path exist or not - regenerate and dump outputs anyway. + std::vector dump_path_vec; + if (isDirectory(path)) { + dump_path_vec = createDirectoryLayout(path, extractLayerNames(infer.output_layers)); + } else { + if (infer.output_layers.size() > 1) { + THROW_ERROR("Model: " << infer.tag + << " must have exactly one output layer in order to dump output data to file: " + << path); + } + dump_path_vec = {path}; + } + for (uint32_t i = 0; i < infer.output_layers.size(); ++i) { + const auto& layer = infer.output_layers[i]; + metas[i].set(Dump{dump_path_vec[i]}); + } +} + +} // anonymous namespace + +class ReferenceStrategy : public IBuildStrategy { +public: + explicit ReferenceStrategy(const CalcRefSimulation::Options& opts); + + IBuildStrategy::InferBuildInfo build(const InferDesc& infer) override; + + // NB: If specified will force execution to perform exactly require_num_iterations + // regardless what user specified. + // Use case is when N input iterations are provided, + // generate exactly the same amount of output iterations. + // Another use case is when there is only single file provided + // so only one input / output iteration must be generated. + cv::optional required_num_iterations; + const CalcRefSimulation::Options& opts; +}; + +ReferenceStrategy::ReferenceStrategy(const CalcRefSimulation::Options& _opts): opts(_opts) { +} + +IBuildStrategy::InferBuildInfo ReferenceStrategy::build(const InferDesc& infer) { + const auto& input_data = opts.input_data_map.at(infer.tag); + InputDataVisitor in_data_visitor{infer, opts}; + std::visit(in_data_visitor, input_data); + // NB: Check if there is required number iterations for current model + // and fail if it's different comparing to other models in stream. 
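+    // E.g. a model dumping to a single file (exactly 1 iteration) cannot share a +    // stream with a model that uploaded several recorded input iterations.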
+ if (in_data_visitor.model_required_iterations) { + const uint64_t required_iters_value = in_data_visitor.model_required_iterations.value(); + LOG_INFO() << "Model: " << infer.tag << " will perform at most " << required_iters_value << " iteration(s)" + << std::endl; + if (!required_num_iterations) { + required_num_iterations = in_data_visitor.model_required_iterations; + } else { + if (required_iters_value != required_num_iterations.value()) { + THROW_ERROR("All models in stream are required to have the same number of iterations!"); + } + } + } + + const auto& output_data = opts.output_data_map.at(infer.tag); + OutputDataVisitor out_data_visitor{infer, opts}; + std::visit(out_data_visitor, output_data); + + return {std::move(in_data_visitor.providers), std::move(in_data_visitor.metas), std::move(out_data_visitor.metas)}; +} + +static void updateCriterion(ITermCriterion::Ptr* criterion, cv::util::optional required_num_iterations) { + if (required_num_iterations.has_value()) { + if (*criterion) { + // NB: Limit user's termination criterion to perfom at most m_required_num_iterations + *criterion = std::make_shared( + *criterion, std::make_shared(required_num_iterations.value())); + } else { + *criterion = std::make_shared(required_num_iterations.value()); + } + } +} + +static void dumpIterOutput(const cv::Mat& mat, const Dump& dump, const size_t iter) { + auto dump_path = dump.path; + if (isDirectory(dump.path)) { + std::stringstream ss; + ss << "iter_" << iter << ".bin"; + dump_path = dump_path / ss.str(); + } + utils::writeToBinFile(dump_path.string(), mat); +}; + +namespace { + +class SyncSimulation : public SyncCompiled { +public: + SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, std::vector&& out_meta, + cv::util::optional required_num_iterations); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GCompiled& pipeline); + + SyncExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector m_out_mats; + size_t m_iter_idx; + cv::optional m_required_num_iterations; +}; + +class PipelinedSimulation : public PipelinedCompiled { +public: + PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta, cv::util::optional required_num_iterations); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GStreamingCompiled& pipeline); + + PipelinedExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector> m_opt_mats; + size_t m_iter_idx; + cv::optional m_required_num_iterations; +}; + +//////////////////////////////// SyncSimulation /////////////////////////////// +SyncSimulation::SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta, cv::util::optional required_num_iterations) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_out_mats(m_out_meta.size()), + m_iter_idx(0u), + m_required_num_iterations(required_num_iterations) { +} + +Result SyncSimulation::run(ITermCriterion::Ptr criterion) { + for (auto src : m_sources) { + src->reset(); + } + using namespace std::placeholders; + auto cb = std::bind(&SyncSimulation::process, this, _1); + updateCriterion(&criterion, m_required_num_iterations); + m_exec.runLoop(cb, criterion); + std::stringstream ss; + ss << "Reference data has been generated for " << m_iter_idx << " iteration(s)"; + return Success{ss.str()}; +}; + +bool SyncSimulation::process(cv::GCompiled& pipeline) { + auto pipeline_outputs 
= cv::gout(); + // NB: Reference is mandatory there since copying empty + // Mat may lead to weird side effects. + for (auto& out_mat : m_out_mats) { + pipeline_outputs += cv::gout(out_mat); + } + cv::GRunArgs pipeline_inputs; + pipeline_inputs.reserve(m_sources.size()); + for (auto src : m_sources) { + cv::gapi::wip::Data data; + src->pull(data); + pipeline_inputs.push_back(std::move(data)); + } + pipeline(std::move(pipeline_inputs), std::move(pipeline_outputs)); + for (size_t i = 0; i < m_out_mats.size(); ++i) { + if (m_out_meta[i].has()) { + const auto& dump = m_out_meta[i].get(); + dumpIterOutput(m_out_mats[i], dump, m_iter_idx); + } + } + ++m_iter_idx; + return true; +} + +//////////////////////////////// PipelinedSimulation /////////////////////////////// +PipelinedSimulation::PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta, + cv::util::optional required_num_iterations) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_opt_mats(m_out_meta.size()), + m_iter_idx(0u), + m_required_num_iterations(required_num_iterations) { +} + +Result PipelinedSimulation::run(ITermCriterion::Ptr criterion) { + auto pipeline_inputs = cv::gin(); + for (auto source : m_sources) { + pipeline_inputs += cv::gin(static_cast(source)); + } + using namespace std::placeholders; + auto cb = std::bind(&PipelinedSimulation::process, this, _1); + updateCriterion(&criterion, m_required_num_iterations); + m_exec.runLoop(std::move(pipeline_inputs), cb, criterion); + std::stringstream ss; + ss << "Reference data has been generated for " << m_iter_idx << " iteration(s)"; + return Success{ss.str()}; +}; + +bool PipelinedSimulation::process(cv::GStreamingCompiled& pipeline) { + cv::GOptRunArgsP pipeline_outputs; + for (auto& opt_mat : m_opt_mats) { + pipeline_outputs.emplace_back(cv::gout(opt_mat)[0]); + } + const bool has_data = pipeline.pull(std::move(pipeline_outputs)); + for (size_t i = 0; i < m_out_meta.size(); ++i) { + if (m_out_meta[i].has()) { + const auto& dump = m_out_meta[i].get(); + ASSERT(m_opt_mats[i].has_value()); + dumpIterOutput(m_opt_mats[i].value(), dump, m_iter_idx); + } + } + ++m_iter_idx; + return has_data; +} + +} // anonymous namespace + +CalcRefSimulation::CalcRefSimulation(Simulation::Config&& cfg, CalcRefSimulation::Options&& opts) + : Simulation(std::move(cfg)), + m_opts(std::move(opts)), + m_strategy(std::make_shared(m_opts)), + m_comp(ComputationBuilder{m_strategy}.build(m_cfg.graph, m_cfg.params, {false /* add performance meta */})) { +} + +std::shared_ptr CalcRefSimulation::compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) { + auto compiled = m_comp.compileStreaming(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta), + m_strategy->required_num_iterations); +} + +std::shared_ptr CalcRefSimulation::compileSync(DummySources&& sources, cv::GCompileArgs&& compile_args) { + auto compiled = m_comp.compile(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta), + m_strategy->required_num_iterations); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.hpp new file mode 100644 index 00000000000000..22d2fd92cce2c6 --- /dev/null +++ 
b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.hpp @@ -0,0 +1,35 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "simulation/computation.hpp" +#include "simulation/simulation.hpp" + +class ReferenceStrategy; +class CalcRefSimulation : public Simulation { +public: + struct Options { + // FIXME: In fact, there should be only input data initializers + // and the path where to dump outputs + IRandomGenerator::Ptr global_initializer; + ModelsAttrMap<IRandomGenerator::Ptr> initializers_map; + ModelsAttrMap<std::string> input_data_map; + ModelsAttrMap<std::string> output_data_map; + }; + + explicit CalcRefSimulation(Simulation::Config&& cfg, Options&& opts); + + std::shared_ptr<PipelinedCompiled> compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) override; + std::shared_ptr<SyncCompiled> compileSync(DummySources&& sources, cv::GCompileArgs&& compiler_args) override; + +private: + Options m_opts; + std::shared_ptr<ReferenceStrategy> m_strategy; + Computation m_comp; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp new file mode 100644 index 00000000000000..52f57c2881a3b6 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp @@ -0,0 +1,131 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/simulation.hpp" + +#include "scenario/inference.hpp" +#include "utils/error.hpp" + +#include <opencv2/gapi/infer/onnx.hpp> // onnx::Params +#include <opencv2/gapi/infer/ov.hpp> // ov::Params + +static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const OpenVINOParams& params) { + using P = cv::gapi::ov::Params<cv::gapi::Generic>; + std::unique_ptr<P> network; + if (std::holds_alternative<OpenVINOParams::ModelPath>(params.path)) { + const auto& model_path = std::get<OpenVINOParams::ModelPath>(params.path); + network = std::make_unique<P>(tag, model_path.model, model_path.bin, params.device); + } else { + GAPI_Assert(std::holds_alternative<OpenVINOParams::BlobPath>(params.path)); + const auto& blob_path = std::get<OpenVINOParams::BlobPath>(params.path); + network = std::make_unique<P>
(tag, blob_path.blob, params.device); + } + + network->cfgPluginConfig(params.config); + network->cfgNumRequests(params.nireq); + + // NB: Pre/Post processing can be configured only for Model case. + if (std::holds_alternative(params.path)) { + if (std::holds_alternative(params.output_precision)) { + network->cfgOutputTensorPrecision(std::get(params.output_precision)); + } else if (std::holds_alternative>(params.output_precision)) { + network->cfgOutputTensorPrecision(std::get>(params.output_precision)); + } + + if (std::holds_alternative(params.input_layout)) { + network->cfgInputTensorLayout(std::get(params.input_layout)); + } else if (std::holds_alternative>(params.input_layout)) { + network->cfgInputTensorLayout(std::get>(params.input_layout)); + } + + if (std::holds_alternative(params.output_layout)) { + network->cfgOutputTensorLayout(std::get(params.output_layout)); + } else if (std::holds_alternative>(params.output_layout)) { + network->cfgOutputTensorLayout(std::get>(params.output_layout)); + } + + if (std::holds_alternative(params.input_model_layout)) { + network->cfgInputModelLayout(std::get(params.input_model_layout)); + } else if (std::holds_alternative>(params.input_model_layout)) { + network->cfgInputModelLayout(std::get>(params.input_model_layout)); + } + + if (std::holds_alternative(params.output_model_layout)) { + network->cfgOutputModelLayout(std::get(params.output_model_layout)); + } else if (std::holds_alternative>(params.output_model_layout)) { + network->cfgOutputModelLayout(std::get>(params.output_model_layout)); + } + } + return cv::gapi::networks(*network); +} + +static void cfgExecutionProvider(cv::gapi::onnx::Params& network, + const ONNXRTParams::OpenVINO& ovep) { + network.cfgAddExecutionProvider(cv::gapi::onnx::ep::OpenVINO{ovep.params_map}); +} + +static void cfgExecutionProvider(cv::gapi::onnx::Params& network, const ONNXRTParams::EP& ep) { + // NB: Nothing to configure for default MLAS EP + if (std::holds_alternative(ep)) { + return; + } + // TODO: Extend for any other available execution provider + ASSERT(std::holds_alternative(ep)); + cfgExecutionProvider(network, std::get(ep)); +} + +static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const ONNXRTParams& params) { + cv::gapi::onnx::Params network{tag, params.model_path}; + network.cfgSessionOptions(params.session_options); + cfgExecutionProvider(network, params.ep); + return cv::gapi::networks(network); +} + +static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const InferenceParams& params) { + if (std::holds_alternative(params)) { + return getNetPackage(tag, std::get(params)); + } + ASSERT(std::holds_alternative(params)); + return getNetPackage(tag, std::get(params)); +} + +cv::gapi::GNetPackage Simulation::getNetworksPackage() const { + cv::gapi::GNetPackage networks; + for (const auto& [tag, params] : m_cfg.params) { + networks += getNetPackage(tag, params); + } + return networks; +} + +Simulation::Simulation(Config&& cfg): m_cfg(std::move(cfg)){}; + +std::vector Simulation::createSources(const bool drop_frames) { + auto src = std::make_shared(m_cfg.frames_interval_in_us, drop_frames, + m_cfg.disable_high_resolution_timer); + return {src}; +}; + +std::shared_ptr Simulation::compilePipelined(const bool drop_frames) { + if (drop_frames) { + THROW_ERROR("Pipelined simulation doesn't support frames drop!"); + } + // NB: Hardcoded for pipelining mode as the best option + auto compile_args = cv::compile_args(getNetworksPackage()); + compile_args += 
cv::compile_args(cv::gapi::streaming::queue_capacity{1u}); + return compilePipelined(createSources(drop_frames), std::move(compile_args)); +} + +std::shared_ptr Simulation::compileSync(const bool drop_frames) { + auto compile_args = cv::compile_args(getNetworksPackage()); + return compileSync(createSources(drop_frames), std::move(compile_args)); +} + +std::shared_ptr Simulation::compilePipelined(DummySources&&, cv::GCompileArgs&&) { + THROW_ERROR("Not implemented!"); +}; + +std::shared_ptr Simulation::compileSync(DummySources&&, cv::GCompileArgs&&) { + THROW_ERROR("Not implemented!"); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.hpp new file mode 100644 index 00000000000000..b60eaf6b5a3148 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.hpp @@ -0,0 +1,57 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "result.hpp" +#include "scenario/criterion.hpp" +#include "scenario/inference.hpp" +#include "scenario/scenario_graph.hpp" +#include "simulation/dummy_source.hpp" + +#include // cv::gapi::GNetPackage + +struct ICompiled { + using Ptr = std::shared_ptr; + virtual Result run(ITermCriterion::Ptr) = 0; +}; + +struct PipelinedCompiled : public ICompiled {}; +struct SyncCompiled : public ICompiled {}; + +using DummySources = std::vector; + +class Simulation { +public: + using Ptr = std::shared_ptr; + + struct Config { + std::string stream_name; + uint64_t frames_interval_in_us; + bool disable_high_resolution_timer; + ScenarioGraph graph; + InferenceParamsMap params; + }; + + explicit Simulation(Config&& cfg); + + virtual std::shared_ptr compilePipelined(const bool drop_frames); + virtual std::shared_ptr compileSync(const bool drop_frames); + + virtual ~Simulation() = default; + +protected: + virtual std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args); + virtual std::shared_ptr compileSync(DummySources&& sources, cv::GCompileArgs&& compile_args); + + std::vector createSources(const bool drop_frames); + cv::gapi::GNetPackage getNetworksPackage() const; + +protected: + Config m_cfg; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.cpp new file mode 100644 index 00000000000000..c6544522287048 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.cpp @@ -0,0 +1,363 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/validation_mode.hpp" + +#include "scenario/accuracy_metrics.hpp" +#include "simulation/computation_builder.hpp" +#include "simulation/executor.hpp" +#include "simulation/layers_data.hpp" +#include "simulation/validation_mode.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +#include // cv::GCompileArgs + +class LayerValidator { +public: + LayerValidator(const std::string& tag, const std::string& layer_name, IAccuracyMetric::Ptr metric); + Result operator()(const cv::Mat& lhs, const cv::Mat& rhs); + +private: + std::string m_tag; + std::string m_layer_name; + IAccuracyMetric::Ptr m_metric; +}; + +LayerValidator::LayerValidator(const std::string& tag, const std::string& layer_name, IAccuracyMetric::Ptr metric) + : m_tag(tag), m_layer_name(layer_name), m_metric(metric) { +} + +Result 
LayerValidator::operator()(const cv::Mat& lhs, const cv::Mat& rhs) { + auto result = m_metric->compare(lhs, rhs); + if (!result) { + std::stringstream ss; + ss << "Model: " << m_tag << ", Layer: " << m_layer_name << ", Metric: " << m_metric->str() + << ", Reason: " << result.str() << ";"; + return Error{ss.str()}; + } + return Success{"Passed"}; +} + +namespace { + +struct InputDataVisitor { + InputDataVisitor(const InferDesc& _infer, const ValSimulation::Options& _opts) + : infer(_infer), opts(_opts), providers(infer.input_layers.size()), metas(infer.input_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const ValSimulation::Options& opts; + std::vector providers; + std::vector metas; +}; + +void InputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Validation mode requires input data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Validation mode requires input data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const std::string& path_str) { + std::filesystem::path path{path_str}; + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag << " exists - data will be uploaded" + << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.input_layers, LayersType::INPUT); + providers = createConstantProviders(std::move(layers_data), extractLayerNames(infer.input_layers)); +}; + +struct OutputDataVisitor { + OutputDataVisitor(const InferDesc& _infer, const ValSimulation::Options& _opts) + : infer(_infer), opts(_opts), metas(infer.output_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const ValSimulation::Options& opts; + std::vector metas; +}; + +void OutputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Validation mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Validation mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const std::string& path_str) { + auto default_metric = opts.global_metric ? 
opts.global_metric : std::make_shared(0.0); + auto per_layer_metrics = + unpackWithDefault(opts.metrics_map.at(infer.tag), extractLayerNames(infer.output_layers), default_metric); + std::filesystem::path path{path_str}; + LOG_INFO() << "Reference output data path: " << path << " for model: " << infer.tag + << " exists - data will be uploaded" << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.output_layers, LayersType::OUTPUT); + for (uint32_t i = 0; i < infer.output_layers.size(); ++i) { + const auto& layer = infer.output_layers[i]; + LayerValidator validator{infer.tag, layer.name, per_layer_metrics.at(layer.name)}; + metas[i].set(Validate{std::move(validator), layers_data.at(layer.name)}); + } +} + +} // anonymous namespace + +class ValidationStrategy : public IBuildStrategy { +public: + explicit ValidationStrategy(const ValSimulation::Options& _opts): opts(_opts) { + } + + IBuildStrategy::InferBuildInfo build(const InferDesc& infer) override { + const auto& input_data = opts.input_data_map.at(infer.tag); + InputDataVisitor in_data_visitor{infer, opts}; + std::visit(in_data_visitor, input_data); + + const auto& output_data = opts.output_data_map.at(infer.tag); + OutputDataVisitor out_data_visitor{infer, opts}; + std::visit(out_data_visitor, output_data); + + if (opts.per_iter_outputs_path.has_value()) { + auto model_dir = opts.per_iter_outputs_path.value() / infer.tag; + // NB: Remove the data from the previous run if such exist + LOG_INFO() << "Actual output data for model: " << infer.tag + << " will be dumped and replaced at path: " << model_dir << std::endl; + std::filesystem::remove_all(model_dir); + auto dump_path_vec = createDirectoryLayout(model_dir, extractLayerNames(infer.output_layers)); + for (uint32_t i = 0; i < infer.output_layers.size(); ++i) { + out_data_visitor.metas[i].set(Dump{dump_path_vec[i]}); + } + } + + // NB: No special input meta for this mode. + std::vector input_meta(infer.input_layers.size(), Meta{}); + return {std::move(in_data_visitor.providers), std::move(input_meta), std::move(out_data_visitor.metas)}; + } + + const ValSimulation::Options& opts; +}; + +struct FailedIter { + size_t iter_idx; + std::vector reasons; +}; + +static Result reportValidationResult(const std::vector& failed_iters, const size_t total_iters) { + std::stringstream ss; + if (!failed_iters.empty()) { + const auto kItersToShow = 10u; + const auto kLimit = failed_iters.size() < kItersToShow ? 
failed_iters.size() : kItersToShow; + ss << "Accuraccy check failed on " << failed_iters.size() << " iteration(s)" + << " (first " << kLimit << "):"; + ss << "\n"; + for (uint32_t i = 0; i < kLimit; ++i) { + ss << "Iteration " << failed_iters[i].iter_idx << ":\n"; + for (const auto& reason : failed_iters[i].reasons) { + ss << " " << reason << "\n"; + } + } + return Error{ss.str()}; + } + ss << "Validation has passed for " << total_iters << " iteration(s)"; + return Success{ss.str()}; +} + +static std::vector validateOutputs(const std::vector& out_mats, const std::vector& out_meta, + const size_t iter_idx) { + std::vector failed_list; + for (size_t i = 0; i < out_mats.size(); ++i) { + if (out_meta[i].has()) { + const auto& val = out_meta[i].get(); + const auto& refvec = val.reference; + ASSERT(!refvec.empty()); + const auto& refmat = refvec[iter_idx % refvec.size()]; + auto result = val.validator(refmat, out_mats[i]); + if (!result) { + failed_list.push_back(std::move(result.str())); + } + } + } + return failed_list; +} + +static void dumpOutputs(const std::vector& out_mats, const std::vector& out_meta, + const size_t iter_idx) { + for (size_t i = 0; i < out_mats.size(); ++i) { + if (out_meta[i].has()) { + std::stringstream ss; + ss << "iter_" << iter_idx << ".bin"; + auto dump_path = out_meta[i].get().path / ss.str(); + utils::writeToBinFile(dump_path.string(), out_mats[i]); + } + } +} + +namespace { + +class SyncSimulation : public SyncCompiled { +public: + SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, std::vector&& out_meta); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GCompiled& pipeline); + + SyncExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector m_out_mats; + size_t m_iter_idx; + std::vector m_failed_iters; +}; + +class PipelinedSimulation : public PipelinedCompiled { +public: + PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GStreamingCompiled& pipeline); + + PipelinedExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector> m_opt_mats; + size_t m_iter_idx; + std::vector m_failed_iters; +}; + +//////////////////////////////// SyncSimulation /////////////////////////////// +SyncSimulation::SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_out_mats(m_out_meta.size()), + m_iter_idx(0u) { +} + +Result SyncSimulation::run(ITermCriterion::Ptr criterion) { + for (auto src : m_sources) { + src->reset(); + } + using namespace std::placeholders; + auto cb = std::bind(&SyncSimulation::process, this, _1); + m_exec.runLoop(cb, criterion); + return reportValidationResult(m_failed_iters, m_iter_idx); +}; + +bool SyncSimulation::process(cv::GCompiled& pipeline) { + auto pipeline_outputs = cv::gout(); + // NB: Reference is mandatory there since copying empty + // Mat may lead to weird side effects. 
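+    // cv::gout() captures pointers to the passed cv::Mat objects, so a by-value +    // loop variable would make the pipeline write into short-lived copies.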
+ for (auto& out_mat : m_out_mats) { + pipeline_outputs += cv::gout(out_mat); + } + cv::GRunArgs pipeline_inputs; + pipeline_inputs.reserve(m_sources.size()); + for (auto src : m_sources) { + cv::gapi::wip::Data data; + src->pull(data); + pipeline_inputs.push_back(std::move(data)); + } + pipeline(std::move(pipeline_inputs), std::move(pipeline_outputs)); + + dumpOutputs(m_out_mats, m_out_meta, m_iter_idx); + auto failed_list = validateOutputs(m_out_mats, m_out_meta, m_iter_idx); + if (!failed_list.empty()) { + m_failed_iters.push_back(FailedIter{m_iter_idx, std::move(failed_list)}); + } + ++m_iter_idx; + return true; +} + +//////////////////////////////// PipelinedSimulation /////////////////////////////// +PipelinedSimulation::PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_opt_mats(m_out_meta.size()), + m_iter_idx(0u) { +} + +Result PipelinedSimulation::run(ITermCriterion::Ptr criterion) { + auto pipeline_inputs = cv::gin(); + for (auto source : m_sources) { + pipeline_inputs += cv::gin(static_cast(source)); + } + using namespace std::placeholders; + auto cb = std::bind(&PipelinedSimulation::process, this, _1); + m_exec.runLoop(std::move(pipeline_inputs), cb, criterion); + return reportValidationResult(m_failed_iters, m_iter_idx); +}; + +bool PipelinedSimulation::process(cv::GStreamingCompiled& pipeline) { + cv::GOptRunArgsP pipeline_outputs; + for (auto& opt_mat : m_opt_mats) { + pipeline_outputs.emplace_back(cv::gout(opt_mat)[0]); + } + const bool has_data = pipeline.pull(std::move(pipeline_outputs)); + std::vector out_mats; + out_mats.reserve(m_opt_mats.size()); + for (auto opt_mat : m_opt_mats) { + ASSERT(opt_mat.has_value()); + out_mats.push_back(opt_mat.value()); + } + + dumpOutputs(out_mats, m_out_meta, m_iter_idx); + auto failed_list = validateOutputs(out_mats, m_out_meta, m_iter_idx); + if (!failed_list.empty()) { + m_failed_iters.push_back(FailedIter{m_iter_idx, std::move(failed_list)}); + } + ++m_iter_idx; + return has_data; +} + +} // anonymous namespace + +ValSimulation::ValSimulation(Simulation::Config&& cfg, ValSimulation::Options&& opts) + : Simulation(std::move(cfg)), + m_opts(std::move(opts)), + m_strategy(std::make_shared(m_opts)), + m_comp(ComputationBuilder{m_strategy}.build(m_cfg.graph, m_cfg.params, {false /* add performance meta */})) { +} + +std::shared_ptr ValSimulation::compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) { + auto compiled = m_comp.compileStreaming(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta)); +} + +std::shared_ptr ValSimulation::compileSync(DummySources&& sources, cv::GCompileArgs&& compile_args) { + const uint32_t max_parallel_branches = m_comp.getMaxParallelBranches(); + if (max_parallel_branches > 1u) { + LOG_INFO() << "Found at most " << max_parallel_branches + << " parallel branches in graph," + " so threaded executor will be used" + << std::endl; + ; + compile_args += cv::compile_args(cv::use_threaded_executor{max_parallel_branches}); + } + auto compiled = m_comp.compile(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta)); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.hpp 
b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.hpp new file mode 100644 index 00000000000000..180c802803a68c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.hpp @@ -0,0 +1,34 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "simulation/computation.hpp" +#include "simulation/simulation.hpp" + +class ValidationStrategy; +class ValSimulation : public Simulation { +public: + struct Options { + IAccuracyMetric::Ptr global_metric; + ModelsAttrMap metrics_map; + ModelsAttrMap input_data_map; + ModelsAttrMap output_data_map; + std::optional per_iter_outputs_path; + }; + explicit ValSimulation(Simulation::Config&& cfg, Options&& opts); + + std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) override; + std::shared_ptr compileSync(DummySources&& sources, cv::GCompileArgs&& compiler_args) override; + +private: + Options m_opts; + std::shared_ptr m_strategy; + Computation m_comp; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.cpp new file mode 100644 index 00000000000000..f3eaf7756e1793 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.cpp @@ -0,0 +1,64 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include "data_providers.hpp" + +#include + +#include "utils.hpp" +#include "utils/error.hpp" + +UniformGenerator::UniformGenerator(double low, double high): m_low(low), m_high(high) { + ASSERT(low <= high); +} + +void UniformGenerator::generate(cv::Mat& mat) { + cv::randu(mat, m_low, m_high); +} + +std::string UniformGenerator::str() const { + std::stringstream ss; + ss << "{dist: uniform, range: [" << m_low << ", " << m_high << "]}"; + return ss.str(); +} + +RandomProvider::RandomProvider(IRandomGenerator::Ptr impl, const std::vector& dims, const int depth) + : m_impl(impl), m_dims(dims), m_depth(depth) { +} + +void RandomProvider::pull(cv::Mat& mat) { + utils::createNDMat(mat, m_dims, m_depth); + m_impl->generate(mat); +} + +cv::GMatDesc RandomProvider::desc() { + if (m_dims.size() == 2u) { + return cv::GMatDesc{m_depth, 1, cv::Size(m_dims[1], m_dims[0])}; + } + return cv::GMatDesc{m_depth, m_dims}; +} + +CircleBuffer::CircleBuffer(const std::vector& buffer): m_buffer(buffer), m_pos(0u) { + ASSERT(!m_buffer.empty()); +} + +CircleBuffer::CircleBuffer(std::vector&& buffer): m_buffer(std::move(buffer)), m_pos(0u) { + ASSERT(!m_buffer.empty()); +} + +CircleBuffer::CircleBuffer(cv::Mat mat): CircleBuffer(std::vector{mat}) { +} + +void CircleBuffer::pull(cv::Mat& mat) { + m_buffer[m_pos++].copyTo(mat); + if (m_pos == m_buffer.size()) { + m_pos = 0; + } +} + +cv::GMatDesc CircleBuffer::desc() { + return cv::descr_of(m_buffer[0]); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp new file mode 100644 index 00000000000000..2bd45b7f19cc25 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp @@ -0,0 +1,70 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +struct IDataProvider { + using Ptr = std::shared_ptr; + virtual void pull(cv::Mat& mat) = 0; + virtual cv::GMatDesc desc() = 0; + virtual void reset() 
= 0; + virtual ~IDataProvider() = default; +}; + +class IRandomGenerator { +public: + using Ptr = std::shared_ptr; + virtual void generate(cv::Mat& mat) = 0; + virtual ~IRandomGenerator() = default; + virtual std::string str() const = 0; +}; + +class UniformGenerator : public IRandomGenerator { +public: + using Ptr = std::shared_ptr; + UniformGenerator(double low, double high); + void generate(cv::Mat& mat) override; + virtual std::string str() const override; + +private: + double m_low, m_high; +}; + +class RandomProvider : public IDataProvider { +public: + RandomProvider(IRandomGenerator::Ptr impl, const std::vector& dims, const int depth); + + void pull(cv::Mat& mat) override; + cv::GMatDesc desc() override; + void reset() override { /* do nothing */ + } + +private: + IRandomGenerator::Ptr m_impl; + std::vector m_dims; + int m_depth; +}; + +class CircleBuffer : public IDataProvider { +public: + CircleBuffer(const std::vector& buffer); + CircleBuffer(std::vector&& buffer); + CircleBuffer(cv::Mat mat); + + void pull(cv::Mat& mat) override; + cv::GMatDesc desc() override; + void reset() override { + m_pos = 0; + } + +private: + std::vector m_buffer; + uint64_t m_pos; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/error.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/error.hpp new file mode 100644 index 00000000000000..23cb2a8f46436c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/error.hpp @@ -0,0 +1,39 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace details { + +[[noreturn]] inline void assert_abort(const char* str, const int line, const char* file, const char* func) { + std::stringstream ss; + ss << file << ":" << line << ": Assertion " << str << " in function " << func << " failed\n"; + std::cerr << ss.str() << std::flush; + abort(); +} + +[[noreturn]] inline void throw_error(const char* str) { + std::stringstream ss; + ss << "An exception thrown! 
" << str << std::flush; + throw std::logic_error(ss.str()); +} + +} // namespace details + +#define ASSERT(expr) \ + { \ + if (!(expr)) \ + ::details::assert_abort(#expr, __LINE__, __FILE__, __func__); \ + } + +#define THROW_ERROR(msg) \ + { \ + std::ostringstream os; \ + os << msg; \ + ::details::throw_error(os.str().c_str()); \ + } diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/logger.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.cpp new file mode 100644 index 00000000000000..ccba64e701975c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.cpp @@ -0,0 +1,32 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "utils/logger.hpp" + +#include + +LogLevel Logger::global_lvl = LogLevel::None; + +Logger::Logger(LogLevel lvl): m_lvl(lvl) { +} + +std::stringstream& Logger::stream() { + return m_ss; +} + +Logger::~Logger() { + if (m_lvl <= Logger::global_lvl) { + switch (m_lvl) { + case LogLevel::Info: + std::cout << "[ INFO ] " << m_ss.str(); + break; + case LogLevel::Debug: + std::cout << "[ DEBUG ] " << m_ss.str(); + break; + default: + /* do nothing */; + } + } +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/logger.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.hpp new file mode 100644 index 00000000000000..e8b1f5df7f8fa3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.hpp @@ -0,0 +1,29 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +enum class LogLevel { + None = 0, + Info = 1, + Debug = 2, +}; + +class Logger { +public: + static LogLevel global_lvl; + explicit Logger(LogLevel lvl); + std::stringstream& stream(); + ~Logger(); + +private: + LogLevel m_lvl; + std::stringstream m_ss; +}; + +#define LOG_INFO() Logger{LogLevel::Info}.stream() +#define LOG_DEBUG() Logger{LogLevel::Debug}.stream() diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp new file mode 100644 index 00000000000000..a1fc0f4c2643c4 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp @@ -0,0 +1,73 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "timer.hpp" +#include "utils.hpp" + +#include + +#if defined(_WIN32) +#include + +class WinTimer : public SleepTimer { +public: + WinTimer(bool disable_high_resolution_timer); + void wait(std::chrono::microseconds time) override; + ~WinTimer(); + +private: + HANDLE m_handle = nullptr; +}; + +WinTimer::WinTimer(bool disable_high_resolution_timer) { + // FIXME: It should be called once. + timeBeginPeriod(1); + m_handle = CreateWaitableTimerEx( + NULL, NULL, disable_high_resolution_timer ? 
0 : CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS); +} + +void WinTimer::wait(std::chrono::microseconds time) { + LARGE_INTEGER li; + using ns_t = std::chrono::nanoseconds; + using ns_100_t = std::chrono::duration, ns_t::period>>; + + li.QuadPart = -std::chrono::duration_cast(time).count(); + if (!SetWaitableTimer(m_handle, &li, 0, NULL, NULL, false)) { + CloseHandle(m_handle); + throw std::logic_error("WinTimer failed to setup"); + } + + if (WaitForSingleObject(m_handle, INFINITE) != WAIT_OBJECT_0) { + CloseHandle(m_handle); + throw std::logic_error("WinTimer failed to sleep"); + } +} + +WinTimer::~WinTimer() { + CancelWaitableTimer(m_handle); + CloseHandle(m_handle); +} + +#endif // defined(_WIN32) + +class ChronoTimer : public SleepTimer { + void wait(std::chrono::microseconds time) override; +}; + +void ChronoTimer::wait(std::chrono::microseconds time) { + std::this_thread::sleep_for(time); +} + +SleepTimer::Ptr SleepTimer::create(bool disable_high_resolution_timer) { +#if defined(_WIN32) + return std::make_shared(disable_high_resolution_timer); +#else + return std::make_shared(); +#endif +} + +void BusyTimer::wait(std::chrono::microseconds time) { + utils::busyWait(time); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/timer.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.hpp new file mode 100644 index 00000000000000..423966ad2300a9 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.hpp @@ -0,0 +1,25 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +struct IWaitable { + using Ptr = std::shared_ptr; + virtual void wait(std::chrono::microseconds time) = 0; + virtual ~IWaitable() = default; +}; + +struct SleepTimer : public IWaitable { + using Ptr = std::shared_ptr; + static Ptr create(bool disable_high_resolution_timer = false); +}; + +struct BusyTimer : public IWaitable { + void wait(std::chrono::microseconds time) override; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/utils.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/utils.cpp new file mode 100644 index 00000000000000..94081dd295229e --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/utils.cpp @@ -0,0 +1,84 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "utils.hpp" + +#include + +#include + +namespace utils { + +void createNDMat(cv::Mat& mat, const std::vector& dims, int depth) { + GAPI_Assert(!dims.empty()); + mat.create(dims, depth); + if (dims.size() == 1) { + // FIXME: Well-known 1D mat WA + mat.dims = 1; + } +} + +void generateRandom(cv::Mat& out) { + switch (out.depth()) { + case CV_8U: + cv::randu(out, 0, 255); + break; + case CV_32S: + cv::randu(out, 0, 255); + break; + case CV_32F: + cv::randu(out, 0.f, 255.f); + break; + case CV_16F: { + std::vector dims; + for (int i = 0; i < out.size.dims(); ++i) { + dims.push_back(out.size[i]); + } + cv::Mat fp32_mat; + createNDMat(fp32_mat, dims, CV_32F); + cv::randu(fp32_mat, 0.f, 255.f); + fp32_mat.convertTo(out, out.type()); + break; + } + default: + throw std::logic_error("Unsupported preprocessing depth"); + } +} + +cv::Mat createRandom(const std::vector& dims, int depth) { + cv::Mat mat; + createNDMat(mat, dims, depth); + generateRandom(mat); + return mat; +} + +void readFromBinFile(const std::string& filepath, cv::Mat& mat) { + std::ifstream ifs(filepath, std::ios::binary | std::ios::ate); + + if (!ifs.is_open()) { + 
+        throw std::logic_error("Failed to open: " + filepath);
+    }
+
+    const auto file_byte_size = ifs.tellg();
+    ifs.seekg(0, std::ios::beg);
+
+    const auto mat_byte_size = mat.total() * mat.elemSize();
+    if (file_byte_size != mat_byte_size) {
+        throw std::logic_error("Failed to read cv::Mat from binary file: " + filepath + ". Mat size: " +
+                               std::to_string(mat_byte_size) + ", File size: " + std::to_string(file_byte_size));
+    }
+
+    ifs.read(mat.ptr<char>(), mat_byte_size);
+}
+
+void writeToBinFile(const std::string& filepath, const cv::Mat& mat) {
+    std::ofstream fout(filepath, std::ios::out | std::ios::binary);
+    if (!fout.is_open()) {
+        throw std::logic_error("Failed to open/create: " + filepath);
+    }
+    fout.write(mat.ptr<char>(), mat.total() * mat.elemSize());
+}
+
+}  // namespace utils
diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/utils.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/utils.hpp
new file mode 100644
index 00000000000000..a2ee4bdcf742d5
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/utils/utils.hpp
@@ -0,0 +1,65 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <algorithm>
+#include <chrono>
+#include <functional>
+#include <numeric>
+
+#include <opencv2/core.hpp>
+#include <opencv2/gapi/own/assert.hpp>
+
+namespace utils {
+
+void createNDMat(cv::Mat& mat, const std::vector<int>& dims, int depth);
+void generateRandom(cv::Mat& out);
+cv::Mat createRandom(const std::vector<int>& dims, int depth);
+
+template <typename duration_t>
+typename duration_t::rep measure(std::function<void()> f) {
+    using namespace std::chrono;
+    auto start = high_resolution_clock::now();
+    f();
+    return duration_cast<duration_t>(high_resolution_clock::now() - start).count();
+}
+
+template <typename duration_t>
+typename duration_t::rep timestamp() {
+    using namespace std::chrono;
+    auto now = high_resolution_clock::now();
+    return duration_cast<duration_t>(now.time_since_epoch()).count();
+}
+
+inline void busyWait(std::chrono::microseconds delay) {
+    auto start_ts = timestamp<std::chrono::microseconds>();
+    auto end_ts = start_ts;
+    auto time_to_wait = delay.count();
+
+    while (end_ts - start_ts < time_to_wait) {
+        end_ts = timestamp<std::chrono::microseconds>();
+    }
+}
+
+template <typename T>
+double avg(const std::vector<T>& vec) {
+    return std::accumulate(vec.begin(), vec.end(), 0.0) / vec.size();
+}
+
+template <typename T>
+T max(const std::vector<T>& vec) {
+    return *std::max_element(vec.begin(), vec.end());
+}
+
+template <typename T>
+T min(const std::vector<T>& vec) {
+    return *std::min_element(vec.begin(), vec.end());
+}
+
+void readFromBinFile(const std::string& filepath, cv::Mat& mat);
+void writeToBinFile(const std::string& filepath, const cv::Mat& mat);
+
+}  // namespace utils
diff --git a/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt b/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt
index 09ed0db315785c..e6c24566777d4b 100644
--- a/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt
+++ b/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt
@@ -26,7 +26,7 @@ foreach(LIB opencv_core opencv_imgproc opencv_imgcodecs)
 endforeach()

 if(NOT MISSING_DEPENDENCIES STREQUAL "")
-    message(WARNING "${TARGET_NAME} tool is disabled due to missing dependencies: ${MISSING_DEPENDENCIES}")
+    message(STATUS "NPU ${TARGET_NAME} tool is disabled due to missing dependencies: ${MISSING_DEPENDENCIES}")
     return()
 endif()

From 03c9ae38292a90ecb5cbfe2c8d5472eed0ec1aa9 Mon Sep 17 00:00:00 2001
From: Eddy Kim
Date: Fri, 18 Oct 2024 22:35:26 +0900
Subject: [PATCH 067/112] [GPU] Removed redundant part for dump file name (#27123)

### Details:
- Fixed the dump file name so it does not contain the layer ID twice.
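For illustration only, a tiny standalone sketch of the effect of the fix below (the values are hypothetical; the real prefix comes from get_file_prefix(), which already appears to embed the layer id, so appending layer_name again duplicated it):

    #include <iostream>
    #include <string>

    int main() {
        // Assumed example values, not taken from actual GPU plugin output:
        std::string prefix = "program1_network1_0_convolution1";  // assumed get_file_prefix() result
        std::string layer_name = "convolution1";

        std::cout << prefix + layer_name + "_src0" << "\n";  // before: layer id appears twice
        std::cout << prefix + "_src0" << "\n";               // after:  single occurrence
    }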
--- src/plugins/intel_gpu/src/graph/debug_helper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.cpp b/src/plugins/intel_gpu/src/graph/debug_helper.cpp index 7f7071e704683e..c2c41fdfab2373 100644 --- a/src/plugins/intel_gpu/src/graph/debug_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/debug_helper.cpp @@ -295,7 +295,7 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) debug_config->dump_layers_dst_only == 0 && debug_config->is_layer_for_dumping(layer_name)) { std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":"; for (size_t i = 0; i < m_inst.dependencies().size(); i++) { - std::string name = get_file_prefix() + layer_name + "_src" + std::to_string(i); + std::string name = get_file_prefix() + "_src" + std::to_string(i); auto input_mem = m_inst.dep_memory_ptr(i); if (input_mem == nullptr) { GPU_DEBUG_COUT << " input_mem_" << i << " is nullptr. Nothing to dump." << std::endl; From 373cf9083fdba52f6bea94a5c89f279c8ce99f35 Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Fri, 18 Oct 2024 15:49:37 +0200 Subject: [PATCH 068/112] [GHA] Enable thread sanitizer (#26634) ### Details: - Enable thread sanitizer - Fixed protobuf build ### Tickets: - *ticket-id* --- .github/workflows/linux_sanitizers.yml | 234 +++++++++--------- .../compile_flags/sanitizer.cmake | 10 +- .../frontend/tensorflow/node_context.hpp | 3 +- tests/{ => sanitizers}/asan/ignore.txt | 0 tests/{ => sanitizers}/asan/suppressions.supp | 0 tests/{ => sanitizers}/lsan/suppressions.txt | 0 tests/sanitizers/tsan/suppressions.txt | 15 ++ thirdparty/dependencies.cmake | 9 + 8 files changed, 155 insertions(+), 116 deletions(-) rename tests/{ => sanitizers}/asan/ignore.txt (100%) rename tests/{ => sanitizers}/asan/suppressions.supp (100%) rename tests/{ => sanitizers}/lsan/suppressions.txt (100%) create mode 100644 tests/sanitizers/tsan/suppressions.txt diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index e1a71fe92dc1a3..f13f3765d4f353 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -1,10 +1,9 @@ -name: Linux Sanitizers (Ubuntu 20.04, Python 3.11) +name: Linux Sanitizers (Ubuntu 20.04, Python 3.9) on: schedule: # run daily at 00:00 - cron: '0 0 * * *' workflow_dispatch: - # pull_request: concurrency: # github.ref is not unique in post-commit @@ -14,22 +13,69 @@ concurrency: permissions: read-all env: - PIP_CACHE_PATH: /mount/caches/pip/linux - PYTHON_VERSION: '3.11' TARGET_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }} jobs: + Smart_CI: + runs-on: ubuntu-latest + outputs: + affected_components: "${{ steps.smart_ci.outputs.affected_components }}" + changed_components: "${{ steps.smart_ci.outputs.changed_components }}" + skip_workflow: "${{ steps.smart_ci.outputs.skip_workflow }}" + steps: + - name: checkout action + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + sparse-checkout: .github/actions/smart-ci + + - name: Get affected components + id: smart_ci + uses: ./.github/actions/smart-ci + with: + repository: ${{ github.repository }} + pr: ${{ github.event.number }} + commit_sha: ${{ github.sha }} + ref_name: ${{ github.ref_name }} + component_pattern: "category: (.*)" + repo_token: ${{ secrets.GITHUB_TOKEN }} + skip_when_only_listed_labels_set: 'docs' + skip_when_only_listed_files_changed: 
'*.md,*.rst,*.png,*.jpg,*.svg,*/layer_tests_summary/*,*/conformance/*' + + Docker: + needs: Smart_CI + runs-on: aks-linux-4-cores-16gb-docker-build + container: + image: openvinogithubactions.azurecr.io/docker_build:0.2 + volumes: + - /mount:/mount + outputs: + images: "${{ steps.handle_docker.outputs.images }}" + steps: + - name: Checkout + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - uses: ./.github/actions/handle_docker + id: handle_docker + with: + images: | + ov_build/ubuntu_22_04_x64 + registry: 'openvinogithubactions.azurecr.io' + dockerfiles_root_dir: '.github/dockerfiles' + changed_components: ${{ needs.smart_ci.outputs.changed_components }} + Build: + needs: [Smart_CI, Docker] timeout-minutes: 500 defaults: run: shell: bash - runs-on: aks-linux-16-cores-32gb + runs-on: aks-linux-16-cores-64gb if: ${{ github.repository_owner == 'openvinotoolkit' }} container: - image: openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04 + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_x64 }} volumes: - /mount:/mount + options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING strategy: max-parallel: 3 fail-fast: false @@ -39,10 +85,9 @@ jobs: SANITIZER_CMAKE_OPTION: '-DENABLE_SANITIZER=ON' - SANITIZER: 'UndefinedBehavior' SANITIZER_CMAKE_OPTION: '-DENABLE_UB_SANITIZER=ON' -# - SANITIZER: 'Thread' # Problems with protobuf -# SANITIZER_CMAKE_OPTION: '-DENABLE_THREAD_SANITIZER=ON' + - SANITIZER: 'Thread' + SANITIZER_CMAKE_OPTION: '-DENABLE_THREAD_SANITIZER=ON' env: - DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input CMAKE_BUILD_TYPE: 'RelWithDebInfo' CMAKE_GENERATOR: 'Ninja' GITHUB_WORKSPACE: '/__w/openvino/openvino' @@ -51,20 +96,16 @@ jobs: INSTALL_DIR: /__w/openvino/openvino/openvino_install INSTALL_TEST_DIR: /__w/openvino/openvino/tests_install BUILD_DIR: /__w/openvino/openvino/openvino_build - LSAN_IGNORE: /__w/openvino/openvino/openvino/tests/lsan/suppressions.txt - ASAN_IGNORE: /__w/openvino/openvino/openvino/tests/asan/suppressions.supp - CXX: clang++ - CC: clang + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + SCCACHE_IGNORE_SERVER_IO_ERROR: 1 + SCCACHE_SERVER_PORT: 35555 + SCCACHE_ERROR_LOG: /__w/openvino/sccache_log.txt + SCCACHE_LOG: warn + SCCACHE_AZURE_KEY_PREFIX: sanitizers_lin_${{ matrix.SANITIZER }}_master + SCCACHE_CACHE_SIZE: 50G steps: - - name: Set apt retries - run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - - - name: Install git - run: | - apt-get update - apt-get install --assume-yes --no-install-recommends git ca-certificates - - name: Clone OpenVINO uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: @@ -79,31 +120,9 @@ jobs: submodules: 'true' ref: ${{ env.TARGET_BRANCH }} - # - # Print system info - # - - name: System info uses: ./openvino/.github/actions/system_info - - # - # Dependencies - # - - - name: Install build dependencies - run: | - bash ${OPENVINO_REPO}/install_build_dependencies.sh - apt --assume-yes install clang lld - - - name: Setup Python ${{ env.PYTHON_VERSION }} - uses: ./openvino/.github/actions/setup_python - with: - version: ${{ env.PYTHON_VERSION }} - pip-cache-path: ${{ env.PIP_CACHE_PATH }} - should-setup-pip-paths: 'true' - self-hosted-runner: 'true' - show-cache-info: 'true' - + - name: Install python dependencies run: | # For Python API: build and wheel packaging @@ -120,17 +139,15 @@ jobs: # For running Paddle frontend unit tests python3 -m pip install -r 
${OPENVINO_REPO}/src/frontends/paddle/tests/requirements.txt - + # # Build # - + - name: Clean sccache stats + run: ${SCCACHE_PATH} --zero-stats + - name: CMake configure - OpenVINO run: | - export ASAN_OPTIONS=halt_on_error=0:suppressions=${ASAN_IGNORE} - export LSAN_OPTIONS=suppressions=${LSAN_IGNORE}:NEOReadDebugKeys=1:DisableDeepBind=1 - export CC=clang - export CXX=clang++ cmake \ -G "${{ env.CMAKE_GENERATOR }}" \ -DENABLE_CPPLINT=OFF \ @@ -147,24 +164,26 @@ jobs: -DENABLE_OV_PYTORCH_FRONTEND=ON \ -DENABLE_OV_JAX_FRONTEND=ON \ -DENABLE_OV_ONNX_FRONTEND=ON \ + -DENABLE_INTEL_NPU=OFF \ -DENABLE_ONEDNN_FOR_GPU=OFF \ -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ + -DENABLE_WHEEL=OFF \ ${{ matrix.SANITIZER_CMAKE_OPTION }} \ -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} - name: Cmake build - OpenVINO run: | - export ASAN_OPTIONS=halt_on_error=0:suppressions=${ASAN_IGNORE} - export LSAN_OPTIONS=suppressions=${LSAN_IGNORE}:NEOReadDebugKeys=1:DisableDeepBind=1 - cmake --build ${BUILD_DIR} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} - + cmake --build ${BUILD_DIR} --parallel $(nproc) --config ${{ env.CMAKE_BUILD_TYPE }} + + - name: Show sccache stats + run: ${SCCACHE_PATH} --show-stats + - name: Cmake install - OpenVINO run: | cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -P ${BUILD_DIR}/cmake_install.cmake --config ${{ env.CMAKE_BUILD_TYPE }} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_TEST_DIR} -DCOMPONENT=tests -P ${BUILD_DIR}/cmake_install.cmake --config ${{ env.CMAKE_BUILD_TYPE }} - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} -DCOMPONENT=python_wheels -P ${BUILD_DIR}/cmake_install.cmake --config ${{ env.CMAKE_BUILD_TYPE }} - + - name: Remove unused files to free space run: rm -rf ${BUILD_DIR}/* @@ -185,6 +204,13 @@ jobs: # # Upload build artifacts # + - name: Upload sccache log + if: ${{ always() }} + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + with: + name: sccache_log_${{ matrix.SANITIZER }} + path: ${{ env.SCCACHE_ERROR_LOG }} + if-no-files-found: 'error' - name: Upload openvino package if: ${{ always() }} @@ -205,11 +231,11 @@ jobs: CXX_Unit_Tests: name: C++ unit tests if: ${{ github.repository_owner == 'openvinotoolkit' }} - needs: Build - timeout-minutes: 100 - runs-on: 'aks-linux-16-cores-32gb' + needs: [Docker, Build] + timeout-minutes: 120 + runs-on: aks-linux-16-cores-32gb container: - image: 'openvinogithubactions.azurecr.io/dockerhub/ubuntu:20.04' + image: ${{ fromJSON(needs.docker.outputs.images).ov_build.ubuntu_22_04_x64 }} defaults: run: shell: bash @@ -220,18 +246,18 @@ jobs: include: - SANITIZER: 'AddressAndLeak' - SANITIZER: 'UndefinedBehavior' -# - SANITIZER: 'Thread' # Problems with protobuf at the Build stage + - SANITIZER: 'Thread' env: - DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input - INSTALL_DIR: ${{ github.workspace }}/install - INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + OPENVINO_REPO: /__w/openvino/openvino/openvino + INSTALL_DIR: /__w/openvino/openvino/install + INSTALL_TEST_DIR: /__w/openvino/openvino/install/tests + BUILD_DIR: /__w/openvino/openvino/openvino_build TBB_ENABLE_SANITIZERS: 1 - CC: clang - CXX: clang++ + ASAN_OPTIONS: halt_on_error=0:suppressions=/__w/openvino/openvino/openvino/tests/sanitizers/asan/suppressions.supp + LSAN_OPTIONS: suppressions=/__w/openvino/openvino/openvino/tests/sanitizers/lsan/suppressions.txt:NEOReadDebugKeys=1:DisableDeepBind=1 + TSAN_OPTIONS: suppressions=/__w/openvino/openvino/openvino/tests/sanitizers/tsan/suppressions.txt + steps: - - name: Set apt retries 
- run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - - name: Download OpenVINO package uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: @@ -244,16 +270,6 @@ jobs: name: ${{ format('openvino_tests_{0}', matrix.SANITIZER) }} path: ${{ env.INSTALL_TEST_DIR }} - # Needed as ${{ github.workspace }} is not working correctly when using Docker - - name: Setup Variables - continue-on-error: true - run: | - echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" - echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" - - echo "ASAN_OPTIONS=halt_on_error=0:suppressions=$GITHUB_WORKSPACE/openvino/tests/asan/suppressions.supp" >> "$GITHUB_ENV" - echo "LSAN_OPTIONS=suppressions=$GITHUB_WORKSPACE/openvino/tests/lsan/suppressions.txt:NEOReadDebugKeys=1:DisableDeepBind=1" >> "$GITHUB_ENV" - - name: Extract OpenVINO packages run: | pushd $INSTALL_DIR @@ -263,77 +279,71 @@ jobs: pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} popd - - name: Install dependencies (Linux) - run: | - $INSTALL_DIR/install_dependencies/install_openvino_dependencies.sh -c=core -c=dev -c=gpu -y - apt update && apt --assume-yes install clang lld - - name: Fetch Sanitizer Suppression Lists uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 with: sparse-checkout: | - tests/lsan/suppressions.txt - tests/asan/suppressions.supp + tests/sanitizers/lsan/suppressions.txt + tests/sanitizers/asan/suppressions.supp + tests/sanitizers/tsan/suppressions.txt sparse-checkout-cone-mode: false - path: 'openvino' + path: ${{ env.OPENVINO_REPO }} # # Tests # - name: OpenVINO Core Unit Tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_core_unit_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVCoreUT.xml - name: OpenVINO Inference Functional Tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh - ${INSTALL_TEST_DIR}/ov_inference_functional_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-InferenceFunc.xml - name: OpenVINO Inference Unit Tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_inference_unit_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-InferenceUnit.xml - name: Low Precision Transformations Tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh - ${INSTALL_TEST_DIR}/ov_lp_transformations_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-LpTransformations.xml - name: OpenVINO Conditional compilation tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_conditional_compilation_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ConditionalCompilation.xml - name: IR frontend tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_ir_frontend_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-IRFrontend.xml - name: PaddlePaddle frontend tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/paddle_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-PaddleTests.xml - name: ONNX frontend tests - if: always() + if: ${{ !cancelled() && matrix.SANITIZER != 'Thread' }} # Ticket: 155291 run: | source 
${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_onnx_frontend_tests --gtest_print_time=1 \ @@ -341,14 +351,14 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ONNXFrontend.xml - name: TensorFlow Common frontend tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_tensorflow_common_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TensorFlowCommonFrontend.xml - name: TensorFlow frontend tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh @@ -371,56 +381,56 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-Transformations.xml - name: Common test utils tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_util_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-CommonUtilTests.xml - name: Snippets func tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_snippets_func_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SnippetsFuncTests.xml - name: CPU plugin unit tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_cpu_unit_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-CPUUnitTests.xml - name: ov_subgraphs_dumper_tests tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_subgraphs_dumper_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_subgraphs_dumper_tests.xml - name: Template OpImpl tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_op_conformance_tests --gtest_print_time=1 --device=TEMPLATE --gtest_filter=*OpImpl*\ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpImplTests.xml - name: AUTO unit tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_auto_unit_tests --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_auto_unit_tests.xml - name: AUTO func Tests - if: always() + if: ${{ 'false' }} # Issue 155210 run: | source ${{ env.INSTALL_DIR }}/setupvars.sh ${{ env.INSTALL_TEST_DIR }}/ov_auto_func_tests --gtest_print_time=1 \ --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-ov_auto_func_tests.xml - name: Template plugin func tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_template_func_tests --gtest_print_time=1 \ @@ -428,32 +438,32 @@ jobs: --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TemplateFuncTests.xml - name: OpenVINO C API tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_capi_test --gtest_print_time=1 \ --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpenVINOCAPITests.xml - name: AutoBatch unit tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_auto_batch_unit_tests --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_auto_batch_unit_tests.xml - name: AutoBatch func tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh ${INSTALL_TEST_DIR}/ov_auto_batch_func_tests --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-ov_auto_batch_func_tests.xml --gtest_filter="*smoke*" - name: Proxy Plugin func tests - if: always() + if: ${{ !cancelled() }} run: | source ${INSTALL_DIR}/setupvars.sh 
${INSTALL_TEST_DIR}/ov_proxy_plugin_tests --gtest_print_time=1 --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVProxyTests.xml - name: Hetero unit tests - if: always() + if: ${{ !cancelled() }} run: | source ${{ env.INSTALL_DIR }}/setupvars.sh ${{ env.INSTALL_TEST_DIR }}/ov_hetero_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroUnitTests.xml diff --git a/cmake/developer_package/compile_flags/sanitizer.cmake b/cmake/developer_package/compile_flags/sanitizer.cmake index 73f109d726c88b..5fc24c4f862239 100644 --- a/cmake/developer_package/compile_flags/sanitizer.cmake +++ b/cmake/developer_package/compile_flags/sanitizer.cmake @@ -17,7 +17,7 @@ if (ENABLE_SANITIZER) "https://github.com/openvinotoolkit/openvino/wiki/AddressSanitizer-and-LeakSanitizer") endif() elseif(OV_COMPILER_IS_CLANG) - set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address -fsanitize-blacklist=${OpenVINO_SOURCE_DIR}/tests/asan/ignore.txt") + set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address -fsanitize-blacklist=${OpenVINO_SOURCE_DIR}/tests/sanitizers/asan/ignore.txt") if(BUILD_SHARED_LIBS) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -shared-libasan") endif() @@ -27,7 +27,7 @@ if (ENABLE_SANITIZER) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=address") endif() - set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=address -fsanitize-blacklist=${OpenVINO_SOURCE_DIR}/tests/asan/ignore.txt") + set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=address -fsanitize-blacklist=${OpenVINO_SOURCE_DIR}/tests/sanitizers/asan/ignore.txt") if(BUILD_SHARED_LIBS) set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -shared-libasan") endif() @@ -89,7 +89,11 @@ if(ENABLE_THREAD_SANITIZER) message(FATAL_ERROR "Thread sanitizer is not supported in Windows with MSVC compiler. Please, use clang-cl or mingw") elseif(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=thread") - set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=thread") + if(OV_COMPILER_IS_CLANG) + set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -ltsan") + else() + set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=thread") + endif() else() message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}") endif() diff --git a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp index f2dba04b49dca7..c23890b90dcab4 100644 --- a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp @@ -8,6 +8,7 @@ #include "exception.hpp" #include "openvino/core/any.hpp" #include "openvino/frontend/node_context.hpp" +#include "openvino/frontend/tensorflow/visibility.hpp" #include "variable.hpp" #include "variables_map.hpp" @@ -18,7 +19,7 @@ class TranslateSession; /// Keep necessary data for a single node in the original FW graph to facilitate /// conversion process in the rules code. 
-class NodeContext : public ov::frontend::NodeContext { +class TENSORFLOW_API NodeContext : public ov::frontend::NodeContext { public: using Ptr = std::shared_ptr; NodeContext(const std::shared_ptr& decoder, diff --git a/tests/asan/ignore.txt b/tests/sanitizers/asan/ignore.txt similarity index 100% rename from tests/asan/ignore.txt rename to tests/sanitizers/asan/ignore.txt diff --git a/tests/asan/suppressions.supp b/tests/sanitizers/asan/suppressions.supp similarity index 100% rename from tests/asan/suppressions.supp rename to tests/sanitizers/asan/suppressions.supp diff --git a/tests/lsan/suppressions.txt b/tests/sanitizers/lsan/suppressions.txt similarity index 100% rename from tests/lsan/suppressions.txt rename to tests/sanitizers/lsan/suppressions.txt diff --git a/tests/sanitizers/tsan/suppressions.txt b/tests/sanitizers/tsan/suppressions.txt new file mode 100644 index 00000000000000..0814ce119a0d1d --- /dev/null +++ b/tests/sanitizers/tsan/suppressions.txt @@ -0,0 +1,15 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# This is a ThreadSanitizer (TSan) suppression file which provides a default +# configuration for the builds with -DENABLE_SANITIZER=ON. +# More information about LSan suppressions on +# https://github.com/google/sanitizers/wiki/threadsanitizersuppressions + +# race conditions from thirdparty libs +race:libtbb +mutex:libtbb +race:libhwloc +# race conditions from std libs +race:libstdc++ +race:libc \ No newline at end of file diff --git a/thirdparty/dependencies.cmake b/thirdparty/dependencies.cmake index 8313ca73178283..0e8536a1714a35 100644 --- a/thirdparty/dependencies.cmake +++ b/thirdparty/dependencies.cmake @@ -357,6 +357,15 @@ if(ENABLE_OV_PADDLE_FRONTEND OR ENABLE_OV_ONNX_FRONTEND OR ENABLE_OV_TF_FRONTEND endif() else() add_subdirectory(thirdparty/protobuf EXCLUDE_FROM_ALL) + # protobuf fails to build with -fsanitize=thread by clang + if(ENABLE_THREAD_SANITIZER AND OV_COMPILER_IS_CLANG) + foreach(proto_target protoc libprotobuf libprotobuf-lite) + if(TARGET ${proto_target}) + target_compile_options(${proto_target} PUBLIC -fno-sanitize=thread) + target_link_options(${proto_target} PUBLIC -fno-sanitize=thread) + endif() + endforeach() + endif() endif() # forward additional variables used in the other places From 0648cd0c5f5ad3dd91560a731b365acfdfb3c676 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Fri, 18 Oct 2024 18:57:26 +0200 Subject: [PATCH 069/112] [PT FE] Fix sym GPTQ pattern to have consistent graph (#27037) ### Details: - *Fix sym GPTQ pattern to have consistent graph* ### Tickets: - *ticket-id* --- .../src/openvino/frontend/pytorch/gptq.py | 3 ++- .../src/transforms/u4_block_repack.cpp | 23 +++++++++++++++++-- tests/model_hub_tests/pytorch/test_llm.py | 2 +- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py index 3fe1ba465dfd1f..a1c6aecc45d421 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py @@ -77,7 +77,8 @@ def patched_forward_sym(self, *args, **kwargs): unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width) # all zp is 8 for symmetrical, will repack to i4 in pt fe transformation - unpacked_weights = unpacked_weights.to(dtype) * self.scales + unpacked_weights = (unpacked_weights.to(torch.int8) - torch.tensor(8, dtype=torch.int8)) + unpacked_weights = unpacked_weights.to(dtype) * 
self.scales
         unpacked_weights = unpacked_weights.view(-1, self.width)

         out = x @ unpacked_weights
diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp
index 675a293269002b..5130424d0c60ed 100644
--- a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp
+++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp
@@ -7,6 +7,7 @@
 #include "openvino/core/rt_info.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/reshape.hpp"
+#include "openvino/op/subtract.hpp"
 #include "openvino/op/transpose.hpp"
 #include "openvino/pass/pattern/matcher.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
@@ -53,6 +54,7 @@ U4BlockRepack::U4BlockRepack(bool is_symmetrical) {
         auto reshape1 = pattern_to_output[m_reshape1].get_node_shared_ptr();
         auto transpose = pattern_to_output[m_transpose].get_node_shared_ptr();
         auto reshape2 = pattern_to_output[m_reshape2].get_node_shared_ptr();
+        auto pattern_root = reshape2;

         if (constant->get_element_type() != element::u4)
             return false;
@@ -76,9 +78,26 @@
         auto get_number = get_u4;
         auto constant_dtype = element::u4;

+        NodeVector copy_from{std::move(constant), std::move(reshape1), std::move(transpose), reshape2};
         if (is_symmetrical) {
             get_number = get_i4;
             constant_dtype = element::i4;
+            // find pattern Convert(W, i8) -> Subtract(8)
+            auto reshape_targets = reshape2->output(0).get_target_inputs();
+            if (reshape_targets.size() != 1)
+                return false;
+            auto convert = reshape_targets.begin()->get_node()->shared_from_this();
+            if (!std::dynamic_pointer_cast<v0::Convert>(convert))
+                return false;
+            auto convert_targets = convert->output(0).get_target_inputs();
+            if (convert_targets.size() != 1)
+                return false;
+            auto subtract = convert_targets.begin()->get_node()->shared_from_this();
+            if (!std::dynamic_pointer_cast<v1::Subtract>(subtract))
+                return false;
+            pattern_root = subtract;
+            copy_from.push_back(std::move(convert));
+            copy_from.push_back(subtract);
         }
         auto new_const = std::make_shared<v0::Constant>(constant_dtype, destination_shape);
         auto dst = const_cast<uint8_t*>(  // const_cast?
@@ -96,8 +115,8 @@
             }
         }

-        copy_runtime_info({std::move(constant), std::move(reshape1), std::move(transpose), reshape2}, new_const);
-        replace_node(reshape2, new_const);
+        copy_runtime_info(copy_from, new_const);
+        replace_node(pattern_root, new_const);

         return true;
     });
diff --git a/tests/model_hub_tests/pytorch/test_llm.py b/tests/model_hub_tests/pytorch/test_llm.py
index 9acf8e2100c520..e444f93db9d7ec 100644
--- a/tests/model_hub_tests/pytorch/test_llm.py
+++ b/tests/model_hub_tests/pytorch/test_llm.py
@@ -128,7 +128,7 @@ def load_model(self, name, type):
             example["past_key_values"] = pkv
             example["attention_mask"] = torch.cat(
                 [example["attention_mask"], am], -1)
-        if atype not in ["opt", "falcon", "mbart_gptq", "mpt"]:
+        if atype not in ["opt", "falcon", "mbart", "mpt"]:
             ids = torch.cumsum(example["attention_mask"] != 0, dim=1) - 1
             example["position_ids"] = ids[:, -
                 example["input_ids"].shape[1]:]

From 8c36c0047377303b0406a74c45195cf29a460b2f Mon Sep 17 00:00:00 2001
From: Ivan Tikhonov
Date: Fri, 18 Oct 2024 21:57:50 +0400
Subject: [PATCH 070/112] [CORE] Skip unnecessary convert_to_supported_precision if ConstantFolding is omitted (#26756)

Details: It's a modification of https://github.com/openvinotoolkit/openvino/pull/22674.

f16 LLM (llama was tested) compilation time on ARM is unreasonably huge.
Perf report shows that every ConstantFolding transformation takes several seconds even if the graph is not modified. The root cause is util::convert_to_supported_precision call even if constant folding is skipped. The suggested fix is to skip util::convert_to_supported_precision call if folding is not applied. Tickets: CVS-152428 --------- Co-authored-by: Aleksandr Voron Co-authored-by: Andrii Staikov --- src/core/include/openvino/core/node.hpp | 1 + src/core/include/openvino/op/assign.hpp | 2 +- src/core/include/openvino/op/constant.hpp | 2 +- src/core/include/openvino/op/convert_like.hpp | 1 + .../include/openvino/op/fake_quantize.hpp | 3 ++- .../include/openvino/op/random_uniform.hpp | 2 +- src/core/include/openvino/op/read_value.hpp | 2 +- src/core/include/openvino/op/reshape.hpp | 1 + src/core/include/openvino/op/result.hpp | 2 +- src/core/include/openvino/op/shape_of.hpp | 2 ++ src/core/include/openvino/op/squeeze.hpp | 1 + .../include/openvino/op/strided_slice.hpp | 1 + src/core/include/openvino/op/unsqueeze.hpp | 1 + .../include/openvino/op/util/gather_base.hpp | 1 + src/core/src/node.cpp | 14 ++++++++--- src/core/src/op/assign.cpp | 2 +- src/core/src/op/constant.cpp | 2 +- src/core/src/op/convert_like.cpp | 6 ++++- src/core/src/op/random_uniform.cpp | 2 +- src/core/src/op/read_value.cpp | 2 +- src/core/src/op/reshape.cpp | 6 ++++- src/core/src/op/result.cpp | 2 +- src/core/src/op/shape_of.cpp | 12 ++++++++-- src/core/src/op/squeeze.cpp | 6 ++++- src/core/src/op/strided_slice.cpp | 6 ++++- src/core/src/op/unsqueeze.cpp | 6 ++++- src/core/src/op/util/gather_base.cpp | 17 +++++++------ src/core/src/pass/constant_folding.cpp | 24 ++++++++++++------- 28 files changed, 90 insertions(+), 39 deletions(-) diff --git a/src/core/include/openvino/core/node.hpp b/src/core/include/openvino/core/node.hpp index f5a63911abc502..59a4ab29253ded 100644 --- a/src/core/include/openvino/core/node.hpp +++ b/src/core/include/openvino/core/node.hpp @@ -207,6 +207,7 @@ class OPENVINO_API Node : public std::enable_shared_from_this { virtual bool evaluate_upper(ov::TensorVector& output_values) const; virtual bool evaluate_symbol(TensorSymbolVector& output_symbols) const; + virtual bool can_constant_fold(const OutputVector& inputs_values) const; virtual bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values); /// \brief Decomposes the FusedOp into a sub-graph consisting of core openvino ops /// diff --git a/src/core/include/openvino/op/assign.hpp b/src/core/include/openvino/op/assign.hpp index c3f8492e54b4f8..895f6619778951 100644 --- a/src/core/include/openvino/op/assign.hpp +++ b/src/core/include/openvino/op/assign.hpp @@ -67,7 +67,7 @@ class OPENVINO_API Assign : public util::AssignBase { const TensorVector& inputs, const EvaluationContext& evaluation_context) const override; bool has_evaluate() const override; - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; }; } // namespace v6 } // namespace op diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index 62b70a883fc1a5..ccaae01586d612 100644 --- a/src/core/include/openvino/op/constant.hpp +++ b/src/core/include/openvino/op/constant.hpp @@ -215,7 +215,7 @@ class OPENVINO_API Constant : public Op { bool evaluate_upper(TensorVector& outputs) const override; // Don't constant fold a constant; it would make a copy - bool constant_fold(OutputVector& outputs, const 
OutputVector& inputs) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; /// \brief Returns the value of the constant node as a Shape object /// Can only be used on element::i64 nodes and interprets diff --git a/src/core/include/openvino/op/convert_like.hpp b/src/core/include/openvino/op/convert_like.hpp index 244d0f4c7d70b4..0d7f73075e21b9 100644 --- a/src/core/include/openvino/op/convert_like.hpp +++ b/src/core/include/openvino/op/convert_like.hpp @@ -27,6 +27,7 @@ class OPENVINO_API ConvertLike : public Op { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; bool constant_fold(OutputVector& output_values, const OutputVector& input_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; }; } // namespace v1 } // namespace op diff --git a/src/core/include/openvino/op/fake_quantize.hpp b/src/core/include/openvino/op/fake_quantize.hpp index b47c7016c8709e..52caca885a02cc 100644 --- a/src/core/include/openvino/op/fake_quantize.hpp +++ b/src/core/include/openvino/op/fake_quantize.hpp @@ -69,7 +69,8 @@ class OPENVINO_API FakeQuantize : public Op { bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override { + + bool can_constant_fold(const OutputVector& inputs_values) const override { return false; } diff --git a/src/core/include/openvino/op/random_uniform.hpp b/src/core/include/openvino/op/random_uniform.hpp index 6a4de83715e30a..22f06f79402135 100644 --- a/src/core/include/openvino/op/random_uniform.hpp +++ b/src/core/include/openvino/op/random_uniform.hpp @@ -42,7 +42,7 @@ class OPENVINO_API RandomUniform : public Op { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; /// \return Turns off constant folding for RandomUniform operation. - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; /// \return The output tensor type. 
const ov::element::Type& get_out_type() const; diff --git a/src/core/include/openvino/op/read_value.hpp b/src/core/include/openvino/op/read_value.hpp index 27447644037211..e37d6baa11c01c 100644 --- a/src/core/include/openvino/op/read_value.hpp +++ b/src/core/include/openvino/op/read_value.hpp @@ -80,7 +80,7 @@ class OPENVINO_API ReadValue : public util::ReadValueBase { const EvaluationContext& evaluation_context) const override; bool has_evaluate() const override; - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; }; } // namespace v6 } // namespace op diff --git a/src/core/include/openvino/op/reshape.hpp b/src/core/include/openvino/op/reshape.hpp index f3a9e7aa8e59c1..48bc08f8c3d947 100644 --- a/src/core/include/openvino/op/reshape.hpp +++ b/src/core/include/openvino/op/reshape.hpp @@ -52,6 +52,7 @@ class OPENVINO_API Reshape : public Op { bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; protected: bool m_special_zero; diff --git a/src/core/include/openvino/op/result.hpp b/src/core/include/openvino/op/result.hpp index dc8162a10b6627..00e805d1f2aeb5 100644 --- a/src/core/include/openvino/op/result.hpp +++ b/src/core/include/openvino/op/result.hpp @@ -30,7 +30,7 @@ class OPENVINO_API Result : public Op { bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override; bool has_evaluate() const override; - bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; /// \brief Returns current layout, or empty Layout if it is not set Layout get_layout() const; diff --git a/src/core/include/openvino/op/shape_of.hpp b/src/core/include/openvino/op/shape_of.hpp index c8245d91069ed0..375d087f7e6cf8 100644 --- a/src/core/include/openvino/op/shape_of.hpp +++ b/src/core/include/openvino/op/shape_of.hpp @@ -38,6 +38,7 @@ class OPENVINO_API ShapeOf : public util::ShapeOfBase { bool evaluate_upper(TensorVector& output_values) const override; bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& input_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; private: element::Type m_output_type; @@ -64,6 +65,7 @@ class OPENVINO_API ShapeOf : public util::ShapeOfBase { bool evaluate_upper(TensorVector& output_values) const override; bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& input_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; }; } // namespace v0 } // namespace op diff --git a/src/core/include/openvino/op/squeeze.hpp b/src/core/include/openvino/op/squeeze.hpp index f7cb41f974db2f..8c27f29d66df66 100644 --- a/src/core/include/openvino/op/squeeze.hpp +++ b/src/core/include/openvino/op/squeeze.hpp @@ -27,6 +27,7 @@ class OPENVINO_API Squeeze : public Op { bool evaluate_upper(TensorVector& outputs) const override; bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const 
OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; diff --git a/src/core/include/openvino/op/strided_slice.hpp b/src/core/include/openvino/op/strided_slice.hpp index 2ba4f84c0936bf..aa080bc6563b90 100644 --- a/src/core/include/openvino/op/strided_slice.hpp +++ b/src/core/include/openvino/op/strided_slice.hpp @@ -114,6 +114,7 @@ class OPENVINO_API StridedSlice : public Op { bool evaluate_upper(TensorVector& outputs) const override; bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; private: AxisSet convert_mask_to_axis_set(const std::vector& mask) const; diff --git a/src/core/include/openvino/op/unsqueeze.hpp b/src/core/include/openvino/op/unsqueeze.hpp index d9839c7d68d719..4701df2dd4d4ec 100644 --- a/src/core/include/openvino/op/unsqueeze.hpp +++ b/src/core/include/openvino/op/unsqueeze.hpp @@ -30,6 +30,7 @@ class OPENVINO_API Unsqueeze : public Op { bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; }; diff --git a/src/core/include/openvino/op/util/gather_base.hpp b/src/core/include/openvino/op/util/gather_base.hpp index f7846b83cfe465..9fa8387aee6b3a 100644 --- a/src/core/include/openvino/op/util/gather_base.hpp +++ b/src/core/include/openvino/op/util/gather_base.hpp @@ -34,6 +34,7 @@ class OPENVINO_API GatherBase : public Op { bool evaluate_symbol(TensorSymbolVector& output_symbols) const override; bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override; + bool can_constant_fold(const OutputVector& inputs_values) const override; const int64_t& get_batch_dims() const; void set_batch_dims(int64_t batch_dims); diff --git a/src/core/src/node.cpp b/src/core/src/node.cpp index 0341e4477f4cfb..8b9936b5496e7c 100644 --- a/src/core/src/node.cpp +++ b/src/core/src/node.cpp @@ -696,8 +696,8 @@ bool ov::Node::evaluate_symbol(TensorSymbolVector& output_symbols) const { return false; } -bool ov::Node::constant_fold(OutputVector& output_values, const OutputVector& input_values) { - OV_ITT_SCOPED_TASK(ov::itt::domains::core, "Node::constant_fold"); +bool ov::Node::can_constant_fold(const OutputVector& input_values) const { + OV_ITT_SCOPED_TASK(ov::itt::domains::core, "Node::can_constant_fold"); if (is_const_fold_disabled()) { return false; @@ -707,8 +707,16 @@ bool ov::Node::constant_fold(OutputVector& output_values, const OutputVector& in bool all_constants = std::all_of(input_values.begin(), input_values.end(), [](const Output& input) { return ov::as_type_ptr(input.get_node_shared_ptr()); }); - if (!all_constants) + + return all_constants; +} + +bool ov::Node::constant_fold(OutputVector& output_values, const OutputVector& input_values) { + OV_ITT_SCOPED_TASK(ov::itt::domains::core, "Node::constant_fold"); + + if (!Node::can_constant_fold(input_values)) { return false; + } NodeVector nodes; TensorVector input_tensors; diff --git a/src/core/src/op/assign.cpp b/src/core/src/op/assign.cpp index bf6e55c11b1d39..7798d4328049af 100644 --- a/src/core/src/op/assign.cpp 
+++ b/src/core/src/op/assign.cpp @@ -134,7 +134,7 @@ bool Assign::has_evaluate() const { return true; } -bool Assign::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool Assign::can_constant_fold(const OutputVector& input_values) const { return false; } } // namespace v6 diff --git a/src/core/src/op/constant.cpp b/src/core/src/op/constant.cpp index 95df6379ba284e..e06718ef4e1fd5 100644 --- a/src/core/src/op/constant.cpp +++ b/src/core/src/op/constant.cpp @@ -663,7 +663,7 @@ bool Constant::evaluate_upper(TensorVector& outputs) const { return evaluate(outputs, {}); } -bool Constant::constant_fold(OutputVector&, const OutputVector&) { +bool Constant::can_constant_fold(const OutputVector& input_values) const { return false; } diff --git a/src/core/src/op/convert_like.cpp b/src/core/src/op/convert_like.cpp index 3dc0159bb556be..4ae4ea982f8cd9 100644 --- a/src/core/src/op/convert_like.cpp +++ b/src/core/src/op/convert_like.cpp @@ -29,9 +29,13 @@ std::shared_ptr ConvertLike::clone_with_new_inputs(const OutputVector& new return std::make_shared(new_args.at(0), new_args.at(1)); } +bool ConvertLike::can_constant_fold(const OutputVector& input_values) const { + return !is_const_fold_disabled(); +} + bool ConvertLike::constant_fold(OutputVector& output_values, const OutputVector& input_values) { OV_OP_SCOPE(v1_ConvertLike_constant_fold); - if (is_const_fold_disabled()) { + if (!can_constant_fold(input_values)) { return false; } diff --git a/src/core/src/op/random_uniform.cpp b/src/core/src/op/random_uniform.cpp index e62be4d26afc58..9aafed881086b6 100644 --- a/src/core/src/op/random_uniform.cpp +++ b/src/core/src/op/random_uniform.cpp @@ -88,7 +88,7 @@ std::shared_ptr RandomUniform::clone_with_new_inputs(const OutputVector& n } /// \return Turns off constant folding for RandomUniform operation. 
-bool RandomUniform::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool RandomUniform::can_constant_fold(const OutputVector& input_values) const { return false; } diff --git a/src/core/src/op/read_value.cpp b/src/core/src/op/read_value.cpp index 162cb5067bc00a..0d63456a3b8348 100644 --- a/src/core/src/op/read_value.cpp +++ b/src/core/src/op/read_value.cpp @@ -176,7 +176,7 @@ bool ReadValue::has_evaluate() const { return true; } -bool ReadValue::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool ReadValue::can_constant_fold(const OutputVector& input_values) const { return false; } } // namespace v6 diff --git a/src/core/src/op/reshape.cpp b/src/core/src/op/reshape.cpp index ab0e0a0c17cbde..477e210f574269 100644 --- a/src/core/src/op/reshape.cpp +++ b/src/core/src/op/reshape.cpp @@ -97,7 +97,7 @@ bool Reshape::evaluate_symbol(TensorSymbolVector& output_symbols) const { } bool Reshape::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { - if (get_output_partial_shape(0).is_dynamic() || is_const_fold_disabled()) { + if (!can_constant_fold(inputs_values)) { return false; } @@ -108,6 +108,10 @@ bool Reshape::constant_fold(OutputVector& output_values, const OutputVector& inp return false; } } + +bool Reshape::can_constant_fold(const OutputVector& input_values) const { + return get_output_partial_shape(0).is_static() && !is_const_fold_disabled(); +} } // namespace v1 } // namespace op } // namespace ov diff --git a/src/core/src/op/result.cpp b/src/core/src/op/result.cpp index 3667e5ff22b422..237d6bd7a2084a 100644 --- a/src/core/src/op/result.cpp +++ b/src/core/src/op/result.cpp @@ -67,7 +67,7 @@ bool Result::has_evaluate() const { return true; } -bool Result::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { +bool Result::can_constant_fold(const OutputVector& input_values) const { return false; } diff --git a/src/core/src/op/shape_of.cpp b/src/core/src/op/shape_of.cpp index 293c1b5fc5a59c..9676a5704ec99c 100644 --- a/src/core/src/op/shape_of.cpp +++ b/src/core/src/op/shape_of.cpp @@ -168,9 +168,13 @@ bool ShapeOf::evaluate_symbol(TensorSymbolVector& output_symbols) const { return shape_of::evaluate_symbol(this, output_symbols); } +bool ShapeOf::can_constant_fold(const OutputVector& input_values) const { + return !is_const_fold_disabled() && input_values[0].get_partial_shape().is_static(); +} + bool ShapeOf::constant_fold(OutputVector& output_values, const OutputVector& input_values) { OV_OP_SCOPE(v3_ShapeOf_constant_fold); - if (is_const_fold_disabled()) { + if (!can_constant_fold(input_values)) { return false; } return shape_of::constant_fold_shape_of(this, output_values[0], input_values[0]); @@ -222,9 +226,13 @@ bool ShapeOf::has_evaluate() const { } } +bool ShapeOf::can_constant_fold(const OutputVector& input_values) const { + return !is_const_fold_disabled() && input_values[0].get_partial_shape().is_static(); +} + bool ShapeOf::constant_fold(OutputVector& output_values, const OutputVector& input_values) { OV_OP_SCOPE(v0_ShapeOf_constant_fold); - if (is_const_fold_disabled()) { + if (!can_constant_fold(input_values)) { return false; } return shape_of::constant_fold_shape_of(this, output_values[0], input_values[0]); diff --git a/src/core/src/op/squeeze.cpp b/src/core/src/op/squeeze.cpp index 3abc0a773192d2..1b34a4e48a4faf 100644 --- a/src/core/src/op/squeeze.cpp +++ b/src/core/src/op/squeeze.cpp @@ -104,9 +104,13 @@ bool Squeeze::evaluate_symbol(TensorSymbolVector& 
output_symbols) const { return validate::axes_has_and_set_bound(*this) && ov::util::default_symbol_evaluator(this, output_symbols); } +bool Squeeze::can_constant_fold(const OutputVector& inputs_values) const { + return get_output_partial_shape(0).is_static() && !is_const_fold_disabled(); +} + bool Squeeze::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { OV_OP_SCOPE(v0_Squeeze_constant_fold); - if (get_output_partial_shape(0).is_dynamic() || is_const_fold_disabled()) { + if (!can_constant_fold(inputs_values)) { return false; } diff --git a/src/core/src/op/strided_slice.cpp b/src/core/src/op/strided_slice.cpp index deb89fa9a531d4..83ac3dec7a5f4f 100644 --- a/src/core/src/op/strided_slice.cpp +++ b/src/core/src/op/strided_slice.cpp @@ -283,9 +283,13 @@ bool StridedSlice::evaluate_symbol(TensorSymbolVector& output_symbols) const { default_symbol_evaluator(this, {0}, output_symbols); } +bool StridedSlice::can_constant_fold(const OutputVector& input_values) const { + return !is_const_fold_disabled(); +} + bool StridedSlice::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { auto is_folded = Node::constant_fold(output_values, inputs_values); - if (!is_const_fold_disabled() && !is_folded) { + if (can_constant_fold(inputs_values) && !is_folded) { // If all ignored mask are set for all begin or end then replace this input by dummy constant // to avoid return false from `could_propagate` during bound evaluation (value of const will be ignored). auto get_indices_input = [&inputs_values](size_t port, const std::vector& mask) -> Output { diff --git a/src/core/src/op/unsqueeze.cpp b/src/core/src/op/unsqueeze.cpp index d199c43a2479b5..f8c14a08f70d30 100644 --- a/src/core/src/op/unsqueeze.cpp +++ b/src/core/src/op/unsqueeze.cpp @@ -77,8 +77,12 @@ bool ov::op::v0::Unsqueeze::evaluate_symbol(TensorSymbolVector& output_symbols) return ov::util::default_symbol_evaluator(this, output_symbols); } +bool ov::op::v0::Unsqueeze::can_constant_fold(const OutputVector& input_values) const { + return get_output_partial_shape(0).is_static() && !is_const_fold_disabled(); +} + bool ov::op::v0::Unsqueeze::constant_fold(OutputVector& output_values, const OutputVector& inputs_values) { - if (get_output_partial_shape(0).is_dynamic() || is_const_fold_disabled()) { + if (!can_constant_fold(inputs_values)) { return false; } diff --git a/src/core/src/op/util/gather_base.cpp b/src/core/src/op/util/gather_base.cpp index 92e41781b1de55..dd35edf695ec16 100644 --- a/src/core/src/op/util/gather_base.cpp +++ b/src/core/src/op/util/gather_base.cpp @@ -32,10 +32,6 @@ Shape out_shape_infer(const Shape& data_shape, const Shape& indices_shape, int64 bool cf_gather_with_subgraph(OutputVector& output_values, const OutputVector& input_values, const PartialShape& gather_ps) { - if (gather_ps.is_dynamic() || input_values.size() != 3) { - return false; - } - const auto concat = std::dynamic_pointer_cast(input_values[0].get_node_shared_ptr()); const auto indices = std::dynamic_pointer_cast(input_values[1].get_node_shared_ptr()); const auto axis = std::dynamic_pointer_cast(input_values[2].get_node_shared_ptr()); @@ -67,7 +63,6 @@ bool cf_gather_with_subgraph(OutputVector& output_values, const auto raw_index = indices->cast_vector()[0]; const auto positive_index = ov::util::normalize(raw_index, rank); OPENVINO_ASSERT(positive_index >= 0 && positive_index < rank); - // gather takes exactly one element out of the Concat output const auto gathered_concat_input = 
concat_inputs[positive_index].get_source_output().get_node_shared_ptr(); // Concat inputs are 1D, resulting tensor shape depends on Gather indices @@ -77,9 +72,7 @@ bool cf_gather_with_subgraph(OutputVector& output_values, const auto axis_const = v0::Constant::create(element::i64, Shape{1}, {0}); gathered = std::make_shared(gathered_concat_input, axis_const); } - output_values[0] = gathered; - return true; } @@ -262,13 +255,19 @@ bool GatherBase::evaluate_symbol(TensorSymbolVector& output_symbols) const { return gather::have_indices_and_axis_bound_set(this) && ov::util::default_symbol_evaluator(this, output_symbols); } +bool GatherBase::can_constant_fold(const OutputVector& input_values) const { + return get_output_partial_shape(0).is_static() && input_values.size() == 3; +} + bool GatherBase::constant_fold(OutputVector& output_values, const OutputVector& input_values) { // try the regular constant folding just for the Gather node if (Node::constant_fold(output_values, input_values)) { return true; - } else { - return gather::cf_gather_with_subgraph(output_values, input_values, get_output_partial_shape(0)); } + if (!can_constant_fold(input_values)) { + return false; + } + return gather::cf_gather_with_subgraph(output_values, input_values, get_output_partial_shape(0)); } } // namespace util } // namespace op diff --git a/src/core/src/pass/constant_folding.cpp b/src/core/src/pass/constant_folding.cpp index 3de91829f91b0c..cc1a7cea5b5add 100644 --- a/src/core/src/pass/constant_folding.cpp +++ b/src/core/src/pass/constant_folding.cpp @@ -105,6 +105,21 @@ bool ov::pass::ConstantFolding::run_on_model(const std::shared_ptr& m for (const auto& original_node : model->get_ordered_ops()) { auto node = original_node; + if (!original_node->can_constant_fold(original_node->input_values())) { + if (auto sub_graph_node = std::dynamic_pointer_cast(node)) { + // recursively constant fold operators containing subgraphs (ie: TensorIterator, Loop) + size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size(); + for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; ++sub_graph_ind) { + rewritten = + run_on_model(sub_graph_node->get_function(static_cast(sub_graph_ind))) || rewritten; + } + } + rewritten = restore_original_input_precision(original_node) || rewritten; + if (rewritten) { + original_node->validate_and_infer_types(); + } + continue; + } if (node_has_requires_precision_conversion_attribute(node)) { remove_requires_precision_conversion_attribute(node); node = util::convert_to_supported_precision(node.get()); @@ -143,15 +158,6 @@ bool ov::pass::ConstantFolding::run_on_model(const std::shared_ptr& m } } } else { - if (auto sub_graph_node = std::dynamic_pointer_cast(node)) { - // recursively constant fold operators containing subgraphs (ie: TensorIterator, Loop) - size_t sub_graphs_num = sub_graph_node->get_internal_subgraphs_size(); - for (size_t sub_graph_ind = 0; sub_graph_ind < sub_graphs_num; ++sub_graph_ind) { - rewritten = - run_on_model(sub_graph_node->get_function(static_cast(sub_graph_ind))) || rewritten; - } - } - // if CF was unsuccessful remove original precision attribute from inputs bool restored = restore_original_input_precision(original_node); if (restored) { From 13becaa48b24e12964e849d0f2af71d2f5b854ab Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Fri, 18 Oct 2024 20:04:12 +0200 Subject: [PATCH 071/112] [TESTS] Print test names in log (#27121) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .github/workflows/job_pytorch_layer_tests.yml | 6 +++--- 1 
file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml
index abf614c70cff4e..c6cd97422f2b95 100644
--- a/.github/workflows/job_pytorch_layer_tests.yml
+++ b/.github/workflows/job_pytorch_layer_tests.yml
@@ -121,7 +121,7 @@ jobs:
     - name: PyTorch Layer Tests
       if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196
       # due to CVS-152795, parallel run is not possible on Windows
-      run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml
+      run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit -v --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml
       env:
         TEST_DEVICE: CPU
         TEST_PRECISION: FP32
@@ -130,7 +130,7 @@
     - name: PyTorch torch.export Layer Tests
      if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' && runner.os != 'Windows' }} # Ticket: 126287
       run: |
-        python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit_torch_export --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml
+        python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests ${PARALLEL} -m precommit_torch_export -v --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch_export.xml
       env:
         TEST_DEVICE: CPU
         TEST_PRECISION: FP32
@@ -140,7 +140,7 @@
     - name: PyTorch torch.compile TORCHFX Layer Tests
       if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.os != 'macOS' && runner.arch != 'ARM64' && runner.os != 'Windows' }} # Ticket: 126287
       run: |
-        python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -m precommit_fx_backend --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch.xml
+        python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/pytorch_tests -m precommit_fx_backend -v --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-pytorch_compile.xml
       env:
         TEST_DEVICE: CPU
         TEST_PRECISION: FP32

From f33e25565bbf99208630d959c80921437da36536 Mon Sep 17 00:00:00 2001
From: Roman Kazantsev
Date: Fri, 18 Oct 2024 23:54:26 +0400
Subject: [PATCH 072/112] [TFL FE] Export public API symbols for TFLite Delegate (#27140)

**Details:** We need to properly export the public API symbols of the tensorflow_lite_frontend shared library so that the TFLite Delegate can import them. All abstract classes that the TFLite Delegate implements on its own should be exported. `QuantizationInfo` should also be exported to avoid duplications/re-definitions in binaries.

**Ticket:** TBD
**Ticket:** TBD

Signed-off-by: Kazantsev, Roman
---
 .../include/openvino/frontend/tensorflow_lite/decoder.hpp | 8 ++++----
 .../openvino/frontend/tensorflow_lite/graph_iterator.hpp  | 2 +-
 .../frontend/tensorflow_lite/quantization_info.hpp        | 2 +-
 .../openvino/frontend/tensorflow_lite/sparsity_info.hpp   | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp
index b3415cf288c4be..a2cafe16e075fb 100644
--- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp
+++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp
@@ -14,7 +14,7 @@ namespace ov {
 namespace frontend {
 namespace tensorflow_lite {

-struct TensorMetaInfo {
+struct TENSORFLOW_LITE_API TensorMetaInfo {
     std::shared_ptr<QuantizationInfo> m_quantization_info;
     std::shared_ptr<SparsityInfo> m_sparsity_info;
     ov::PartialShape m_partial_shape;
@@ -23,11 +23,11 @@ struct TensorMetaInfo {
     std::string m_tensor_name;
 };

-class DecoderBase : public ov::frontend::DecoderBase {};
+class TENSORFLOW_LITE_API DecoderBase : public ov::frontend::DecoderBase {};

 // DecoderBaseOperation corresponds to an operation node; it retrieves the operation attributes and information about input and output
 // tensors
-class DecoderBaseOperation : public ov::frontend::tensorflow_lite::DecoderBase {
+class TENSORFLOW_LITE_API DecoderBaseOperation : public ov::frontend::tensorflow_lite::DecoderBase {
 public:
     /// \brief Get input tensor name by index
     /// Operation nodes are connected to each other by tensors.
@@ -71,7 +71,7 @@ class DecoderBaseOperation : public ov::frontend::tensorflow_lite::DecoderBase {

 // DecoderBaseTensor corresponds to a tensor node; it retrieves information about type, shape, quantization and sparsity
 // information
-class DecoderBaseTensor : public ov::frontend::tensorflow_lite::DecoderBase {
+class TENSORFLOW_LITE_API DecoderBaseTensor : public ov::frontend::tensorflow_lite::DecoderBase {
 public:
     /// \brief Get tensor info
     virtual TensorMetaInfo get_tensor_info() const = 0;

diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp
index 8ec2bc3f05c358..2084147c9ab284 100644
--- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp
+++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp
@@ -24,7 +24,7 @@ namespace tensorflow_lite {
 /// DecoderBaseOperation (for op 1), ..., DecoderBaseOperation (for op k),
 /// where n - number of inputs in the model, m - number of outputs in the model, k - number of operation nodes.
 /// NOTE: constants are ignored and no decoder object is returned for a constant.
-class GraphIterator : ::ov::RuntimeAttribute {
+class TENSORFLOW_LITE_API GraphIterator : ::ov::RuntimeAttribute {
 public:
     using Ptr = std::shared_ptr<GraphIterator>;

diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp
index bd0f1e28283a27..66977db1caa5d4 100644
--- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp
+++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp
@@ -15,7 +15,7 @@ namespace ov {
 namespace frontend {
 namespace tensorflow_lite {

-class QuantizationInfo : public ov::RuntimeAttribute {
+class TENSORFLOW_LITE_API QuantizationInfo : public ov::RuntimeAttribute {
 public:
     OPENVINO_RTTI("QuantizationInfo");
     QuantizationInfo() = default;

diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp
index 596cb651763d57..c1ab8d4fd04941 100644
--- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp
+++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp
@@ -16,7 +16,7 @@ namespace ov {
 namespace frontend {
 namespace tensorflow_lite {

-class SparsityInfo : public ov::RuntimeAttribute {
+class TENSORFLOW_LITE_API SparsityInfo : public ov::RuntimeAttribute {
 public:
     struct SparsityDataDesc {
         uint8_t segments_type;

From c5025cc6ca06753b0cf7091438d28cbac44139d0 Mon Sep 17 00:00:00 2001
From: Andrii Staikov
Date: Sat, 19 Oct 2024 14:25:26 +0200
Subject: [PATCH 073/112] [TRANSFORMATIONS] Introduce a new method of testing SDPAToPA transformation (#27067)

[TRANSFORMATIONS] Introduce a new method of testing SDPAToPA transformation

Introduce a new method of testing the SDPAToPA transformation: not only check whether PagedAttentionExtension nodes appeared in the graph, but also check whether the changes in the number of nodes involved in the transformation align with the reference numbers recorded for each model.

Add a script for fast generation of the reference values; a condensed sketch of the counting approach follows.
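The new check boils down to diffing per-type node counts across the transformation. A condensed sketch of that idea (the tests added below use plain dicts; `Counter` here is just for brevity, and the `transform` callable stands in for `paged_attention_transformation`):

```python
# Sketch of the node-count diffing used by the new tests. Assumes an
# ov.Model-like object exposing get_ordered_ops(), as in the test code below.
from collections import Counter

NODES_TO_COMPARE = ("ScaledDotProductAttention", "PagedAttentionExtension",
                    "Parameter", "ReadValue", "Assign")

def count_tracked_ops(model):
    # Count occurrences of each tracked node type in topological order.
    return Counter(op.get_type_name()
                   for op in model.get_ordered_ops()
                   if op.get_type_name() in NODES_TO_COMPARE)

def node_count_diff(model, transform):
    # Per-type delta of node counts produced by applying `transform`.
    before = count_tracked_ops(model)
    transform(model)
    after = count_tracked_ops(model)
    return {op: after[op] - before[op] for op in before.keys() | after.keys()}
```

A model passes when the computed diff equals its entry in the reference map.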
Signed-off-by: Andrii Staikov

- Tickets:
	* CVS-152290

---
 .../workflows/job_pytorch_models_tests.yml    |   2 +-
 .../generate_ref_diffs.py                     |  94 +++
 .../models/hf-tiny-random-models-precommit    |   5 +-
 .../transformation_tests/sdpa2pa_ref_diff.py  | 612 ++++++++++++++++++
 .../test_pa_transformation.py                 |  30 +-
 5 files changed, 731 insertions(+), 12 deletions(-)
 create mode 100644 tests/model_hub_tests/transformation_tests/generate_ref_diffs.py
 create mode 100644 tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py

diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml
index 8f3699f6ab42a2..22a09dffba779f 100644
--- a/.github/workflows/job_pytorch_models_tests.yml
+++ b/.github/workflows/job_pytorch_models_tests.yml
@@ -137,7 +137,7 @@ jobs:
         if: ${{ inputs.model_scope == 'precommit' }}
         run: |
           export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH
-          python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short -n 2
+          python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -vvv -s --tb=short -n 2
         env:
           TEST_DEVICE: CPU
           USE_SYSTEM_CACHE: False

diff --git a/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py
new file mode 100644
index 00000000000000..6823256b3ccfc5
--- /dev/null
+++ b/tests/model_hub_tests/transformation_tests/generate_ref_diffs.py
@@ -0,0 +1,94 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+'''
+Use this script if you need to regenerate the reference diffs for each model
+used to test the SDPAToPA transformation.
+
+The script will produce sdpa2pa_ref_diff.txt (or sdpa2pa_ref_diff_cache_eviction.txt
+if using cache eviction) containing a map in the following format,
+with the node-count changes for each model:
+
+ref_diff_map = {
+    "hf-internal-testing/tiny-random-LlamaForCausalLM" : {
+        "PagedAttentionExtension" : 2,
+        "ScaledDotProductAttention" : -2,
+        "Parameter" : 7,
+        "ReadValue" : -4,
+        "Assign" : -4,
+    },
+    "hf-internal-testing/tiny-random-CohereForCausalLM" : {
+        "PagedAttentionExtension" : 2,
+        "ScaledDotProductAttention" : -2,
+        "Parameter" : 7,
+        "ReadValue" : -4,
+        "Assign" : -4,
+    },
+    .
+    .
+    .
+}
+
+The map has to be pasted into sdpa2pa_ref_diff.py (same directory) so it
+can be imported by the SDPAToPA transformation tests.
+
+Run the script as 'python generate_ref_diffs.py', or as 'python generate_ref_diffs.py True'
+to generate the same map with cache eviction enabled.
+'''
+
+import os
+import sys
+from pathlib import Path
+import models_hub_common.utils as utils
+from openvino._offline_transformations import paged_attention_transformation
+from openvino._pyopenvino.op import _PagedAttentionExtension, Parameter, Result
+from optimum.intel import OVModelForCausalLM
+
+nodes_to_compare = ("ScaledDotProductAttention", "PagedAttentionExtension", "Parameter", "ReadValue", "Assign")
+
+def main():
+    use_cache_eviction = False
+    if len(sys.argv) >= 2:
+        use_cache_eviction = sys.argv[1].lower() == 'true'
+
+    OUTPUT_FILE = Path(os.path.join(os.path.dirname(__file__)), 'sdpa2pa_ref_diff' + ('_cache_eviction.txt' if use_cache_eviction else '.txt'))
+
+    if OUTPUT_FILE.exists() and OUTPUT_FILE.is_file():
+        OUTPUT_FILE.unlink()
+
+    with open(OUTPUT_FILE, 'w') as file:
+        model_list = utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit"))
+        print(OUTPUT_FILE)
+        print('ref_diff_map_cache_eviction = {' if use_cache_eviction else 'ref_diff_map = {', file=file)
+
+        for model_id, _, _, _ in model_list:
+            # wrapping in try/except block to continue printing models even if one has failed
+            try:
+                model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
+            except:
+                continue
+
+            before_map = {}
+            for op in model.model.get_ordered_ops():
+                if op.get_type_name() in nodes_to_compare:
+                    before_map[op.get_type_name()] = before_map.get(op.get_type_name(), 0) + 1
+
+            # wrapping in try/except block to continue printing models even if one has failed
+            try:
+                paged_attention_transformation(model.model, use_cache_eviction, use_cache_eviction)
+            except:
+                continue
+
+            after_map = {}
+            for op in model.model.get_ordered_ops():
+                if op.get_type_name() in nodes_to_compare:
+                    after_map[op.get_type_name()] = after_map.get(op.get_type_name(), 0) + 1
+
+            print(f'\t"{model_id}" : {{', file=file)
+            for op in set(after_map.keys()) | set(before_map.keys()):
+                print(f'\t\t"{op}" : {after_map.get(op, 0) - before_map.get(op, 0)},', file=file)
+            print('\t},', file=file)
+        print('}', file=file)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
index c3ec331fcda0bc..7c89c451ea4be5 100644
--- a/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
+++ b/tests/model_hub_tests/transformation_tests/models/hf-tiny-random-models-precommit
@@ -40,7 +40,4 @@ Xenova/tiny-random-Phi3ForCausalLM,https://huggingface.co/Xenova/tiny-random-Phi
 facebook/opt-125m,https://huggingface.co/facebook/opt-125m
 facebook/opt-350m,https://huggingface.co/facebook/opt-350m
 katuni4ka/tiny-random-chatglm2,https://huggingface.co/katuni4ka/tiny-random-chatglm2
-katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4
-hf-internal-testing/tiny-random-BioGptForCausalLM,https://huggingface.co/hf-internal-testing/tiny-random-BioGptForCausalLM,xfail,No ScaledDotProductAttention operation observed in the graph CVS-145820
-hf-internal-testing/tiny-random-XGLMForCausalLM,https://huggingface.co/hf-tiny-model-private/tiny-random-XGLMForCausalLM,xfail,No ScaledDotProductAttention operation observed in the graph CVS-145820
-katuni4ka/tiny-random-orion,https://huggingface.co/katuni4ka/tiny-random-orion,xfail,No ScaledDotProductAttention operation observed in the graph CVS-145820
\ No newline at end of file
+katuni4ka/tiny-random-glm4,https://huggingface.co/katuni4ka/tiny-random-glm4 \ No newline at end of file diff --git a/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py new file mode 100644 index 00000000000000..23af913d9d102f --- /dev/null +++ b/tests/model_hub_tests/transformation_tests/sdpa2pa_ref_diff.py @@ -0,0 +1,612 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +nodes_to_compare = ("ScaledDotProductAttention", "PagedAttentionExtension", "Parameter", "ReadValue", "Assign") + +ref_diff_map = { + "hf-internal-testing/tiny-random-LlamaForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-CohereForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-GPTJForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 13, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-GPTNeoForCausalLM" : { + "PagedAttentionExtension" : 4, + "ScaledDotProductAttention" : -4, + "Parameter" : 11, + "ReadValue" : -8, + "Assign" : -8, + }, + "hf-internal-testing/tiny-random-GPTNeoXForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 13, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-MistralForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-CodeGenForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 13, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/Mixtral-tiny" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 13, + "ReadValue" : -5, + "Assign" : -5, + }, + "hf-internal-testing/tiny-random-Starcoder2ForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-BloomForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 14, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-gpt2" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 13, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-BlenderbotForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 8, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PegasusForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 8, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PhiForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-MptForCausalLM" : { + "PagedAttentionExtension" : 5, + 
"ScaledDotProductAttention" : -5, + "Parameter" : 14, + "ReadValue" : -10, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-StableLmForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PersimmonForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-FalconForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "hf-tiny-model-private/tiny-random-OPTForCausalLM" : { + "PagedAttentionExtension" : 5, + "ScaledDotProductAttention" : -5, + "Parameter" : 14, + "ReadValue" : -10, + "Assign" : -10, + }, + "katuni4ka/tiny-random-xverse" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-baichuan2-13b" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-qwen" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-aquilachat" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-aquila2" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-qwen1.5-moe" : { + "PagedAttentionExtension" : 4, + "ScaledDotProductAttention" : -4, + "Parameter" : 11, + "ReadValue" : -8, + "Assign" : -8, + }, + "katuni4ka/tiny-random-codegen2" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-olmo-hf" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-baichuan2" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-jais" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-internlm" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-internlm2" : { + "PagedAttentionExtension" : 4, + "ScaledDotProductAttention" : -4, + "Parameter" : 11, + "ReadValue" : -8, + "Assign" : -8, + }, + "katuni4ka/tiny-random-minicpm" : { + "ReadValue" : -8, + "ScaledDotProductAttention" : -4, + "Assign" : -8, + "PagedAttentionExtension" : 4, + "Parameter" : 11, + }, + "katuni4ka/tiny-random-falcon-40b" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-dbrx" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "fxmarty/tiny-random-GemmaForCausalLM" : { + "PagedAttentionExtension" : 1, + "ScaledDotProductAttention" : -1, + "Parameter" : 5, + "ReadValue" : -2, + 
"Assign" : -2, + }, + "fxmarty/tiny-dummy-qwen2" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "fxmarty/really-tiny-falcon-testing" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "Xenova/tiny-random-Phi3ForCausalLM" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "facebook/opt-125m" : { + "PagedAttentionExtension" : 12, + "ScaledDotProductAttention" : -12, + "Parameter" : 28, + "ReadValue" : -24, + "Assign" : -24, + }, + "facebook/opt-350m" : { + "PagedAttentionExtension" : 24, + "ScaledDotProductAttention" : -24, + "Parameter" : 52, + "ReadValue" : -48, + "Assign" : -48, + }, + "katuni4ka/tiny-random-chatglm2" : { + "PagedAttentionExtension" : 2, + "ScaledDotProductAttention" : -2, + "Parameter" : 7, + "ReadValue" : -4, + "Assign" : -4, + }, + "katuni4ka/tiny-random-glm4" : { + "PagedAttentionExtension" : 6, + "ScaledDotProductAttention" : -6, + "Parameter" : 15, + "ReadValue" : -12, + "Assign" : -12, + }, +} + +ref_diff_map_cache_eviction = { + "hf-internal-testing/tiny-random-LlamaForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-CohereForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-GPTJForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 17, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-GPTNeoForCausalLM" : { + "ScaledDotProductAttention" : -4, + "ReadValue" : -8, + "PagedAttentionExtension" : 4, + "Parameter" : 14, + "Assign" : -8, + }, + "hf-internal-testing/tiny-random-GPTNeoXForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 17, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-MistralForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-CodeGenForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 17, + "Assign" : -10, + }, + "hf-internal-testing/Mixtral-tiny" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -5, + "PagedAttentionExtension" : 5, + "Parameter" : 17, + "Assign" : -5, + }, + "hf-internal-testing/tiny-random-Starcoder2ForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-BloomForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 18, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-gpt2" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 17, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-BlenderbotForCausalLM" : { + 
"ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 9, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PegasusForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 9, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PhiForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-MptForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 18, + "Assign" : -10, + }, + "hf-internal-testing/tiny-random-StableLmForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-PersimmonForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-internal-testing/tiny-random-FalconForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "hf-tiny-model-private/tiny-random-OPTForCausalLM" : { + "ScaledDotProductAttention" : -5, + "ReadValue" : -10, + "PagedAttentionExtension" : 5, + "Parameter" : 18, + "Assign" : -10, + }, + "katuni4ka/tiny-random-xverse" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-baichuan2-13b" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-qwen" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-aquilachat" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-aquila2" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-qwen1.5-moe" : { + "ScaledDotProductAttention" : -4, + "ReadValue" : -8, + "PagedAttentionExtension" : 4, + "Parameter" : 14, + "Assign" : -8, + }, + "katuni4ka/tiny-random-codegen2" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-olmo-hf" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-baichuan2" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-jais" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-internlm" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-internlm2" : { + "ScaledDotProductAttention" : -4, + "ReadValue" : -8, + "PagedAttentionExtension" : 4, + "Parameter" : 14, + "Assign" : -8, + }, + "katuni4ka/tiny-random-minicpm" : { + "ScaledDotProductAttention" : -4, + 
"Parameter" : 14, + "PagedAttentionExtension" : 4, + "ReadValue" : -8, + "Assign" : -8, + }, + "katuni4ka/tiny-random-falcon-40b" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-dbrx" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "fxmarty/tiny-random-GemmaForCausalLM" : { + "ScaledDotProductAttention" : -1, + "ReadValue" : -2, + "PagedAttentionExtension" : 1, + "Parameter" : 5, + "Assign" : -2, + }, + "fxmarty/tiny-dummy-qwen2" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "fxmarty/really-tiny-falcon-testing" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "Xenova/tiny-random-Phi3ForCausalLM" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "facebook/opt-125m" : { + "ScaledDotProductAttention" : -12, + "ReadValue" : -24, + "PagedAttentionExtension" : 12, + "Parameter" : 39, + "Assign" : -24, + }, + "facebook/opt-350m" : { + "ScaledDotProductAttention" : -24, + "ReadValue" : -48, + "PagedAttentionExtension" : 24, + "Parameter" : 75, + "Assign" : -48, + }, + "katuni4ka/tiny-random-chatglm2" : { + "ScaledDotProductAttention" : -2, + "ReadValue" : -4, + "PagedAttentionExtension" : 2, + "Parameter" : 8, + "Assign" : -4, + }, + "katuni4ka/tiny-random-glm4" : { + "ScaledDotProductAttention" : -6, + "ReadValue" : -12, + "PagedAttentionExtension" : 6, + "Parameter" : 20, + "Assign" : -12, + }, +} diff --git a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py index dc65324d4f028b..02481439818f28 100644 --- a/tests/model_hub_tests/transformation_tests/test_pa_transformation.py +++ b/tests/model_hub_tests/transformation_tests/test_pa_transformation.py @@ -6,6 +6,7 @@ from optimum.intel import OVModelForCausalLM from models_hub_common.utils import retry import models_hub_common.utils as utils +from sdpa2pa_ref_diff import ref_diff_map, ref_diff_map_cache_eviction, nodes_to_compare import pytest import os import re @@ -14,15 +15,28 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_outputs): model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True) + before_map = {} + for op in model.model.get_ordered_ops(): + if op.get_type_name() in nodes_to_compare: + before_map[op.get_type_name()] = before_map.get(op.get_type_name(), 0) + 1 + paged_attention_transformation(model.model, use_block_indices_inputs, use_score_outputs) - # Test that a _PagedAttentionExtension node appeared after the transformation. 
-    pa_counter = 0
+    after_map = {}
     for op in model.model.get_ordered_ops():
-        if isinstance(op, _PagedAttentionExtension):
-            pa_counter += 1
+        if op.get_type_name() in nodes_to_compare:
+            after_map[op.get_type_name()] = after_map.get(op.get_type_name(), 0) + 1
+
+    # Collect the per-type node-count changes for the ops in nodes_to_compare
+    # and check that the numbers correspond to the reference ones
+    resulting_map = {}
+    for op in set(after_map.keys()) | set(before_map.keys()):
+        resulting_map[op] = after_map.get(op, 0) - before_map.get(op, 0)
+
+    use_cache_eviction = use_block_indices_inputs and use_score_outputs
+    reference_map = ref_diff_map_cache_eviction[model_id] if use_cache_eviction else ref_diff_map[model_id]

-    assert pa_counter > 0, f"The model '{model_id}' has no _PagedAttentionExtension present."
+    assert reference_map == resulting_map

     model_inputs = model.model.inputs
     for input in model_inputs:
@@ -45,7 +59,8 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o
             if re.search(block_indices_pattern, name):
                 block_indices_counter += 1

-        assert(block_indices_counter == pa_counter)
+        assert block_indices_counter == resulting_map["PagedAttentionExtension"], \
+            f"The number of block_indices inputs doesn't correspond to the expected value. Expected {resulting_map['PagedAttentionExtension']}, received {block_indices_counter}"

     if (use_score_outputs):
         score_pattern = r'scores\.[0-9]+'
@@ -57,7 +72,8 @@ def run_pa(tmp_path, model_id, model_link, use_block_indices_inputs, use_score_o
             if re.search(score_pattern, name):
                 score_outputs_counter += 1

-        assert(score_outputs_counter == pa_counter)
+        assert score_outputs_counter == resulting_map["PagedAttentionExtension"], \
+            f"The number of scores outputs doesn't correspond to the expected value. Expected {resulting_map['PagedAttentionExtension']}, received {score_outputs_counter}"

 @pytest.mark.precommit
 @pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "hf-tiny-random-models-precommit")))

From 9a02e5475b946ee710a55c30d391a937f46c1d0a Mon Sep 17 00:00:00 2001
From: Halm Zenger
Date: Sun, 20 Oct 2024 15:17:09 +0100
Subject: [PATCH 074/112] [JAX FE] Support lax.argmax operation for JAX (#26671)

### Details:
- Support lax.argmax for JAX and create a relevant layer test
- 2 util improvements
- Fix `num_inputs_check` not checking max inputs
- Better error message when a param name does not exist

### Tickets:
- #26574

---------

Co-authored-by: Roman Kazantsev
---
 .../openvino/frontend/jax/node_context.hpp |  1 +
 src/frontends/jax/src/op/argmax.cpp        | 42 +++++++++++++
 src/frontends/jax/src/op_table.cpp         |  2 +
 src/frontends/jax/src/utils.cpp            |  1 +
 tests/layer_tests/jax_tests/test_argmax.py | 62 +++++++++++++++++++
 5 files changed, 108 insertions(+)
 create mode 100644 src/frontends/jax/src/op/argmax.cpp
 create mode 100644 tests/layer_tests/jax_tests/test_argmax.py

diff --git a/src/frontends/jax/include/openvino/frontend/jax/node_context.hpp b/src/frontends/jax/include/openvino/frontend/jax/node_context.hpp
index 101161a4ec03c9..015713ad72847d 100644
--- a/src/frontends/jax/include/openvino/frontend/jax/node_context.hpp
+++ b/src/frontends/jax/include/openvino/frontend/jax/node_context.hpp
@@ -101,6 +101,7 @@ class NodeContext : public frontend::NodeContext {
     }

     Output<Node> get_param(const std::string& name) const {
+        FRONT_END_GENERAL_CHECK(m_param_name_to_id.count(name), "No param id corresponding to name exists: ", name);
         auto id = m_param_name_to_id.at(name);
        FRONT_END_GENERAL_CHECK(m_tensor_map->count(id), "No tensor corresponding to param id: ", id, " exists.");
         return m_tensor_map->at(id);

diff --git a/src/frontends/jax/src/op/argmax.cpp b/src/frontends/jax/src/op/argmax.cpp
new file mode 100644
index 00000000000000..60d852c6d0f358
--- /dev/null
+++ b/src/frontends/jax/src/op/argmax.cpp
@@ -0,0 +1,42 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/frontend/jax/node_context.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/squeeze.hpp"
+#include "openvino/op/topk.hpp"
+#include "utils.hpp"
+
+namespace ov {
+namespace frontend {
+namespace jax {
+namespace op {
+
+using namespace ov::op;
+
+OutputVector translate_argmax(const NodeContext& context) {
+    num_inputs_check(context, 1, 1);
+    Output<Node> input = context.get_input(0);
+    auto axis_val = context.const_named_param<int64_t>("axes");
+    auto axis = context.const_named_param<std::shared_ptr<v0::Constant>>("axes");
+    auto dtype = convert_dtype(context.const_named_param<int64_t>("index_dtype"));
+
+    auto k = std::make_shared<v0::Constant>(element::i64, Shape{}, 1);
+    auto topk = std::make_shared<v11::TopK>(input,
+                                            k,
+                                            axis_val,
+                                            v11::TopK::Mode::MAX,
+                                            v1::TopK::SortType::SORT_VALUES,
+                                            dtype,
+                                            true);
+    auto indices = topk->output(1);
+
+    auto res = std::make_shared<v0::Squeeze>(indices, axis);
+    return {res};
+};
+
+}  // namespace op
+}  // namespace jax
+}  // namespace frontend
+}  // namespace ov
\ No newline at end of file
diff --git a/src/frontends/jax/src/op_table.cpp b/src/frontends/jax/src/op_table.cpp
index 5e92e3de6e212a..500226594fea13 100644
--- a/src/frontends/jax/src/op_table.cpp
+++ b/src/frontends/jax/src/op_table.cpp
@@ -36,6 +36,7 @@ namespace op {
     template <typename T> \
     OutputVector op(const ov::frontend::jax::NodeContext& node)

+OP_CONVERTER(translate_argmax);
 OP_T_CONVERTER(translate_binary_op);
 OP_CONVERTER(translate_broadcast_in_dim);
 OP_CONVERTER(translate_concatenate);
@@ -59,6 +60,7 @@ OP_CONVERTER(translate_transpose);
 // Supported ops for Jaxpr
 const std::map<std::string, CreatorFunction> get_supported_ops_jaxpr() {
     return {{"add", op::translate_1to1_match_2_inputs<v1::Add>},
+            {"argmax", op::translate_argmax},
            {"broadcast_in_dim", op::translate_broadcast_in_dim},
            {"concatenate", op::translate_concatenate},
            {"constant", op::translate_constant},
diff --git a/src/frontends/jax/src/utils.cpp b/src/frontends/jax/src/utils.cpp
index d47abfbba56188..f626031ec8dc58 100644
--- a/src/frontends/jax/src/utils.cpp
+++ b/src/frontends/jax/src/utils.cpp
@@ -16,6 +16,7 @@ namespace jax {
 void num_inputs_check(const NodeContext& context, size_t min_inputs, size_t max_inputs) {
     auto inputs = context.inputs();
     FRONT_END_OP_CONVERSION_CHECK(inputs.size() >= min_inputs, "Got less inputs than expected");
+    FRONT_END_OP_CONVERSION_CHECK(inputs.size() <= max_inputs, "Got more inputs than expected");
 }

 void num_inputs_check(const NodeContext& context, size_t min_inputs) {
diff --git a/tests/layer_tests/jax_tests/test_argmax.py b/tests/layer_tests/jax_tests/test_argmax.py
new file mode 100644
index 00000000000000..372aede2b4ba33
--- /dev/null
+++ b/tests/layer_tests/jax_tests/test_argmax.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import numpy as np
+import pytest
+from jax import lax
+from jax import numpy as jnp
+
+from jax_layer_test_class import JaxLayerTest
+
+rng = np.random.default_rng(706670)
+
+
+class TestArgmax(JaxLayerTest):
+    def _prepare_input(self):
+        if np.issubdtype(self.input_type, np.floating):
+            x = rng.uniform(-5.0, 5.0,
+                            self.input_shape).astype(self.input_type)
+
elif np.issubdtype(self.input_type, np.signedinteger): + x = rng.integers(-8, 8, self.input_shape).astype(self.input_type) + else: + x = rng.integers(0, 8, self.input_shape).astype(self.input_type) + + if self.input_duplicate: + x = np.concatenate((x, x), axis=self.axis) + + x = jnp.array(x) + return [x] + + def create_model(self, input_shape, axis, input_type, index_dtype, input_duplicate): + self.input_shape = input_shape + self.axis = axis + self.input_type = input_type + self.input_duplicate = input_duplicate + + def jax_argmax(inp): + out = lax.argmax(inp, axis, index_dtype) + return out + + return jax_argmax, None, 'argmax' + + # Only [0, rank - 1] are valid axes for lax.argmax + @pytest.mark.parametrize('input_shape, axis', [([64], 0), + ([64, 16], 0), + ([64, 16], 1), + ([48, 23, 54], 0), + ([48, 23, 54], 1), + ([48, 23, 54], 2), + ([2, 18, 32, 25], 0), + ([2, 18, 32, 25], 1), + ([2, 18, 32, 25], 2), + ([2, 18, 32, 25], 3)]) + @pytest.mark.parametrize('input_type', [np.int8, np.uint8, np.int16, np.uint16, + np.int32, np.uint32, np.int64, np.uint64, + np.float16, np.float32, np.float64]) + @pytest.mark.parametrize("index_dtype", [np.int32, np.int64]) + @pytest.mark.parametrize("input_duplicate", [False, True]) + @pytest.mark.nightly + @pytest.mark.precommit_jax_fe + def test_argmax(self, ie_device, precision, ir_version, input_shape, axis, input_type, index_dtype, input_duplicate): + self._test(*self.create_model(input_shape, axis, input_type, index_dtype, input_duplicate), + ie_device, precision, ir_version) From 2be7e5f945a2eb116181a670ebb338a8fa533f6a Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Sun, 20 Oct 2024 21:50:57 -0700 Subject: [PATCH 075/112] [GPU] Fix fused op macro for dynamic shape eltwise fusing into convolution, fix deconvolution attribute kernel param when 1d (#27010) ### Details: - fix fused op input load macro for convolution with dynamic shape eltwise fusing - fix deconvolution kernel stride, pad, dilation axis extension for 1d ### Tickets: - 152406 --- .../src/graph/impls/ocl/deconvolution.cpp | 16 +- .../intel_gpu/src/kernel_selector/jitter.cpp | 16 +- .../convolution_backprop_data.cpp | 36 +++ .../dynamic/convolution.cpp | 253 ++++++++++++++++++ .../dynamic/convolution_backprop_data.cpp | 49 +++- 5 files changed, 358 insertions(+), 12 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp index 5e3462a6256364..95bd66867c1b8f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/deconvolution.cpp @@ -5,6 +5,7 @@ #include "primitive_base.hpp" #include "deconvolution_inst.h" +#include "intel_gpu/plugin/common_utils.hpp" #include "deconvolution/deconvolution_kernel_selector.h" #include "deconvolution/deconvolution_kernel_base.h" @@ -54,19 +55,16 @@ struct deconvolution_impl : typed_primitive_impl_ocl { params.filterSize = { kx, ky, kz }; - uint32_t pad_z = std::max(pad.size() >= 3 ? pad[pad.size() - 3] : 0, 0); - uint32_t pad_y = std::max(pad.size() >= 2 ? pad[pad.size() - 2] : 0, 0); - uint32_t pad_x = std::max(pad.size() >= 1 ? pad[pad.size() - 1] : 0, 0); + uint32_t pad_x, pad_y, pad_z; + std::tie(pad_x, pad_y, pad_z) = ov::intel_gpu::get_xyz(pad, 0); params.padding = {pad_x, pad_y, pad_z}; - uint32_t stride_z = stride.size() >= 3 ? static_cast(stride[stride.size() - 3]) : 1; - uint32_t stride_y = stride.size() >= 2 ? 
static_cast(stride[stride.size() - 2]) : 1; - uint32_t stride_x = stride.size() >= 1 ? static_cast(stride[stride.size() - 1]) : 1; + uint32_t stride_x, stride_y, stride_z; + std::tie(stride_x, stride_y, stride_z) = ov::intel_gpu::get_xyz(stride, 1); params.stride = {stride_x, stride_y, stride_z}; - uint32_t dilation_z = dilation.size() >= 3 ? static_cast(dilation[dilation.size() - 3]) : 1; - uint32_t dilation_y = dilation.size() >= 2 ? static_cast(dilation[dilation.size() - 2]) : 1; - uint32_t dilation_x = dilation.size() >= 1 ? static_cast(dilation[dilation.size() - 1]) : 1; + uint32_t dilation_x, dilation_y, dilation_z; + std::tie(dilation_x, dilation_y, dilation_z) = ov::intel_gpu::get_xyz(dilation, 1); params.dilation = {dilation_x, dilation_y, dilation_z}; return params; diff --git a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp index 480282b6060f16..33d13429fdcf3f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/jitter.cpp @@ -2197,7 +2197,21 @@ std::string FusedOpsCodeGenerator::GetJitLoad(const FusedOpsConfiguration& conf, if (vec_size > 1) { return block_read; - } else if (input_tensor.LogicalSize() > 1) { + } + + bool multiple_elements = false; + // For dynamic shape input tensor, check any one of static dimension has more than one element. + if (input_tensor.is_dynamic()) { + for (auto dim : input_tensor.GetDims()) { + auto v = dim.v; + if (v > 1) { + multiple_elements = true; + break; + } + } + } + + if (input_tensor.LogicalSize() > 1 || multiple_elements) { // Currently we assume that in such scenario we can safely load sub_group_size elements from the pointer return Broadcast(block_read, input_dt, vec_size); } else { diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp index f379b29ce23389..489f4096795361 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp @@ -211,4 +211,40 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddi ::testing::Values(ov::test::utils::DEVICE_GPU)), ConvolutionBackpropDataLayerTest::getTestCaseName); +const std::vector numOutChannels1d = {256}; + +/* ============= 1D ConvolutionBackpropData ============= */ +const std::vector netPrecisions1D = { + ov::element::f32 +}; + +const std::vector> inputShapes1D = {{{1, 512, 577}}}; +const std::vector> kernels1D = {{16}}; +const std::vector> strides1D = {{8}}; +const std::vector> padBegins1D = {{4}}; +const std::vector> padEnds1D = {{4}}; +const std::vector> dilations1D = {{1}}; + + +const std::vector> outputPadding1D = {{0}}; + +const auto conv1DParams_ExplicitPadding_output_padding = ::testing::Combine( + ::testing::ValuesIn(kernels1D), + ::testing::ValuesIn(strides1D), + ::testing::ValuesIn(padBegins1D), + ::testing::ValuesIn(padEnds1D), + ::testing::ValuesIn(dilations1D), + ::testing::ValuesIn(numOutChannels1d), + ::testing::Values(ov::op::PadType::EXPLICIT), + ::testing::ValuesIn(outputPadding1D) +); + +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData1D_ExplicitPadding, ConvolutionBackpropDataLayerTest, + ::testing::Combine( + conv1DParams_ExplicitPadding_output_padding, + 
::testing::ValuesIn(netPrecisions1D), + ::testing::ValuesIn(ov::test::static_shapes_to_test_representation(inputShapes1D)), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + ConvolutionBackpropDataLayerTest::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp index 093fca68b482fa..216a1b397c90bc 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution.cpp @@ -4,6 +4,7 @@ #include "common_test_utils/ov_tensor_utils.hpp" #include "common_test_utils/node_builders/activation.hpp" #include "common_test_utils/node_builders/convolution.hpp" +#include "common_test_utils/node_builders/eltwise.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" #include "shared_test_classes/single_op/convolution.hpp" @@ -317,4 +318,256 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic3DAsymPad, Convolu ::testing::Values(false)), ConvolutionLayerGPUTestDynamic::getTestCaseName); +typedef std::tuple< + convSpecificParams, + ov::element::Type, // Model type + std::vector, // Input shapes + std::string, // Device name + bool // activation fusing +> convLayerFusingTestParamsSet; + + +class ConvolutionLayerGPUTestDynamicEltwiseFusing : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + convSpecificParams convParams; + ov::element::Type model_type; + std::vector inputShapes; + std::string targetDevice; + bool activationFusing; + std::tie(convParams, model_type, inputShapes, targetDevice, activationFusing) = obj.param; + + ov::op::PadType padType; + std::vector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convParams; + + std::ostringstream result; + for (const auto& inputShape : inputShapes) { + result << "IS="; + result << ov::test::utils::partialShape2str({inputShape.first}) << "_"; + result << "TS=("; + for (const auto& shape : inputShape.second) { + result << ov::test::utils::vec2str(shape) << "_"; + } + } + result << ")_"; + result << "K" << ov::test::utils::vec2str(kernel) << "_"; + result << "S" << ov::test::utils::vec2str(stride) << "_"; + result << "PB" << ov::test::utils::vec2str(padBegin) << "_"; + result << "PE" << ov::test::utils::vec2str(padEnd) << "_"; + result << "D=" << ov::test::utils::vec2str(dilation) << "_"; + result << "O=" << convOutChannels << "_"; + result << "AP=" << padType << "_"; + result << "netPRC=" << model_type << "_"; + result << "trgDev=" << targetDevice << "_"; + result << "activationFusing=" << activationFusing; + + return result.str(); + } + +protected: + void SetUp() override { + convSpecificParams convParams; + std::vector inputShapes; + auto model_type = ov::element::undefined; + bool activationFusing; + std::tie(convParams, model_type, inputShapes, targetDevice, activationFusing) = this->GetParam(); + + init_input_shapes({inputShapes}); + + ov::op::PadType padType; + std::vector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convParams; + + ov::ParameterVector inputParams; + for (auto&& 
shape : inputDynamicShapes) + inputParams.push_back(std::make_shared(model_type, shape)); + + auto convolutionNode = ov::test::utils::make_convolution(inputParams.front(), model_type, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels); + if (activationFusing) { + auto activationNode = ov::test::utils::make_activation(convolutionNode, model_type, ov::test::utils::ActivationTypes::Relu); + auto eltwiseNode = ov::test::utils::make_eltwise(inputParams.back(), activationNode, ov::test::utils::EltwiseTypes::ADD); + + ov::ResultVector results; + for (size_t i = 0; i < eltwiseNode->get_output_size(); i++) + results.push_back(std::make_shared(eltwiseNode->output(i))); + + function = std::make_shared(results, inputParams, "Convolution"); + } else { + auto eltwiseNode = ov::test::utils::make_eltwise(inputParams.back(), convolutionNode, ov::test::utils::EltwiseTypes::ADD); + + ov::ResultVector results; + for (size_t i = 0; i < eltwiseNode->get_output_size(); i++) + results.push_back(std::make_shared(eltwiseNode->output(i))); + + function = std::make_shared(results, inputParams, "Convolution"); + } + } +}; + +TEST_P(ConvolutionLayerGPUTestDynamicEltwiseFusing, Inference) { + run(); +} +const std::vector> dynInputShapes1D_test = { + { + { + {1, 192, ov::Dimension::dynamic()}, + {{1, 192, 191}} + }, + { + {1, 192, ov::Dimension::dynamic()}, + {{1, 192, 1}} + } + }, + { + { + {ov::Dimension::dynamic(), 192, ov::Dimension::dynamic()}, + {{1, 192, 257}} + }, + { + {1, 1, ov::Dimension::dynamic()}, + {{1, 1, 257}} + } + }, + { + { + {ov::Dimension::dynamic(), 192, ov::Dimension::dynamic()}, + {{1, 192, 257}} + }, + { + {1, ov::Dimension::dynamic(), ov::Dimension::dynamic()}, + {{1, 1, 1}} + } + }, + { + { + {ov::Dimension::dynamic(), 192, ov::Dimension::dynamic()}, + {{1, 192, 1}} + }, + { + {1, ov::Dimension::dynamic(), ov::Dimension::dynamic()}, + {{1, 1, 1}} + } + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic1D_test_0, ConvolutionLayerGPUTestDynamicEltwiseFusing, + ::testing::Combine( + ::testing::Combine( + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{1}), + ::testing::Values(192), + ::testing::Values(ov::op::PadType::EXPLICIT)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn(dynInputShapes1D_test), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(false)), + ConvolutionLayerGPUTestDynamicEltwiseFusing::getTestCaseName); + +const std::vector> dynInputShapes1D_test1 = { + { + { + {1, 512, ov::Dimension::dynamic()}, + {{1, 512, 191}} + }, + { + {1, 512, ov::Dimension::dynamic()}, + {{1, 512, 1}} + } + }, + { + { + {ov::Dimension::dynamic(), 512, ov::Dimension::dynamic()}, + {{1, 512, 191}} + }, + { + {1, 1, ov::Dimension::dynamic()}, + {{1, 1, 191}} + } + }, + { + { + {ov::Dimension::dynamic(), 512, ov::Dimension::dynamic()}, + {{1, 512, 191}} + }, + { + {1, 1, ov::Dimension::dynamic()}, + {{1, 1, 1}} + } + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic1D_test_1, ConvolutionLayerGPUTestDynamicEltwiseFusing, + ::testing::Combine( + ::testing::Combine( + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{1}), + ::testing::Values(512), + ::testing::Values(ov::op::PadType::EXPLICIT)), + ::testing::Values(ov::element::f32), + 
::testing::ValuesIn(dynInputShapes1D_test1), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(false)), + ConvolutionLayerGPUTestDynamicEltwiseFusing::getTestCaseName); + +const std::vector> dynInputShapes1D_test2 = { + { + { + {1, 2048, ov::Dimension::dynamic()}, + {{1, 2048, 191}} + }, + { + {1, 2048, ov::Dimension::dynamic()}, + {{1, 2048, 1}} + } + }, + { + { + {1, 2048, ov::Dimension::dynamic()}, + {{1, 2048, 191}} + }, + { + {ov::Dimension::dynamic(), 1, ov::Dimension::dynamic()}, + {{1, 1, 191}} + } + }, + { + { + {1, 2048, ov::Dimension::dynamic()}, + {{1, 2048, 191}} + }, + { + {ov::Dimension::dynamic(), 1, ov::Dimension::dynamic()}, + {{1, 1, 1}} + } + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionLayerGPUTest_dynamic1D_test_2, ConvolutionLayerGPUTestDynamicEltwiseFusing, + ::testing::Combine( + ::testing::Combine( + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{1}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{0}), + ::testing::Values(std::vector{1}), + ::testing::Values(2048), + ::testing::Values(ov::op::PadType::EXPLICIT)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn(dynInputShapes1D_test2), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(false)), + ConvolutionLayerGPUTestDynamicEltwiseFusing::getTestCaseName); } // namespace diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp index 6b255c9981c08a..98176acfc9bdc7 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/convolution_backprop_data.cpp @@ -102,8 +102,8 @@ class DeconvolutionLayerGPUTest : public testing::WithParamInterface> emptyOutputPadding1d = { {0} }; + +/* ============= Deconvolution params ============= */ +const std::vector numOutChannels1d = { 256 }; + +/* ============= Deconvolution params (1D) ============= */ +const std::vector> kernels1d = { {16} }; +const std::vector> strides1d = { {8} }; +const std::vector> padBegins1d = { {4} }; +const std::vector> padEnds1d = { {4} }; +const std::vector> dilations1d = { {1} }; + +/* ============= Deconvolution (1D) ============= */ +const auto convParams_ExplicitPadding_1D = ::testing::Combine( + ::testing::ValuesIn(kernels1d), + ::testing::ValuesIn(strides1d), + ::testing::ValuesIn(padBegins1d), + ::testing::ValuesIn(padEnds1d), + ::testing::ValuesIn(dilations1d), + ::testing::ValuesIn(numOutChannels1d), + ::testing::Values(ov::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding1d) +); + +const std::vector dyn_1D_inputs_smoke = { + DeconvInputData{ + InputShape{{1, 512, -1}, {{1, 512, 577}}}, + ov::test::utils::InputLayerType::CONSTANT, + {} + }, +}; + +const std::vector netPrecisions1D = { + ov::element::f32 +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Deconv_1D_Dynamic_FP32, DeconvolutionLayerGPUTest, + ::testing::Combine( + convParams_ExplicitPadding_1D, + ::testing::ValuesIn(dyn_1D_inputs_smoke), + ::testing::ValuesIn(netPrecisions1D), + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::Values(emptyAdditionalConfig)), + DeconvolutionLayerGPUTest::getTestCaseName); } // namespace From d0056bd5154d556b748d1e4d2d6bebc62c25444a Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Mon, 21 Oct 2024 07:21:06 +0200 Subject: [PATCH 076/112] [CPU][ARM] Enable fast math in 
ACL deconvolution executor (#26615)

### Details:
- The ACL deconvolution `fast_math` option is enabled in `PERFORMANCE` mode.
- This option enables fast math computation in ACL. When the flag is set, ACL may dispatch the fastest implementation available, which can also introduce a drop in accuracy.
- Accuracy testing on a dataset subset highlights some deviations from reference values. Results are attached to the ticket.

### Tickets:
- CVS-152534
---
 src/plugins/intel_cpu/src/config.cpp                   | 6 ++++++
 src/plugins/intel_cpu/src/config.h                     | 3 +++
 src/plugins/intel_cpu/src/nodes/deconv.cpp             | 3 +++
 .../intel_cpu/src/nodes/executors/acl/acl_deconv.cpp   | 5 +++--
 src/plugins/intel_cpu/src/nodes/executors/deconv.hpp   | 3 +++
 5 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 2b9cdcc4ac1203..421dca07747932 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -400,6 +400,12 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
             inferencePrecision = ov::element::undefined;
         }
     }
+    // enable ACL fast math in PERFORMANCE mode
+#if defined(OV_CPU_WITH_ACL)
+    if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
+        aclFastMath = true;
+    }
+#endif
     // disable dynamic quantization and kv quantization for best accuracy
     if (executionMode == ov::hint::ExecutionMode::ACCURACY) {
         if (!fcDynamicQuantizationGroupSizeSetExplicitly) {
diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h
index eeb8e78f5fa91a..79cdf3a5e827ec 100644
--- a/src/plugins/intel_cpu/src/config.h
+++ b/src/plugins/intel_cpu/src/config.h
@@ -53,6 +53,9 @@ struct Config {
     uint64_t fcDynamicQuantizationGroupSize = 32;
     ov::element::Type kvCachePrecision = ov::element::f16;
     bool fcDynamicQuantizationGroupSizeSetExplicitly = false;
+#if defined(OV_CPU_WITH_ACL)
+    bool aclFastMath = false;
+#endif
 #if defined(OPENVINO_ARCH_X86_64)
     size_t rtCacheCapacity = 5000ul;
 #else
diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp
index 57046a0a06d55b..8a7f95268b4f3a 100644
--- a/src/plugins/intel_cpu/src/nodes/deconv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp
@@ -219,6 +219,9 @@ Deconvolution::Deconvolution(const std::shared_ptr<ov::Node>& op,
     for (size_t i = 0; i < deconvAttrs.dilation.size(); i++) {
         deconvAttrs.kernel.push_back(weightDims[withGroups + 2 + i]);
     }
+#if defined(OV_CPU_WITH_ACL)
+    deconvAttrs.aclFastMath = context->getConfig().aclFastMath;
+#endif
     externOutShape = inputShapes.size() == 3;
     biasPort = externOutShape ? 3 : 2;
diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
index 1345451669bdec..7d400bf96d7cb0 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
@@ -99,7 +99,7 @@ bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,
     deconv = std::make_unique<arm_compute::NEDeconvolutionLayer>();
     configureThreadSafe([&] {
-        deconv->configure(&srcTensor, &weiTensor, deconvAttrs.withBiasesParam ? &biasTensor : nullptr, &dstTensor, deconv_info);
+        deconv->configure(&srcTensor, &weiTensor, deconvAttrs.withBiasesParam ? &biasTensor : nullptr, &dstTensor, deconv_info, deconvAttrs.aclFastMath);
     });
     return true;
 }
@@ -271,7 +271,8 @@ bool AclDeconvExecutorBuilder::customIsSupported(const DeconvAttrs &deconvAttrs,
                                                  &weiTensorInfo,
                                                  deconvAttrs.withBiasesParam ?
&biasTensorInfo : nullptr, &dstTensorInfo, - deconv_info); + deconv_info, + deconvAttrs.aclFastMath); if (!status) { DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description()); return false; diff --git a/src/plugins/intel_cpu/src/nodes/executors/deconv.hpp b/src/plugins/intel_cpu/src/nodes/executors/deconv.hpp index 9528e5a5ef03e0..c632cc0cf99ad1 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/deconv.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/deconv.hpp @@ -22,6 +22,9 @@ struct DeconvAttrs { std::vector paddingR; ov::CoordinateDiff outputPadding; bool withBiasesParam = false; +#if defined(OV_CPU_WITH_ACL) + bool aclFastMath = false; +#endif }; class DeconvExecutor { From 0064022e0662b1fd8d169c2085b84271819939fb Mon Sep 17 00:00:00 2001 From: Xiuchuan Zhai Date: Mon, 21 Oct 2024 14:25:50 +0800 Subject: [PATCH 077/112] fix the coverity (#26963) ### Details: - *item1* - *...* ### Tickets: - *153061* --- .../intel_cpu/src/nodes/fullyconnected.cpp | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 5d2b6fd9b50212..7f6ed99b1173d7 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -107,13 +107,14 @@ void FullyConnected::needPrepareParamsForTensorParallel() { if (dim < 0) { dim += dims.size(); } - assert(static_cast(dims[dim]) >= tp_cfg.w_size); + OPENVINO_ASSERT(static_cast(dims[dim]) >= tp_cfg.w_size, + getName() + " dim[" + std::to_string(dim) + "] is " + std::to_string(dims[dim]) + ", which is larger than w_size " + std::to_string(tp_cfg.w_size)); auto splited_dim_vec = split_parts(dims[dim], tp_cfg.w_size); - VectorDims new_dims = dims; + VectorDims new_dims = std::move(dims); new_dims[dim] = splited_dim_vec[tp_cfg.w_rank]; auto memory_desc = dst_desc->cloneWithNewDims(new_dims, true); - tp_cfg.cached_dst->redefineDesc(memory_desc); + tp_cfg.cached_dst->redefineDesc(std::move(memory_desc)); memory[ARG_DST] = tp_cfg.cached_dst; } } @@ -133,6 +134,7 @@ void FullyConnected::prepareParams() { void FullyConnected::initTensorParallelSync() { if (tp_cfg.enable_tensor_parallel) { tp_cfg.id = tp_cfg.sub_memory->get_memory_id(tp_cfg.w_rank); + OPENVINO_ASSERT(tp_cfg.id > 0, "Tensor Parallel Config ID cannot be negative."); tp_cfg.sub_memory->set_memory_used(tp_cfg.id, tp_cfg.w_rank); while (true) { std::lock_guard lock(tp_cfg.sub_memory->_flagMutex); @@ -155,7 +157,7 @@ void FullyConnected::execTensorParallelSync() { auto dst = getDstMemoryAtPort(0); auto dst_ptr = static_cast(dst->getData()); - auto shape = dst->getShape(); + auto& shape = dst->getShape(); auto dims = shape.getDims(); auto prec = dst->getPrecision(); @@ -240,6 +242,10 @@ bool FullyConnected::canFuse(const NodePtr& node) const { #endif if (node->getType() == Type::FakeQuantize) { auto* fq = dynamic_cast(node.get()); + if (!fq) { + DEBUG_LOG("Invalid dynamic_cast FakeQuantize pointer"); + return false; + } if (fq->getBroadcastingPolicy() != FakeQuantize::BroadcastingPolicy::PerTensor) { const auto& dstShape = getOutputShapeAtPort(0); auto dataRanks = dstShape.getRank(); @@ -377,7 +383,7 @@ void FullyConnected::needUpdateDQScaleForTensorParallel(std::vector& dequ auto split_offset = tp_cfg.w_rank * split_lens[0]; std::vector newDQScales(split_lens[tp_cfg.w_rank]); std::copy(DQScales.begin() + split_offset, DQScales.begin() + split_offset + split_lens[tp_cfg.w_rank], 
newDQScales.begin()); - dequantizationScales = newDQScales; + dequantizationScales = std::move(newDQScales); } } @@ -448,21 +454,21 @@ void FullyConnected::needSplitMemoryForTensorParallel() { memory[ARG_SRC] = getSrcMemoryAtPort(DATA_ID); // wgt // split N direction - tp_cfg.cached_splited_weight = attrs.weightsNonTransposed ? split_vertical(context->getEngine(), wgt, 0, tp_cfg.w_rank, tp_cfg.w_size) - : split_horizontal(context->getEngine(), wgt, 0, tp_cfg.w_rank, tp_cfg.w_size); + tp_cfg.cached_splited_weight = attrs.weightsNonTransposed ? split_vertical(context->getEngine(), std::move(wgt), 0, tp_cfg.w_rank, tp_cfg.w_size) + : split_horizontal(context->getEngine(), std::move(wgt), 0, tp_cfg.w_rank, tp_cfg.w_size); memory[ARG_WEI] = tp_cfg.cached_splited_weight; // bias if (attrs.withBias) { auto bias = getSrcMemoryAtPort(BIAS_ID); - auto select_bias = split_horizontal(context->getEngine(), bias, 0, tp_cfg.w_rank, tp_cfg.w_size); - tp_cfg.cached_splited_bias = select_bias; + auto select_bias = split_horizontal(context->getEngine(), std::move(bias), 0, tp_cfg.w_rank, tp_cfg.w_size); + tp_cfg.cached_splited_bias = std::move(select_bias); } else { tp_cfg.cached_splited_bias = MemoryDescUtils::makeEmptyMemory(context); } memory[ARG_BIAS] = tp_cfg.cached_splited_bias; // dst memory[ARG_DST] = getDstMemoryAtPort(0); - tp_cfg.cached_dst = split_horizontal(context->getEngine(), dst, -1, tp_cfg.w_rank, tp_cfg.w_size, false); + tp_cfg.cached_dst = split_horizontal(context->getEngine(), std::move(dst), -1, tp_cfg.w_rank, tp_cfg.w_size, false); } } @@ -471,7 +477,7 @@ void FullyConnected::needUpdateTensorParalelConfig() { // 1. weight shape is dynamic // 2. last dim can be splited. if (tp_cfg.enable_tensor_parallel) { - auto shape = getSrcMemoryAtPort(WEIGHTS_ID)->getShape(); + auto& shape = getSrcMemoryAtPort(WEIGHTS_ID)->getShape(); if (shape.isDynamic()) { tp_cfg.enable_tensor_parallel = false; } else if (shape.getDims()[0] < static_cast(tp_cfg.w_size)) { @@ -520,8 +526,8 @@ void FullyConnected::needUpdateScaleForTensorParallel() { void FullyConnected::needSplitScaleForTensorParallel(const MemoryCPtr& memory) { if (tp_cfg.enable_tensor_parallel && !tp_cfg.cached_scale) { auto scale_mem = std::const_pointer_cast(memory); - tp_cfg.cached_scale = attrs.weightsNonTransposed ? split_vertical(context->getEngine(), scale_mem, 0, tp_cfg.w_rank, tp_cfg.w_size) - : split_horizontal(context->getEngine(), scale_mem, 0, tp_cfg.w_rank, tp_cfg.w_size); + tp_cfg.cached_scale = attrs.weightsNonTransposed ? split_vertical(context->getEngine(), std::move(scale_mem), 0, tp_cfg.w_rank, tp_cfg.w_size) + : split_horizontal(context->getEngine(), std::move(scale_mem), 0, tp_cfg.w_rank, tp_cfg.w_size); } } @@ -536,7 +542,7 @@ void FullyConnected::needSplitZeroPointForTensorParallel(const MemoryCPtr& memor auto zeropoint_mem = std::const_pointer_cast(memory); auto element_num = memory->getSize() / memory->getPrecision().size(); if (element_num == 1) { - tp_cfg.cached_zeropoint = zeropoint_mem; + tp_cfg.cached_zeropoint = std::move(zeropoint_mem); } else { tp_cfg.cached_zeropoint = attrs.weightsNonTransposed ? 
split_vertical(context->getEngine(), zeropoint_mem, 0, tp_cfg.w_rank, tp_cfg.w_size) : split_horizontal(context->getEngine(), zeropoint_mem, 0, tp_cfg.w_rank, tp_cfg.w_size); From 96595eb516b1ec9008b9d626b0b8d50bae76bc09 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 07:14:37 +0000 Subject: [PATCH 078/112] Bump actions/upload-artifact from 4.4.0 to 4.4.3 (#27151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.4.0 to 4.4.3.

Release notes

Sourced from actions/upload-artifact's releases.

v4.4.3

What's Changed

Full Changelog: https://github.com/actions/upload-artifact/compare/v4.4.2...v4.4.3

v4.4.2

What's Changed

Full Changelog: https://github.com/actions/upload-artifact/compare/v4.4.1...v4.4.2

v4.4.1

What's Changed

New Contributors

Full Changelog: https://github.com/actions/upload-artifact/compare/v4.4.0...v4.4.1

Commits
  • b4b15b8 Merge pull request #632 from actions/joshmgross/undo-dependency-changes
  • 92b01eb Undo indirect dependency updates from #627
  • 8448086 Merge pull request #627 from actions/robherley/v4.4.2
  • b1d4642 add explicit relative and absolute symlinks to workflow
  • d50e660 bump version
  • aabe6f8 build with @actions/artifact v2.1.11
  • 604373d Merge pull request #625 from actions/robherley/artifact-2.1.10
  • 0150148 paste right core version
  • a009b25 update licenses
  • 9f6f6f4 update @actions/core and @actions/artifact to latest versions
  • Additional commits viewable in compare view: https://github.com/actions/upload-artifact/compare/50769540e7f4bd5e21e526ee35c689e35e0d6874...b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/upload-artifact&package-manager=github_actions&previous-version=4.4.0&new-version=4.4.3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/linux_sanitizers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index f13f3765d4f353..e098b637150834 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -206,7 +206,7 @@ jobs: # - name: Upload sccache log if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: sccache_log_${{ matrix.SANITIZER }} path: ${{ env.SCCACHE_ERROR_LOG }} From 9ea3beae8bc6660fc948546195c8d68e315a5639 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Mon, 21 Oct 2024 09:23:23 +0200 Subject: [PATCH 079/112] [TESTS] Fix retry mechanism to raise if retry didn't help (#27139) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../py_frontend_tests/test_torchvision_preprocessor.py | 4 +++- tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py b/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py index 94060bf982ad96..1ec25f6c07f500 100644 --- a/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py +++ b/tests/layer_tests/py_frontend_tests/test_torchvision_preprocessor.py @@ -32,15 +32,17 @@ def forward(self, data): def _infer_pipelines(test_input, preprocess_pipeline, input_channels=3): retries = 0 max_retries = 3 + last_e = None while retries < max_retries: try: return _infer_pipelines_impl(test_input, preprocess_pipeline, input_channels) except RuntimeError as e: # This is a potentially sporadic issue print(f"An error occurred: {e}. Retrying...") + last_e = e retries += 1 else: - print("Max retries reached. Function execution failed.") + raise RuntimeError("Max retries reached. Function execution failed.") from last_e def _infer_pipelines_impl(test_input, preprocess_pipeline, input_channels=3): diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index 4d1582f0061d59..a44ca8c0117a4b 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -73,19 +73,21 @@ def _test(self, model, ref_net, kind, ie_device, precision, ir_version, infer_ti **kwargs): retries = 0 max_retries = 3 + last_e = None while retries < max_retries: try: return self._test_impl(model, ref_net, kind, ie_device, precision, ir_version, infer_timeout, dynamic_shapes, **kwargs) except RuntimeError as e: # This is a potentially sporadic issue print(f"An error occurred: {e}. Retrying...") + last_e = e retries += 1 else: - print("Max retries reached. Function execution failed.") + raise RuntimeError("Max retries reached. Function execution failed.") from last_e def _test_impl(self, model, ref_net, kind, ie_device, precision, ir_version, infer_timeout=60, dynamic_shapes=True, - **kwargs): + **kwargs): """ :param enabled_transforms/disabled_transforms: string with idxs of transforms that should be enabled/disabled. 
Example: "transform_1,transform_2" From ebdf1fc088c02de34d3ed4fd9d411e877fa604e0 Mon Sep 17 00:00:00 2001 From: Xiping Yan Date: Mon, 21 Oct 2024 15:28:23 +0800 Subject: [PATCH 080/112] [CPU] Fuse SDPA before/after Reshape+Transpose Node to SDPA (#26819) ### Details: - *Pattern: QKV_Reshape -> QKV_Transpose -> SDPA->OUT_Transpse->OUT_Reshape* - *Fuse this pattern to: SDPA* - *This hotspot can be observed after https://github.com/openvinotoolkit/openvino/pull/26130, this PR's implementation doesn't depend on it.* ### Tickets: - *153616* --------- Signed-off-by: xipingya --- src/plugins/intel_cpu/src/cpu_types.cpp | 1 + src/plugins/intel_cpu/src/extension.cpp | 1 + .../intel_cpu/src/nodes/scaled_attn.cpp | 49 +++- .../cpu_opset/common/op/sdpa.cpp | 42 +++ .../cpu_opset/common/op/sdpa.hpp | 39 ++- .../x64/pass/sdpa_fuse_transpose_reshape.cpp | 188 ++++++++++++++ .../x64/pass/sdpa_fuse_transpose_reshape.hpp | 18 ++ .../transformation_pipeline.cpp | 2 + .../x64/fuse_reshape_transpose_to_sdpa.cpp | 245 ++++++++++++++++++ 9 files changed, 569 insertions(+), 16 deletions(-) create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.cpp create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/fuse_reshape_transpose_to_sdpa.cpp diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 8b4ffaefcabfd3..fad6613f36b6cb 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -245,6 +245,7 @@ static const TypeToNameMap& get_type_to_name_tbl() { {"Ngram", Type::Ngram}, {"ScaledDotProductAttention", Type::ScaledDotProductAttention}, {"ScaledDotProductAttentionWithKVCache", Type::ScaledDotProductAttention}, + {"SDPAWithTransposeReshape", Type::ScaledDotProductAttention}, {"PagedAttentionExtension", Type::PagedAttention}, {"RoPE", Type::RoPE}, {"GatherCompressed", Type::Gather}, diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index f2256d9d03df15..a29282d4af3101 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -75,6 +75,7 @@ class TypeRelaxedExtension : public ov::OpExtension> { OP_EXTENSION(ov::intel_cpu::PowerStaticNode) \ OP_EXTENSION(ov::intel_cpu::CausalMaskPreprocessNode) \ OP_EXTENSION(ov::intel_cpu::SwishNode) \ + OP_EXTENSION(ov::intel_cpu::SDPAWithTransposeReshape) \ OP_EXTENSION(ov::intel_cpu::NgramNode) \ OP_EXTENSION(ov::op::internal::GatherCompressed) \ OP_EXTENSION(ov::op::internal::NonMaxSuppressionIEInternal) \ diff --git a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp index e70a3932b11b1e..e229ff4bb72c57 100644 --- a/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp +++ b/src/plugins/intel_cpu/src/nodes/scaled_attn.cpp @@ -866,6 +866,7 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt void execute(dnnl::stream strm, const Config& config, const std::vector& inputs, const MemoryPtr output, const MemoryPtr presentk_input, const MemoryPtr presentv_input, const MemoryPtr beam_input, const PlainTensor& k_scale_zp, const PlainTensor& v_scale_zp) override { + bool has_in_reshape = config.config.input_BLHxS; bool has_out_transpose = config.config.output_BLHxS; bool fuse_causal_attn = config.config.fuse_causal_attn; bool is_causal = 
config.config.is_causal; @@ -881,11 +882,28 @@ struct ScaledDotProductAttention::AttentionExecutor : public ScaledDotProductAtt float scale_input = 0.0f; size_t B, L1, L0, S, SV; + // B,L,H*S->B,L,H,S + auto get_reshape_shape = [&config](const PlainTensor& input) { + // [B,L,H*S] + auto inp_shape = input.shape(); + // [B,L,H,S] + return VectorDims{inp_shape[0], inp_shape[1], config.config.order_HS[0], config.config.order_HS[1]}; + }; + q_input.reset(inputs[0]); k_input.reset(inputs[1]); v_input.reset(inputs[2]); present_key.reset(presentk_input); present_value.reset(presentv_input); + if (has_in_reshape) { + q_input = q_input.reshape(get_reshape_shape(q_input)); + auto kv_shape = get_reshape_shape(k_input); + k_input = k_input.reshape(kv_shape); + v_input = v_input.reshape(kv_shape); + present_key = present_key.reshape(kv_shape); + present_value = present_value.reshape(kv_shape); + } + if (beam_input) beam_table.reset(beam_input); if (input_num > 3) { @@ -985,11 +1003,11 @@ ScaledDotProductAttention::ScaledDotProductAttention(const std::shared_ptr(op); - if (node) { + if (const auto node = std::dynamic_pointer_cast(op)) { m_config.config.is_causal = node->get_causal(); - } else { - const auto node = std::dynamic_pointer_cast(op); + } else if (const auto node = std::dynamic_pointer_cast(op)) { + m_config.config = node->get_config(); + } else if (const auto node = std::dynamic_pointer_cast(op)) { m_config.config = node->get_config(); } } @@ -1142,17 +1160,28 @@ void ScaledDotProductAttention::execute(dnnl::stream strm) { bool ScaledDotProductAttention::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + auto sdpaWithTransposeReshapeOp = std::dynamic_pointer_cast(op); if (!std::dynamic_pointer_cast(op) && - !std::dynamic_pointer_cast(op)) { - errorMessage = "Only ScaledDotProductAttention or ScaledDotProductAttentionWithKVCache operation are supported"; + !std::dynamic_pointer_cast(op) && !sdpaWithTransposeReshapeOp) { + errorMessage = "Only ScaledDotProductAttention, ScaledDotProductAttentionWithKVCache or " + "SDPAWithTransposeReshape operation are supported"; return false; } - // expect shape of q: [B, H, L, S] auto inRank = op->get_input_partial_shape(0).size(); - if (inRank != 4u) { - errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(inRank); - return false; + if (sdpaWithTransposeReshapeOp) { + // inRank expect shape of q: [B, L, H*S] + if (inRank != 3u) { + errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(inRank); + return false; + } + } else { + // inRank expect shape of q: [B, H, L, S] + if (inRank != 4u) { + errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(inRank); + return false; + } } + int orgSDPAInput = static_cast(op->get_input_size()); const auto node = std::dynamic_pointer_cast(op); if (node) { diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp index 4421499d10204d..bea56e2b8c833f 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.cpp @@ -99,4 +99,46 @@ bool ov::intel_cpu::ScaledDotProductAttentionWithKVCache::visit_attributes(ov::A visitor.on_attribute("permute_axes", m_config.permute_axes); visitor.finish_structure(); return true; +} + +ov::intel_cpu::SDPAWithTransposeReshape::SDPAWithTransposeReshape(const OutputVector& args, const Config& cfg) + : 
Op(args), + m_config(cfg) {} + +std::shared_ptr ov::intel_cpu::SDPAWithTransposeReshape::clone_with_new_inputs( + const ov::OutputVector& new_args) const { + INTERNAL_OP_SCOPE(SDPAWithTransposeReshape_with_new_inputs); + check_new_args_count(this, new_args); + return std::make_shared(new_args, m_config); +} + +void ov::intel_cpu::SDPAWithTransposeReshape::validate_and_infer_types() { + INTERNAL_OP_SCOPE(SDPAWithTransposeReshape_validate_and_infer_types); + // [B,L,H*S] + auto q_ps = get_input_partial_shape(0); + auto output_ps = q_ps; + NODE_VALIDATION_CHECK(this, m_config.output_BLHxS == true); + NODE_VALIDATION_CHECK(this, m_config.input_BLHxS == true); + NODE_VALIDATION_CHECK(this, q_ps.size() == 3u); + + // permute_axes should be [B, H, L, S] + const auto& permute_axes = this->m_config.permute_axes; + NODE_VALIDATION_CHECK(this, permute_axes.size() == 4u); + + // order_HS should be [H,S] + const auto& order_HS = this->m_config.order_HS; + NODE_VALIDATION_CHECK(this, order_HS.size() == 2u); + + set_output_type(0, get_input_element_type(0), output_ps); +} + +bool ov::intel_cpu::SDPAWithTransposeReshape::visit_attributes(ov::AttributeVisitor& visitor) { + INTERNAL_OP_SCOPE(SDPAWithTransposeReshape_visit_attributes); + visitor.start_structure("config"); + visitor.on_attribute("input_BLHxS", m_config.input_BLHxS); + visitor.on_attribute("output_BLHxS", m_config.output_BLHxS); + visitor.on_attribute("permute_axes", m_config.permute_axes); + visitor.on_attribute("order_HS", m_config.order_HS); + visitor.finish_structure(); + return true; } \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.hpp index 8fe1c9ce4ffa19..8c811f16262734 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/sdpa.hpp @@ -21,13 +21,15 @@ class ScaledDotProductAttentionWithKVCache : public ov::op::Op { ScaledDotProductAttentionWithKVCache() = default; struct Config { - bool output_BLHxS = false; // true implies that output is [B,L,H*S] + bool input_BLHxS = false; // true implies that input is [B,L,H*S] + bool output_BLHxS = false; // true implies that output is [B,L,H*S] - bool fuse_causal_attn = false; // fuse causal mask and attn mask into attn_mask - bool is_causal = false; // apply causal mask internally - bool fuse_concat = false; // fuse (concat->sdp) ==> sdp - std::vector permute_axes; // not empty means input has transpose. output of permutation is [B,H,L,S] - // e.g. [L,B,H,S] -> permute[1, 2, 0, 3] ->[B, H, L, S] + bool fuse_causal_attn = false; // fuse causal mask and attn mask into attn_mask + bool is_causal = false; // apply causal mask internally + bool fuse_concat = false; // fuse (concat->sdp) ==> sdp + std::vector permute_axes; // not empty means input has transpose. output of permutation is [B,H,L,S] + // e.g. [L,B,H,S] -> permute[1, 2, 0, 3] ->[B, H, L, S] + std::vector order_HS; // Reshape[B,L,H*S]->B,L,H,S], H,S are fixed value, when input_BLHxS is true. 
}; ScaledDotProductAttentionWithKVCache(const OutputVector& args, const Config& cfg); @@ -48,5 +50,30 @@ class ScaledDotProductAttentionWithKVCache : public ov::op::Op { Config m_config; }; +class SDPAWithTransposeReshape : public ov::op::Op { +public: + OPENVINO_OP("SDPAWithTransposeReshape", "cpu_plugin_opset"); + using Config = ScaledDotProductAttentionWithKVCache::Config; + + SDPAWithTransposeReshape() = default; + + SDPAWithTransposeReshape(const OutputVector& args, const Config& cfg); + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + bool visit_attributes(AttributeVisitor& visitor) override; + void validate_and_infer_types() override; + + const Config& get_config() const { + return m_config; + } + + Config& get_config() { + return m_config; + } + +private: + Config m_config; +}; + } // namespace intel_cpu } // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.cpp new file mode 100644 index 00000000000000..3aa0fd0d08e69b --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.cpp @@ -0,0 +1,188 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "sdpa_fuse_transpose_reshape.hpp" + +#include + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/cpu_opset/common/op/sdpa.hpp" + +/* + * Description: SDPA fuse transpose and reshape. 
+ * Original pattern Fused pattern + * + * input1 input2 input3 + * | | | + * q_reshape k_reshape v_reshap + * | | | (qkv transpose and reshape's orders) + * q_transpose k_transpose v_transpose | + * \ | / input1 input2 input3 | + * \ | / \ | / / + * ScaledDotProductAttention ---------> SDPAWithTransposeReshape + * | | + * out_transpose | + * | output + * out_reshpae + * | + * output + */ + +using namespace ov; +using namespace ov::pass::pattern; + +intel_cpu::SDPAFuseTransposeReshape::SDPAFuseTransposeReshape() { + MATCHER_SCOPE(SDPAFuseTransposeReshape); + + auto q_reshape_node = wrap_type({any_input(), any_input()}); + auto k_reshape_node = wrap_type({any_input(), any_input()}); + auto v_reshape_node = wrap_type({any_input(), any_input()}); + + auto q_transpose_order_node = wrap_type(); + auto k_transpose_order_node = wrap_type(); + auto v_transpose_order_node = wrap_type(); + auto q_transpose_node = wrap_type({q_reshape_node, q_transpose_order_node}); + auto k_transpose_node = wrap_type({k_reshape_node, k_transpose_order_node}); + auto v_transpose_node = wrap_type({v_reshape_node, v_transpose_order_node}); + + auto sdpa_node = + wrap_type({q_transpose_node, k_transpose_node, v_transpose_node}); + + auto out_transpose_order_node = wrap_type(); + auto out_transpose_node = wrap_type({sdpa_node, out_transpose_order_node}); + auto out_reshape_node = wrap_type({out_transpose_node, wrap_type()}); + + matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](pass::pattern::Matcher& m) { + auto& pattern_map = m.get_pattern_value_map(); + auto sdpa = as_type_ptr(pattern_map.at(sdpa_node).get_node_shared_ptr()); + if (sdpa == nullptr || transformation_callback(sdpa)) { + return false; + } + + // Order=[0, 2, 1, 3] + auto is_expected_transpose = [&](std::shared_ptr& transpose) { + if (transpose) { + const auto orders = as_type_ptr(transpose->get_input_node_shared_ptr(1)); + return orders && (std::vector({0, 2, 1, 3}) == orders->cast_vector()); + } + return false; + }; + + // Reshape [B,L,H*S] -> [B,L,H,S] + auto is_expected_reshape = [&](std::shared_ptr& reshape_node, bool reverse = false) { + if (reshape_node) { + auto inp_shape = reshape_node->get_input_partial_shape(0); + auto outp_shape = reshape_node->get_output_partial_shape(0); + // Expect shape: [?, ?, val] + auto check_dim_3 = [](ov::PartialShape shape) { + return shape.rank().is_static() && shape.rank() == 3 && shape[2].is_static(); + }; + // Expect shape: [?, ?, val, val] + auto check_dim_4 = [](ov::PartialShape shape) { + return shape.rank().is_static() && shape.rank() == 4 && shape[2].is_static() && + shape[3].is_static(); + }; + + if (reverse) { + return check_dim_4(inp_shape) && check_dim_3(outp_shape) && + (outp_shape[2] == inp_shape[2] * inp_shape[3]); + } else { + return check_dim_3(inp_shape) && check_dim_4(outp_shape) && + (inp_shape[2] == outp_shape[2] * outp_shape[3]); + } + } + return false; + }; + + // Pattern: Reshape->Transpose->SDPA + auto q_reshape = as_type_ptr(pattern_map.at(q_reshape_node).get_node_shared_ptr()); + auto k_reshape = as_type_ptr(pattern_map.at(k_reshape_node).get_node_shared_ptr()); + auto v_reshape = as_type_ptr(pattern_map.at(v_reshape_node).get_node_shared_ptr()); + + if (!(is_expected_reshape(q_reshape) && is_expected_reshape(k_reshape) && is_expected_reshape(v_reshape))) { + return false; + } + // K,V Reshape's order should be same node. 
+ auto k_reshape_order = as_type_ptr(k_reshape->get_input_node_shared_ptr(1)); + auto v_reshape_order = as_type_ptr(v_reshape->get_input_node_shared_ptr(1)); + if (k_reshape_order && v_reshape_order) { + if (k_reshape_order->cast_vector() != v_reshape_order->cast_vector()) { + return false; + } + } else if (k_reshape->get_input_node_shared_ptr(1) != v_reshape->get_input_node_shared_ptr(1)) { + return false; + } + + std::shared_ptr qkv_transpose[3] = {}; + std::shared_ptr qkv_transpose_order[3] = {}; + qkv_transpose[0] = as_type_ptr(pattern_map.at(q_transpose_node).get_node_shared_ptr()); + qkv_transpose[1] = as_type_ptr(pattern_map.at(k_transpose_node).get_node_shared_ptr()); + qkv_transpose[2] = as_type_ptr(pattern_map.at(v_transpose_node).get_node_shared_ptr()); + qkv_transpose_order[0] = as_type_ptr(pattern_map.at(q_transpose_order_node).get_node_shared_ptr()); + qkv_transpose_order[1] = as_type_ptr(pattern_map.at(k_transpose_order_node).get_node_shared_ptr()); + qkv_transpose_order[2] = as_type_ptr(pattern_map.at(v_transpose_order_node).get_node_shared_ptr()); + auto out_tranpose = as_type_ptr(pattern_map.at(out_transpose_node).get_node_shared_ptr()); + auto out_transpose_order = as_type_ptr(pattern_map.at(out_transpose_order_node).get_node_shared_ptr()); + + if (!(is_expected_transpose(qkv_transpose[0]) && is_expected_transpose(qkv_transpose[1]) && + is_expected_transpose(qkv_transpose[2]))) { + return false; + } + if (!is_expected_transpose(out_tranpose)) { + return false; + } + + auto out_reshape = as_type_ptr(pattern_map.at(out_reshape_node).get_node_shared_ptr()); + if (!is_expected_reshape(out_reshape, true)) { + return false; + } + + OutputVector args = {q_reshape->get_input_node_shared_ptr(0), + k_reshape->get_input_node_shared_ptr(0), + v_reshape->get_input_node_shared_ptr(0)}; + + // Config + intel_cpu::SDPAWithTransposeReshape::Config config; + config.is_causal = sdpa->get_causal(); + config.fuse_concat = false; + config.output_BLHxS = true; + + // Config::permute_axes + const auto& permute_q = qkv_transpose_order[0]->cast_vector(); + config.permute_axes.resize(permute_q.size()); + for (size_t i = 0; i < permute_q.size(); i++) { + config.permute_axes[i] = static_cast(permute_q[i]); + } + + // Config::order_HS + config.order_HS.resize(2); + auto reshape_out_shape = q_reshape->get_output_partial_shape(0).get_min_shape(); // [?,?,H,S] + config.order_HS[0] = reshape_out_shape[2]; + config.order_HS[1] = reshape_out_shape[3]; + config.input_BLHxS = true; + + auto new_sdpa = std::make_shared(args, config); + new_sdpa->set_friendly_name(sdpa->get_friendly_name() + "/fused_reshape_transpose"); + NodeVector replaced_nodes = {q_reshape, + k_reshape, + v_reshape, + qkv_transpose[0], + qkv_transpose[1], + qkv_transpose[2], + sdpa, + out_tranpose, + out_reshape}; + copy_runtime_info(replaced_nodes, new_sdpa); + ov::replace_node(out_reshape, new_sdpa); + return true; + }; + + auto m = std::make_shared(out_reshape_node, matcher_name); + register_matcher(m, callback); +} diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.hpp new file mode 100644 index 00000000000000..74ba6ec6221d1e --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.hpp @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace ov { 
+namespace intel_cpu { +class SDPAFuseTransposeReshape : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("SDPAFuseTransposeReshape", "0"); + SDPAFuseTransposeReshape(); +}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 04808baaebec54..e45b6379d1e968 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -139,6 +139,7 @@ #include "transformations/cpu_opset/common/pass/swap_convert_transpose.hpp" #include "transformations/cpu_opset/common/pass/causal_mask_preprocess_fusion.hpp" #include "transformations/cpu_opset/common/pass/stateful_sdpa_fusion.hpp" +#include "transformations/cpu_opset/x64/pass/sdpa_fuse_transpose_reshape.hpp" // Snippets #include "snippets/pass/tokenization.hpp" @@ -864,6 +865,7 @@ void Transformations::PostLpt() { CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::transpose_sinking::TSShapeOfForward); CPU_REGISTER_PASS_COMMON(postLPTPassManager, StatefulSDPAFusion); + CPU_REGISTER_PASS_X64(postLPTPassManager, ov::intel_cpu::SDPAFuseTransposeReshape); CPU_REGISTER_PASS_X64(postLPTPassManager, ov::pass::RMSFusion, false); CPU_REGISTER_PASS_X64(postLPTPassManager, ov::intel_cpu::DecomposeRMSNorm); CPU_SET_CALLBACK_X64(postLPTPassManager, diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/fuse_reshape_transpose_to_sdpa.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/fuse_reshape_transpose_to_sdpa.cpp new file mode 100644 index 00000000000000..a75156c0f69fcb --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/fuse_reshape_transpose_to_sdpa.cpp @@ -0,0 +1,245 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/include/common_test_utils/ov_tensor_utils.hpp" +#include "openvino/pass/manager.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp" +#include "utils/cpu_test_utils.hpp" + +using namespace ov::test; +using namespace CPUTestUtils; + +namespace ov { +namespace test { + +// Subgraph: +/* + * Parameter Parameter + * | | + * Parameter ReadValue ReadValue + * | | \ | \ + * Reshape Reshape Assign Reshape Assign + * | | | + * Transpose Transpoe Transpose + * \ | / + * ScaledDotProductAttention + * | + * Tranpose + * | + * Reshape + * | + * Result + */ + +// +using InputShapeAndReshapeOrder = std::pair, std::vector>; +using FuseSDPAReshapeTransposeTestParams = std::tuple; +class FuseSDPAReshapeTransposeTest : virtual public ov::test::SubgraphBaseTest, + public testing::WithParamInterface, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + ElementType inType; + InputShapeAndReshapeOrder inputShapeAndOrders; + std::tie(inType, inputShapeAndOrders) = obj.param; + std::ostringstream result; + std::vector& inputShapes = inputShapeAndOrders.first; + auto& reshapeOrderHS = inputShapeAndOrders.second; + result << "IS="; + for (const auto& shape : inputShapes) { + result << ov::test::utils::partialShape2str({shape.first}) << "_"; + } + result << "TS="; + for (const auto& shape : inputShapes) { + result << "("; + if (!shape.second.empty()) { + for (const auto& itr : shape.second) { + result << 
ov::test::utils::vec2str(itr); + } + } + result << ")_"; + } + result << "Prc=" << inType << "_"; + result << "ReshapeOrderHS="; + result << "("; + for (const auto& itr : reshapeOrderHS) { + result << itr << ","; + } + result << ")"; + + return result.str(); + } + + void SetUp() override { + ElementType inType; + InputShapeAndReshapeOrder inputShapeAndOrders; + std::tie(inType, inputShapeAndOrders) = this->GetParam(); + std::vector& inputShapes = inputShapeAndOrders.first; + auto& reshapeOrderHS = inputShapeAndOrders.second; + targetDevice = ov::test::utils::DEVICE_CPU; + rel_threshold = 1e-2f; + configuration[ov::hint::inference_precision.name()] = ov::element::f32; + if (inType == ElementType::bf16) { + configuration[ov::hint::inference_precision.name()] = ov::element::bf16; + rel_threshold = 0.01f; + } + init_input_shapes(inputShapes); + + // pre SDPA reshape->transpose + ov::ParameterVector inputParams(3); + ov::SinkVector sinkNodes; + OutputVector transposes(3); + for (size_t i = 0; i < 3u; i++) { + inputParams[i] = std::make_shared(inType, inputDynamicShapes[0]); + + auto reshape_axis = + ov::op::v0::Constant::create(ov::element::i64, {4}, {0, 0, reshapeOrderHS[0], reshapeOrderHS[1]}); + + std::shared_ptr reshape_input_1 = inputParams[i]; + if (i > 0) { + auto var = std::make_shared( + ov::op::util::VariableInfo{inputDynamicShapes[0], inType, "var_" + std::to_string(i)}); + auto readvalue = std::make_shared(inputParams[i], var); + auto assign = std::make_shared(readvalue, var); + sinkNodes.emplace_back(assign); + reshape_input_1 = readvalue; + } + + auto reshape = std::make_shared(reshape_input_1, reshape_axis, true); + auto transposeOrder = ov::op::v0::Constant::create(ov::element::i64, {4}, {0, 2, 1, 3}); + transposes[i] = std::make_shared(reshape, transposeOrder); + } + + auto sdpa = std::make_shared(transposes, false); + sdpa->set_friendly_name("mha"); + + // post SDPA transpose + reshape + auto postOrder = + ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector{0, 2, 1, 3}); // BHLS -> BLHS + auto transposeSDPA = std::make_shared(sdpa, postOrder); + + auto constReshape = + ov::op::v0::Constant::create(ov::element::i64, {3}, {0, 0, reshapeOrderHS[0] * reshapeOrderHS[1]}); + auto reshapeSDPA = std::make_shared(transposeSDPA, constReshape, true); // BLHS -> B,L,HxS + + function = std::make_shared(ov::OutputVector{reshapeSDPA}, + sinkNodes, + inputParams, + "FuseSDPAReshapeTranspose"); + targetDevice = ov::test::utils::DEVICE_CPU; + functionRefs = function->clone(); + pass::Manager manager; + // decompose ScaledDotProductAttention + manager.register_pass(); + manager.run_passes(functionRefs); + } + + template + static void strided_iota(IT first, size_t n, T value, T stride) { + for (size_t i = 0; i < n; i++) { + *first++ = value; + value += stride; + } + } + void generate(int idx, const std::vector& targetInputStaticShapes) { + inputs.clear(); + auto create_input = [this] (std::shared_ptr param, ov::Shape shape, float val) { + if (param->get_element_type() == ov::element::i32) { + ov::Tensor t{ov::element::i32, shape}; + auto size = ov::shape_size(shape); + auto* p = static_cast(t.data()); + auto start = static_cast(val); + for (size_t i = 0; i < size; i++) { + p[i] = (start + i) % size; + } + inputs.insert({param, t}); + } else if (param->get_element_type() == ov::element::f32) { + ov::Tensor t{ov::element::f32, shape}; + strided_iota(static_cast(t.data()), t.get_size(), val, 0.1f); + inputs.insert({param, t}); + } else { + ASSERT_TRUE(param->get_element_type() == 
ov::element::bf16); + ov::Tensor t{ov::element::bf16, shape}; + strided_iota(static_cast(t.data()), t.get_size(), val, 0.1f); + inputs.insert({param, t}); + } + }; + // q, k, v + create_input(function->get_parameters()[0], targetInputStaticShapes[0], idx + 1.0f); + create_input(function->get_parameters()[1], targetInputStaticShapes[0], idx + 2.0f); + create_input(function->get_parameters()[2], targetInputStaticShapes[0], idx + 3.0f); + } + void prepare() { + compile_model(); + inferRequest = compiledModel.create_infer_request(); + ASSERT_TRUE(inferRequest); + } + void reset() { + for (auto&& state : inferRequest.query_state()) { + state.reset(); + } + } + + std::vector run_test(std::shared_ptr model) { + function = model; + prepare(); + std::vector outputs; + int idx = 0; + for (auto&& shapes : targetStaticShapes) { + generate(idx++, shapes); + for (const auto& input : inputs) { + inferRequest.set_tensor(input.first, input.second); + } + inferRequest.infer(); + auto outputTensor = inferRequest.get_output_tensor(0); + ov::Tensor copy{outputTensor.get_element_type(), outputTensor.get_shape()}; + outputTensor.copy_to(copy); + outputs.push_back(copy); + reset(); + } + return outputs; + } +}; + +TEST_P(FuseSDPAReshapeTransposeTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + bool reshape_transpose_fused = false; + auto actualOutputs = run_test(function); + CheckNumberOfNodesWithType(compiledModel, "ScaledDotProductAttention", 1); + CheckNumberOfNodesWithType(compiledModel, "Reshape", 0); + CheckNumberOfNodesWithType(compiledModel, "Transpose", 0); + for (const auto& n : compiledModel.get_runtime_model()->get_ordered_ops()) { + if (n->get_friendly_name() == "mha/fused_reshape_transpose") { + reshape_transpose_fused = true; + } + } + ASSERT_TRUE(reshape_transpose_fused); + + auto expectedOutputs = run_test(functionRefs); + for (size_t i = 0; i < actualOutputs.size(); i++) { + ov::test::utils::compare(expectedOutputs[i], actualOutputs[i], abs_threshold, rel_threshold); + } +} + +namespace { +const std::vector inputShapeAndReshapeOrders = { + // + { + {{ + // Q,K,V:[B, L, H*S] + {{-1, -1, 4 * 16}, {{1, 1, 4 * 16}, {1, 2, 4 * 16}, {2, 2, 4 * 16}}}, + }, + // reshapeOrderHS + {4, 16}}, + }}; + +INSTANTIATE_TEST_SUITE_P(smoke_FuseSDPAReshapeTransposeTest, + FuseSDPAReshapeTransposeTest, + ::testing::Combine(::testing::Values(ElementType::f32), + ::testing::ValuesIn(inputShapeAndReshapeOrders)), + FuseSDPAReshapeTransposeTest::getTestCaseName); +} // namespace +} // namespace test +} // namespace ov From 4043e15cc2520f3fec9f0f9d497f6457d8367224 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Mon, 21 Oct 2024 09:38:47 +0200 Subject: [PATCH 081/112] [PyOV] Extend Python API with STFT-15 (#27142) ### Details: - Extend Python API with STFT-15 ### Tickets: - 147160 --- .../src/openvino/runtime/opset15/__init__.py | 1 + .../src/openvino/runtime/opset15/ops.py | 24 +++++++++++++++++++ .../python/tests/test_graph/test_create_op.py | 16 +++++++++++++ 3 files changed, 41 insertions(+) diff --git a/src/bindings/python/src/openvino/runtime/opset15/__init__.py b/src/bindings/python/src/openvino/runtime/opset15/__init__.py index 96643a7e93d596..58fd90e7fd1051 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset15/__init__.py @@ -16,3 +16,4 @@ from openvino.runtime.opset15.ops import bitwise_left_shift from openvino.runtime.opset15.ops import bitwise_right_shift from openvino.runtime.opset15.ops import slice_scatter +from 
openvino.runtime.opset15.ops import stft diff --git a/src/bindings/python/src/openvino/runtime/opset15/ops.py b/src/bindings/python/src/openvino/runtime/opset15/ops.py index 116f63726bfeb6..c278120dab7432 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset15/ops.py @@ -303,3 +303,27 @@ def slice_scatter( inputs = as_nodes(data, updates, start, stop, step, axes, name=name) return _get_node_factory_opset15().create("SliceScatter", inputs) + + +@nameable_op +def stft( + data: NodeInput, + window: NodeInput, + frame_size: NodeInput, + frame_step: NodeInput, + transpose_frames: bool, + name: Optional[str] = None, +) -> Node: + """Return a node which generates STFT operation. + + :param data: The node providing input data. + :param window: The node providing window data. + :param frame_size: The node with scalar value representing the size of Fourier Transform. + :param frame_step: The distance (number of samples) between successive window frames. + :param transpose_frames: Flag to set output shape layout. If true the `frames` dimension is at out_shape[2], + otherwise it is at out_shape[1]. + :param name: The optional name for the created output node. + :return: The new node performing STFT operation. + """ + inputs = as_nodes(data, window, frame_size, frame_step, name=name) + return _get_node_factory_opset15().create("STFT", inputs) diff --git a/src/bindings/python/tests/test_graph/test_create_op.py b/src/bindings/python/tests/test_graph/test_create_op.py index c5023588f5d55b..940f8244f427b8 100644 --- a/src/bindings/python/tests/test_graph/test_create_op.py +++ b/src/bindings/python/tests/test_graph/test_create_op.py @@ -2486,6 +2486,22 @@ def test_slice_scatter(): assert node_default_axes.get_output_shape(0) == data_shape +def test_stft(): + data_shape = [4, 48] + data = ov.parameter(data_shape, name="input", dtype=np.float32) + window = ov.parameter([7], name="window", dtype=np.float32) + frame_size = ov.constant(np.array(11, dtype=np.int32)) + frame_step = ov.constant(np.array(3, dtype=np.int32)) + transpose_frames = True + + op = ov_opset15.stft(data, window, frame_size, frame_step, transpose_frames) + + assert op.get_type_name() == "STFT" + assert op.get_output_size() == 1 + assert op.get_output_element_type(0) == Type.f32 + assert op.get_output_shape(0) == [4, 13, 6, 2] + + def test_parameter_get_attributes(): parameter = ov.parameter([2, 2], dtype=np.float32, name="InputData") parameter_attributes = parameter.get_attributes() From 3f953f4ae4c6d22e6a57f9a51217133b7ce8a529 Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Mon, 21 Oct 2024 09:44:12 +0200 Subject: [PATCH 082/112] [DOCS] benchmark content restructuring (#26918) --- .../about-openvino/performance-benchmarks.rst | 111 +++---- .../generative-ai-performance.rst | 28 +- .../getting-performance-numbers.rst | 273 ++++++++++++------ .../model-accuracy-int8-fp32.rst | 7 +- .../_static/benchmarks_files/llm_models.csv | 22 ++ .../_static/download/llm_models.csv | 22 -- .../_static/download/llm_models_ovms.csv | 100 ------- 7 files changed, 273 insertions(+), 290 deletions(-) create mode 100644 docs/sphinx_setup/_static/benchmarks_files/llm_models.csv delete mode 100644 docs/sphinx_setup/_static/download/llm_models.csv delete mode 100644 docs/sphinx_setup/_static/download/llm_models_ovms.csv diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 40b94210f6c43d..ed9d39aaf8b9e6 100644 
--- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -16,14 +16,12 @@ Performance Benchmarks Getting Performance Numbers -This page presents benchmark results for +This page presents benchmark results for the `Intel® Distribution of OpenVINO™ toolkit `__ and :doc:`OpenVINO Model Server <../openvino-workflow/model-server/ovms_what_is_openvino_model_server>`, for a representative selection of public neural networks and Intel® devices. The results may help you decide which hardware to use in your applications or plan AI workload for the hardware you have already implemented in your solutions. Click the buttons below to see the chosen benchmark data. -For a more detailed view of performance numbers for generative AI models, check the -:doc:`Generative AI Benchmark Results <./performance-benchmarks/generative-ai-performance>` .. grid:: 1 1 2 2 :gutter: 4 @@ -36,7 +34,7 @@ For a more detailed view of performance numbers for generative AI models, check :outline: :expand: - :material-regular:`bar_chart;1.4em` OpenVINO Benchmark Graphs + :material-regular:`bar_chart;1.4em` OpenVINO Benchmark Graphs (general) .. grid-item:: @@ -46,10 +44,35 @@ For a more detailed view of performance numbers for generative AI models, check :outline: :expand: - :material-regular:`bar_chart;1.4em` OVMS Benchmark Graphs + :material-regular:`bar_chart;1.4em` OVMS Benchmark Graphs (general) + + .. grid-item:: + + .. button-link:: ./performance-benchmarks/generative-ai-performance.html + :class: ov-toolkit-benchmark-genai + :color: primary + :outline: + :expand: + + :material-regular:`table_view;1.4em` LLM performance for AI PC + + .. grid-item:: + + .. button-link:: # + :class: ovms-toolkit-benchmark-llm + :color: primary + :outline: + :expand: + + :material-regular:`bar_chart;1.4em` OVMS for GenAI (coming soon) + + + + -Key performance indicators and workload parameters. + +**Key performance indicators and workload parameters** .. tab-set:: @@ -65,13 +88,13 @@ Key performance indicators and workload parameters. .. tab-item:: Latency :sync: latency - For Vision and NLP models this mhis measures the synchronous execution of inference requests and is reported in - milliseconds. Each inference request (for example: preprocess, infer, postprocess) is - allowed to complete before the next is started. This performance metric is relevant in - usage scenarios where a single image input needs to be acted upon as soon as possible. An - example would be the healthcare sector where medical personnel only request analysis of a - single ultra sound scanning image or in real-time or near real-time applications for - example an industrial robot's response to actions in its environment or obstacle avoidance + For Vision and NLP models this measures the synchronous execution of inference requests and + is reported in milliseconds. Each inference request (for example: preprocess, infer, + postprocess) is allowed to complete before the next one starts. This performance metric is + relevant in usage scenarios where a single image input needs to be acted upon as soon as + possible. An example would be the healthcare sector where medical personnel only request + analysis of a single ultra sound scanning image or in real-time or near real-time applications + such as an industrial robot's response to actions in its environment or obstacle avoidance for autonomous vehicles. 
For Transformer models like Stable-Diffusion this measures the time it takes to convert the prompt or input text into a finished image. It is presented in seconds. @@ -97,9 +120,10 @@ Key performance indicators and workload parameters. * input token length: 1024 (the tokens for GenAI models are in English). -.. raw:: html +**Platforms, Configurations, Methodology** -

Platforms, Configurations, Methodology

+To see the methodology used to obtain the numbers and learn how to test performance yourself, +see the guide on :doc:`getting performance numbers `. For a listing of all platforms and configurations used for testing, refer to the following: @@ -130,59 +154,10 @@ For a listing of all platforms and configurations used for testing, refer to the :material-regular:`download;1.5em` Click for Performance Data [XLSX] -The OpenVINO benchmark setup includes a single system with OpenVINO™, as well as the benchmark -application installed. It measures the time spent on actual inference (excluding any pre or post -processing) and then reports on the inferences per second (or Frames Per Second). - -OpenVINO™ Model Server (OVMS) employs the Intel® Distribution of OpenVINO™ toolkit runtime -libraries and exposes a set of models via a convenient inference API over gRPC or HTTP/REST. -Its benchmark results are measured with the configuration of multiple-clients-single-server, -using two hardware platforms connected by ethernet. Network bandwidth depends on both platforms -and models used. It is set not to be a bottleneck for workload intensity. The connection is -dedicated only to measuring performance. - -.. dropdown:: See more details about OVMS benchmark setup - - The benchmark setup for OVMS consists of four main parts: - .. image:: ../assets/images/performance_benchmarks_ovms_02.png - :alt: OVMS Benchmark Setup Diagram - * **OpenVINO™ Model Server** is launched as a docker container on the server platform and it - listens to (and answers) requests from clients. OpenVINO™ Model Server is run on the same - system as the OpenVINO™ toolkit benchmark application in corresponding benchmarking. Models - served by OpenVINO™ Model Server are located in a local file system mounted into the docker - container. The OpenVINO™ Model Server instance communicates with other components via ports - over a dedicated docker network. - * **Clients** are run in separated physical machine referred to as client platform. Clients - are implemented in Python3 programming language based on TensorFlow* API and they work as - parallel processes. Each client waits for a response from OpenVINO™ Model Server before it - will send a new next request. The role played by the clients is also verification of - responses. - - * **Load balancer** works on the client platform in a docker container. HAProxy is used for - this purpose. Its main role is counting of requests forwarded from clients to OpenVINO™ - Model Server, estimating its latency, and sharing this information by Prometheus service. - The reason of locating the load balancer on the client site is to simulate real life - scenario that includes impact of physical network on reported metrics. - - * **Execution Controller** is launched on the client platform. It is responsible for - synchronization of the whole measurement process, downloading metrics from the load - balancer, and presenting the final report of the execution. - - - -.. raw:: html - -

Test performance yourself

- -You can also test performance for your system yourself, following the guide on -:doc:`getting performance numbers `. - -.. raw:: html - -

Disclaimers

+**Disclaimers** * Intel® Distribution of OpenVINO™ toolkit performance results are based on release 2024.3, as of July 31, 2024. @@ -192,12 +167,11 @@ You can also test performance for your system yourself, following the guide on The results may not reflect all publicly available updates. Intel technologies' features and benefits depend on system configuration and may require enabled hardware, software, or service -activation. Learn more at intel.com, or from the OEM or retailer. +activation. Learn more at intel.com, the OEM, or retailer. See configuration disclosure for details. No product can be absolutely secure. Performance varies by use, configuration and other factors. Learn more at `www.intel.com/PerformanceIndex `__. -Your costs and results may vary. Intel optimizations, for Intel compilers or other products, may not optimize to the same degree for non-Intel products. @@ -205,9 +179,6 @@ for non-Intel products. - - - .. raw:: html diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index 35e09f91f72b9c..39b27d12c970fd 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -4,7 +4,7 @@ Most Efficient Large Language Models for AI PC This page is regularly updated to help you identify the best-performing LLMs on the Intel® Core™ Ultra processor family and AI PCs. -The tables below list the key performance indicators for a selection of Large Language Models, +The tables below list key performance indicators for a selection of Large Language Models, running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. @@ -23,24 +23,34 @@ running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. :class: modeldata stripe :name: supportedModelsTableOv :header-rows: 1 - :file: ../../_static/download/llm_models.csv + :file: ../../_static/benchmarks_files/llm_models.csv -For complete information on the system config, see: -`Hardware Platforms [PDF] `__ - -To view the data in an editable form, you can download the .csv file here: - .. grid:: 1 1 2 2 :gutter: 4 .. grid-item:: - .. button-link:: ../../_static/download/llm_models.csv + All models listed here were tested with the following parameters: + + * Framework: PyTorch + * Model precision: INT4 + * Beam: 1 + * Batch size: 1 + + .. grid-item:: + + .. button-link:: https://docs.openvino.ai/2024/_static/benchmarks_files/OV-2024.4-platform_list.pdf :color: primary :outline: :expand: - :material-regular:`download;1.5em` Click for OpenVINO LLM results [CSV] + :material-regular:`download;1.5em` Get full system info [PDF] + + .. button-link:: ../../_static/benchmarks_files/llm_models.csv + :color: primary + :outline: + :expand: + :material-regular:`download;1.5em` Get the data in .csv [CSV] diff --git a/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst b/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst index 069c940063cf14..e35d42a6a02abc 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/getting-performance-numbers.rst @@ -1,124 +1,201 @@ Getting Performance Numbers =========================== +1. `Benchmarking methodology for OpenVINO <#benchmarking-methodology-for-openvino>`__ + a. 
`OpenVINO benchmarking (general) <#openvino-benchmarking--general->`__ + b. `OpenVINO Model Server benchmarking (general) <#openvino-model-server-benchmarking--general->`__ + c. `OpenVINO Model Server benchmarking (LLM) <#openvino-model-server-benchmarking--llm->`__ -This guide explains how to use the benchmark_app to get performance numbers. It also explains how the performance -numbers are reflected through internal inference performance counters and execution graphs. It also includes -information on using ITT and Intel® VTune™ Profiler to get performance insights. +2. `How to obtain benchmark results <#how-to-obtain-benchmark-results>`__ + a. `General considerations <#general-considerations>`__ + b. `OpenVINO benchmarking (general) <#openvino-benchmarking--general->`__ + c. `OpenVINO benchmarking (LLM) <#openvino-benchmarking--llm->`__ -.. raw:: html -

Test performance with the benchmark_app

+Benchmarking methodology for OpenVINO +############################################################################################### -You can run OpenVINO benchmarks in both C++ and Python APIs, yet the experience differs in each case. -The Python one is part of OpenVINO Runtime installation, while C++ is available as a code sample. -For a detailed description, see: :doc:`benchmark_app <../../learn-openvino/openvino-samples/benchmark-tool>`. +OpenVINO benchmarking (general) +++++++++++++++++++++++++++++++++++++++++++++ -Make sure to install the latest release package with support for frameworks of the models you want to test. -For the most reliable performance benchmarks, :doc:`prepare the model for use with OpenVINO <../../openvino-workflow/model-preparation>`. +The OpenVINO benchmark setup includes a single system with OpenVINO™, as well as the benchmark +application installed. It measures the time spent on actual inference (excluding any pre or post +processing) and then reports on the inferences per second (or Frames Per Second). +OpenVINO Model Server benchmarking (general) +++++++++++++++++++++++++++++++++++++++++++++ -.. raw:: html +OpenVINO™ Model Server (OVMS) employs the Intel® Distribution of OpenVINO™ toolkit runtime +libraries and exposes a set of models via a convenient inference API over gRPC or HTTP/REST. +Its benchmark results are measured with the configuration of multiple-clients-single-server, +using two hardware platforms connected by ethernet. Network bandwidth depends on both platforms +and models used. It is set not to be a bottleneck for workload intensity. The connection is +dedicated only to measuring performance. -

Running the benchmark application

+.. dropdown:: See more details about OVMS benchmark setup + The benchmark setup for OVMS consists of four main parts: -The benchmark_app includes a lot of device-specific options, but the primary usage is as simple as: + .. image:: ../assets/images/performance_benchmarks_ovms_02.png + :alt: OVMS Benchmark Setup Diagram -.. code-block:: sh + * **OpenVINO™ Model Server** is launched as a docker container on the server platform and it + listens to (and answers) requests from clients. OpenVINO™ Model Server is run on the same + system as the OpenVINO™ toolkit benchmark application in corresponding benchmarking. Models + served by OpenVINO™ Model Server are located in a local file system mounted into the docker + container. The OpenVINO™ Model Server instance communicates with other components via ports + over a dedicated docker network. - benchmark_app -m -d -i + * **Clients** are run in separated physical machine referred to as client platform. Clients + are implemented in Python3 programming language based on TensorFlow* API and they work as + parallel processes. Each client waits for a response from OpenVINO™ Model Server before it + will send a new next request. The role played by the clients is also verification of + responses. + * **Load balancer** works on the client platform in a docker container. HAProxy is used for + this purpose. Its main role is counting of requests forwarded from clients to OpenVINO™ + Model Server, estimating its latency, and sharing this information by Prometheus service. + The reason of locating the load balancer on the client site is to simulate real life + scenario that includes impact of physical network on reported metrics. -Each of the :doc:`OpenVINO supported devices <../compatibility-and-support/supported-devices>` offers -performance settings that contain command-line equivalents in the Benchmark app. + * **Execution Controller** is launched on the client platform. It is responsible for + synchronization of the whole measurement process, downloading metrics from the load + balancer, and presenting the final report of the execution. -While these settings provide really low-level control for the optimal model performance on the *specific* device, -it is recommended to always start performance evaluation with the :doc:`OpenVINO High-Level Performance Hints <../../openvino-workflow/running-inference/optimize-inference/high-level-performance-hints>` first, like so: -.. code-block:: sh +OpenVINO Model Server benchmarking (LLM) +++++++++++++++++++++++++++++++++++++++++ - # for throughput prioritization - benchmark_app -hint tput -m -d - # for latency prioritization - benchmark_app -hint latency -m -d +In the benchmarking results presented here, the load from clients is simulated using the +benchmark_serving.py script from vLLM and the ShareGPT dataset. It represents real life usage +scenarios. Both OpenVINO Model Server and vLLM expose OpenAI-compatible REST endpoints so the +methodology is identical. +In the experiments, we change the average request rate to identify the tradeoff between total +throughput and the TPOT latency. +Note that in the benchmarking, the feature of prefix_caching is not used. -.. raw:: html -

Additional benchmarking considerations

-.. raw:: html +How to obtain benchmark results +############################################################################################### -

1 - Select a Proper Set of Operations to Measure

+General considerations +++++++++++++++++++++++ +.. dropdown:: Select a proper set of operations to measure -When evaluating performance of a model with OpenVINO Runtime, it is required to measure a proper set of operations. + When evaluating performance of a model with OpenVINO Runtime, make sure to measure a + proper set of operations. -- Avoid including one-time costs such as model loading. -- Track operations that occur outside OpenVINO Runtime (such as video decoding) separately. + * Avoid including one-time costs such as model loading. + * Track operations that occur outside OpenVINO Runtime, such as video decoding, separately. + .. note:: -.. note:: + Some image pre-processing can be baked into OpenVINO IR and accelerated accordingly. + For more information, refer to + :doc:`Embedding Pre-processing <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>` + and + :doc:`General Runtime Optimizations <../../openvino-workflow/running-inference/optimize-inference/general-optimizations>`. - Some image pre-processing can be baked into OpenVINO IR and accelerated accordingly. For more information, - refer to :doc:`Embedding Pre-processing <../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>` and - :doc:`General Runtime Optimizations <../../openvino-workflow/running-inference/optimize-inference/general-optimizations>`. +.. dropdown:: Maximize the chance to obtain credible data + Performance conclusions should be built on reproducible data. Performance + measurements should be done with a large number of invocations of the same routine. + Since the first iteration is almost always significantly slower than the subsequent ones, + an aggregated value can be used for the execution time for final projections + (a minimal measurement sketch is shown after these considerations): + * If the warm-up run does not help or execution times still vary, you can try running a + large number of iterations and then use the mean value of the results. + * If time values differ too much, consider using a geomean. + * Be aware of potential power-related irregularities, such as throttling. A device may assume + one of several different power states, so it is advisable to fix its frequency when + optimizing, for better performance data reproducibility. + * Note that end-to-end application benchmarking should also be performed under real + operational conditions. -.. raw:: html +.. dropdown:: Compare performance with native/framework code -

2 - Try to Get Credible Data

+ When comparing OpenVINO Runtime performance with the framework or reference code, + make sure that both versions are as similar as possible: -Performance conclusions should be build upon reproducible data. As for the performance measurements, they should -be done with a large number of invocations of the same routine. Since the first iteration is almost always significantly -slower than the subsequent ones, an aggregated value can be used for the execution time for final projections: + * Wrap the exact inference execution (for examples, see :doc:`Benchmark app <../../learn-openvino/openvino-samples/benchmark-tool>`). + * Do not include model loading time. + * Ensure that the inputs are identical for OpenVINO Runtime and the framework. For example, watch out for random values that can be used to populate the inputs. + * If any user-side pre-processing needs to be tracked separately, consider :doc:`image pre-processing and conversion <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>`. + * When applicable, leverage the :doc:`Dynamic Shapes support <../../openvino-workflow/running-inference/dynamic-shapes>`. + * If possible, demand the same accuracy. For example, TensorFlow allows ``FP16`` execution, so when comparing to that, make sure to test the OpenVINO Runtime with ``FP16`` as well. -- If the warm-up run does not help or execution time still varies, you can try running a large number of iterations - and then average or find a mean of the results. -- If the time values range too much, consider geomean. -- Be aware of the throttling and other power oddities. A device can exist in one of several different power states. - When optimizing your model, consider fixing the device frequency for better performance data reproducibility. - However, the end-to-end (application) benchmarking should also be performed under real operational conditions. +.. dropdown:: Make sure the benchmarking setup is proper for the selected scenario + * Install the latest release package supporting the frameworks of the tested models. + * For the most reliable performance benchmarks, + :doc:`prepare the model for use with OpenVINO <../../openvino-workflow/model-preparation>`. + * For testing generative AI models, make sure to select the method that best suits your case: + Optimum-Intel or the OpenVINO GenAI package. -.. raw:: html -
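The warm-up-and-aggregate approach from the considerations above can be illustrated with a minimal Python sketch; it assumes a static-shape model file named ``model.xml`` (a placeholder) and synchronous inference, and it is not a substitute for the benchmark_app methodology:

.. code-block:: py

   import time
   import statistics
   import numpy as np
   import openvino as ov

   core = ov.Core()
   compiled = core.compile_model("model.xml", "CPU")  # hypothetical model path
   request = compiled.create_infer_request()
   # random input data, assuming a single static-shape input
   data = np.random.rand(*list(compiled.input(0).shape)).astype(np.float32)

   for _ in range(10):  # warm-up: discard the slow first iterations
       request.infer([data])

   durations = []
   for _ in range(100):  # measure a large number of invocations of the same routine
       start = time.perf_counter()
       request.infer([data])
       durations.append(time.perf_counter() - start)

   print(f"mean: {statistics.mean(durations):.4f} s, "
         f"geomean: {statistics.geometric_mean(durations):.4f} s")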

3 - Compare Performance with Native/Framework Code

+OpenVINO benchmarking (general) ++++++++++++++++++++++++++++++++ -When comparing the OpenVINO Runtime performance with the framework or another reference code, make sure that both versions are as similar as possible: +The default way of measuring OpenVINO performance is to run a piece of code referred to as +:doc:`the benchmark tool <../../learn-openvino/openvino-samples/benchmark-tool>`. +For Python, it is part of the OpenVINO Runtime installation, while for C++, it is available as +a code sample. -- Wrap the exact inference execution (for examples, see :doc:`Benchmark app <../../learn-openvino/openvino-samples/benchmark-tool>`). -- Do not include model loading time. -- Ensure that the inputs are identical for OpenVINO Runtime and the framework. For example, watch out for random values that can be used to populate the inputs. -- In situations when any user-side pre-processing should be tracked separately, consider :doc:`image pre-processing and conversion <../../openvino-workflow/running-inference/optimize-inference/optimize-preprocessing>`. -- When applicable, leverage the :doc:`Dynamic Shapes support <../../openvino-workflow/running-inference/dynamic-shapes>`. -- If possible, demand the same accuracy. For example, TensorFlow allows ``FP16`` execution, so when comparing to that, make sure to test the OpenVINO Runtime with the ``FP16`` as well. +Running the benchmark application +--------------------------------- + +The benchmark_app includes a lot of device-specific options, but the primary usage is as simple +as: + +.. code-block:: sh + + benchmark_app -m <model> -d <device> -i <input> -.. raw:: html -
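For instance, a concrete run could look like this, where the model, device, and input file names are placeholders:

.. code-block:: sh

   # benchmark an IR model on CPU with a sample image for 30 seconds
   benchmark_app -m model.xml -d CPU -i image.png -t 30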

Internal Inference Performance Counters and Execution Graphs

+Each of the :doc:`OpenVINO supported devices <../compatibility-and-support/supported-devices>` +offers performance settings that contain command-line equivalents in the Benchmark app. -More detailed insights into inference performance breakdown can be achieved with device-specific performance counters and/or execution graphs. +While these settings provide very low-level control for optimal model performance on a +*specific* device, it is recommended to always start performance evaluation with the +:doc:`OpenVINO High-Level Performance Hints <../../openvino-workflow/running-inference/optimize-inference/high-level-performance-hints>` +first, like so: + +.. code-block:: sh + + # for throughput prioritization + benchmark_app -hint tput -m <model> -d <device> + # for latency prioritization + benchmark_app -hint latency -m <model> -d <device> + + +Internal Inference Performance Counters and Execution Graphs +------------------------------------------------------------- + +More detailed insights into inference performance breakdown can be achieved with device-specific +performance counters and/or execution graphs. Both :doc:`C++ and Python <../../learn-openvino/openvino-samples/benchmark-tool>` -versions of the *benchmark_app* support a ``-pc`` command-line parameter that outputs internal execution breakdown. +versions of the benchmark_app support a ``-pc`` command-line parameter that outputs an internal +execution breakdown. -For example, the table shown below is part of performance counters for quantized -`TensorFlow implementation of ResNet-50 `__ -model inference on :doc:`CPU Plugin <../../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device>`. -Keep in mind that since the device is CPU, the ``realTime`` wall clock and the ``cpu`` time layers are the same. -Information about layer precision is also stored in the performance counters. +For example, the table below is part of performance counters for +:doc:`CPU inference <../../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device>` +of a `TensorFlow implementation of ResNet-50 `__ model. +Keep in mind that since the device is CPU, the ``realTime`` wall clock and the ``cpu`` time +layers are the same. Information about layer precision is also stored in the performance +counters. =========================================================== ============= ============== ===================== ================= ============== @@ -136,39 +213,63 @@ Information about layer precision is also stored in the performance counters. | The ``execStatus`` column of the table includes the following possible values: | - ``EXECUTED`` - the layer was executed by standalone primitive. -| - ``NOT_RUN`` - the layer was not executed by standalone primitive or was fused with another operation and executed in another layer primitive. +| - ``NOT_RUN`` - the layer was not executed by standalone primitive or was fused with + another operation and executed in another layer primitive. | -| The ``execType`` column of the table includes inference primitives with specific suffixes. The layers could have the following marks: -| - The ``I8`` suffix is for layers that had 8-bit data type input and were computed in 8-bit precision. +| The ``execType`` column of the table includes inference primitives with specific suffixes. + The layers could have the following marks: +| - The ``I8`` suffix is for layers that had 8-bit data type input and were computed in + 8-bit precision. | - The ``FP32`` suffix is for layers computed in 32-bit precision. 
| -| All ``Convolution`` layers are executed in ``int8`` precision. The rest of the layers are fused into Convolutions using post-operation optimization, - as described in :doc:`CPU Device <../../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device>`. This contains layer names - (as seen in OpenVINO IR), type of the layer, and execution statistics. +| All ``Convolution`` layers are executed in ``int8`` precision. The rest of the layers are + fused into Convolutions using post-operation optimization, as described in + :doc:`CPU Device <../../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device>`. + This contains layer names (as seen in OpenVINO IR), type of the layer, and execution + statistics. -Both *benchmark_app* versions also support the ``exec_graph_path`` command-line option. It requires OpenVINO to output the same execution -statistics per layer, but in the form of plugin-specific `Netron-viewable `__ graph to the specified file. +Both *benchmark_app* versions also support the ``exec_graph_path`` command-line option. +It requires OpenVINO to output the same execution statistics per layer, but in the form of a +plugin-specific `Netron-viewable `__ graph written to the specified file. + +Especially when performance-debugging +:doc:`latency <../../openvino-workflow/running-inference/optimize-inference/optimizing-latency>`, +note that the counters do not reflect the time spent in the ``plugin/device/driver/etc`` queues. +If the sum of the counters is too different from the latency of an inference request, consider +testing with fewer inference requests. For example, running a single +:doc:`OpenVINO stream <../../openvino-workflow/running-inference/optimize-inference/optimizing-throughput>` +with multiple requests would produce nearly identical counters as running a single inference +request, while the actual latency can be quite different. + +Lastly, the performance statistics with both performance counters and execution graphs are +averaged, so such data for the +:doc:`inputs of dynamic shapes <../../openvino-workflow/running-inference/dynamic-shapes>` +should be measured carefully, preferably by isolating the specific shape and executing multiple +times in a loop, to gather reliable data. + +Use ITT to Get Performance Insights +-------------------------------------- + +In general, OpenVINO and its individual plugins are heavily instrumented with Intel® +Instrumentation and Tracing Technology (ITT). Therefore, you can also compile OpenVINO from the +source code with ITT enabled and use tools like +`Intel® VTune™ Profiler `__ +to get a detailed inference performance breakdown and additional insights into application-level +performance on the timeline view. + + +OpenVINO benchmarking (LLM) ++++++++++++++++++++++++++++++++ + +Large Language Models require a different benchmarking approach than static models. A detailed +description will be added soon. -Especially when performance-debugging the :doc:`latency <../../openvino-workflow/running-inference/optimize-inference/optimizing-latency>`, note that the counters -do not reflect the time spent in the ``plugin/device/driver/etc`` queues. If the sum of the counters is too different from the latency -of an inference request, consider testing with less inference requests. 
For example, running single -:doc:`OpenVINO stream <../../openvino-workflow/running-inference/optimize-inference/optimizing-throughput>` with multiple requests would produce nearly identical -counters as running a single inference request, while the actual latency can be quite different. -Lastly, the performance statistics with both performance counters and execution graphs are averaged, -so such data for the :doc:`inputs of dynamic shapes <../../openvino-workflow/running-inference/dynamic-shapes>` should be measured carefully, -preferably by isolating the specific shape and executing multiple times in a loop, to gather reliable data. -.. raw:: html -
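To illustrate the two options discussed above, typical invocations might look as follows; ``model.xml`` is a placeholder, and the exact spelling of the execution-graph flag is worth double-checking with ``benchmark_app -h`` for your version:

.. code-block:: sh

   # print per-layer performance counters after the run
   benchmark_app -m model.xml -d CPU -pc

   # additionally dump the execution graph for inspection in Netron
   benchmark_app -m model.xml -d CPU -exec_graph_path exec_graph.xml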

Use ITT to Get Performance Insights

-In general, OpenVINO and its individual plugins are heavily instrumented with Intel® Instrumentation and Tracing Technology (ITT). -Therefore, you can also compile OpenVINO from the source code with ITT enabled and use tools like -`Intel® VTune™ Profiler `__ to get detailed inference performance breakdown and additional -insights in the application-level performance on the timeline view. diff --git a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst index 8b93e6a1aebe7b..3162bae7254704 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst @@ -4,9 +4,10 @@ Model Accuracy The following two tables present the absolute accuracy drop calculated as the accuracy difference -between OV-accuracy and the original frame work accuracy for FP32, and the same for INT8, BF16 and -FP16 representations of a model on three platform architectures. The third table presents the GenAI model accuracies as absolute accuracy values. Please also refer to notes below -the table for more information. +between OV-accuracy and the original framework accuracy for FP32, and the same for INT8, BF16, +and FP16 representations of a model on three platform architectures. The third table presents +the GenAI model accuracies as absolute accuracy values. Refer to notes below the table for more +information. * A - Intel® Core™ i9-9000K (AVX2), INT8 and FP32 * B - Intel® Xeon® 6338, (VNNI), INT8 and FP32 diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models.csv b/docs/sphinx_setup/_static/benchmarks_files/llm_models.csv new file mode 100644 index 00000000000000..dee8e72a9578fd --- /dev/null +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models.csv @@ -0,0 +1,22 @@ +Model name,"Throughput: (tokens/sec. 2nd token)",1st token latency (msec),Max RSS memory used. (MB),Input tokens,Output tokens +OPT-2.7b,"20.2",2757,7084,937,128 +Phi-3-mini-4k-instruct,"19.9",2776,7028,1062,128 +Orca-mini-3b,"19.2",2966,7032,1024,128 +Phi-2,"17.8",2162,7032,1024,128 +Stable-Zephyr-3b-dpo,"17.0",1791,7007,946,128 +ChatGLM3-6b,"16.5",3569,6741,1024,128 +Dolly-v2-3b,"15.8",6891,6731,1024,128 +Stablelm-3b-4e1t,"15.7",2051,7018,1024,128 +Red-Pajama-Incite-Chat-3b-V1,"14.8",6582,7028,1020,128 +Falcon-7b-instruct,"14.5",4552,7033,1049,128 +Codegen25-7b,"13.3",3982,6732,1024,128 +GPT-j-6b,"13.2",7213,6882,1024,128 +Stablelm-7b,"12.8",6339,7013,1020,128 +Llama-3-8b,"12.8",4356,6953,1024,128 +Llama-2-7b-chat,"12.3",4205,6906,1024,128 +Llama-7b,"11.7",4315,6927,1024,128 +Mistral-7b-v0.1,"10.5",4462,7242,1007,128 +Zephyr-7b-beta,"10.5",4500,7039,1024,128 +Qwen1.5-7b-chat,"9.9",4318,7034,1024,128 +Baichuan2-7b-chat,"9.8",4668,6724,1024,128 +Qwen-7b-chat,"9.0",5141,6996,1024,128 \ No newline at end of file diff --git a/docs/sphinx_setup/_static/download/llm_models.csv b/docs/sphinx_setup/_static/download/llm_models.csv deleted file mode 100644 index 2ff93f503a6d3b..00000000000000 --- a/docs/sphinx_setup/_static/download/llm_models.csv +++ /dev/null @@ -1,22 +0,0 @@ -Model name,"Throughput: (tokens/sec. 2nd token)",1st token latency (msec),Max RSS memory used. 
(MB),Input tokens,Output tokens,Model Precision,Beam,Batch size,Framework -OPT-2.7b,20.2,2757,7084,937,128,INT4,1,1,PT -Phi-3-mini-4k-instruct,19.9,2776,7028,1062,128,INT4,1,1,PT -Orca-mini-3b,19.2,2966,7032,1024,128,INT4,1,1,PT -Phi-2,17.8,2162,7032,1024,128,INT4,1,1,PT -Stable-Zephyr-3b-dpo,17.0,1791,7007,946,128,INT4,1,1,PT -ChatGLM3-6b,16.5,3569,6741,1024,128,INT4,1,1,PT -Dolly-v2-3b,15.8,6891,6731,1024,128,INT4,1,1,PT -Stablelm-3b-4e1t,15.7,2051,7018,1024,128,INT4,1,1,PT -Red-Pajama-Incite-Chat-3b-V1,14.8,6582,7028,1020,128,INT4,1,1,PT -Falcon-7b-instruct,14.5,4552,7033,1049,128,INT4,1,1,PT -Codegen25-7b,13.3,3982,6732,1024,128,INT4,1,1,PT -GPT-j-6b,13.2,7213,6882,1024,128,INT4,1,1,PT -Stablelm-7b,12.8,6339,7013,1020,128,INT4,1,1,PT -Llama-3-8b,12.8,4356,6953,1024,128,INT4,1,1,PT -Llama-2-7b-chat,12.3,4205,6906,1024,128,INT4,1,1,PT -Llama-7b,11.7,4315,6927,1024,128,INT4,1,1,PT -Mistral-7b-v0.1,10.5,4462,7242,1007,128,INT4,1,1,PT -Zephyr-7b-beta,10.5,4500,7039,1024,128,INT4,1,1,PT -Qwen1.5-7b-chat,9.9,4318,7034,1024,128,INT4,1,1,PT -Baichuan2-7b-chat,9.8,4668,6724,1024,128,INT4,1,1,PT -Qwen-7b-chat,9.0,5141,6996,1024,128,INT4,1,1,PT \ No newline at end of file diff --git a/docs/sphinx_setup/_static/download/llm_models_ovms.csv b/docs/sphinx_setup/_static/download/llm_models_ovms.csv deleted file mode 100644 index d481fd3b6a56e8..00000000000000 --- a/docs/sphinx_setup/_static/download/llm_models_ovms.csv +++ /dev/null @@ -1,100 +0,0 @@ -Product,Model,Framework,Precision,Node,Request Rate,Throughput [tok/s],TPOT Mean Latency -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,0.2,92.75,75.75 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,0.3,137.89,98.6 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,0.4,182.68,144.36 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,0.5,227.02,238.54 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,0.6,259.06,679.07 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,0.7,267.24,785.75 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,0.8,267.77,815.11 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,0.9,270.01,827.09 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,1.0,268.92,840.1 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,2.0,269.6,847.81 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8380,inf,270.55,839.37 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,0.2,92.63,63.23 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,0.4,183.51,105.0 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,0.6,272.59,95.34 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,0.8,359.28,126.61 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,1.0,442.69,169.24 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,1.2,521.61,195.94 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,1.4,589.34,267.43 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,1.6,650.25,291.68 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,1.8,655.39,308.64 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,2.0,680.45,302.09 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8480+,inf,702.42,307.82 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8580,0.2,92.89,54.69 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 
8580,0.4,184.37,77.0 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8580,0.6,273.06,101.81 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8580,0.8,360.22,135.38 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8580,1.0,442.46,170.65 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8580,1.2,519.5,208.44 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8580,1.4,590.11,252.86 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8580,1.6,651.09,286.93 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8580,1.8,670.74,298.02 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8580,2.0,684.4,299.41 -ovms,meta-llama/Llama-2-7b-chat-hf,PT,INT8-CW,Xeon Platinum 8580,inf,701.91,305.9 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,0.2,79.24,73.06 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,0.3,118.42,90.31 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,0.4,157.04,113.23 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,0.5,193.85,203.97 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,0.6,232.36,253.17 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,0.7,260.56,581.45 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,0.8,271.97,761.05 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,0.9,273.36,787.74 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,1.0,272.54,811.37 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,2.0,278.07,809.3 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8380,inf,275.71,810.89 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,0.2,78.3,60.37 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,0.4,156.42,69.27 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,0.6,232.27,77.79 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,0.8,307.37,90.07 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,1.0,380.61,104.71 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,1.2,452.18,127.36 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,1.4,519.44,156.18 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,1.6,587.62,169.44 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,1.8,649.94,198.44 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,2.0,707.46,234.44 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8480+,inf,799.46,265.5 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8580,0.2,78.61,54.12 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8580,0.4,156.19,70.38 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8580,0.6,232.36,81.83 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8580,0.8,307.01,101.66 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8580,1.0,376.36,139.62 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8580,1.2,447.75,158.53 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8580,1.4,519.74,160.26 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8580,1.6,582.37,190.22 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 
8580,1.8,635.46,231.31 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8580,2.0,698.38,247.77 -ovms,meta-llama/Meta-Llama-3-8B-Instruct,PT,INT8-CW,Xeon Platinum 8580,inf,843.51,252.12 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,0.2,87.18,74.96 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,0.3,130.74,92.67 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,0.4,172.94,117.03 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,0.5,214.71,172.69 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,0.6,255.45,282.74 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,0.7,280.38,629.68 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,0.8,280.55,765.16 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,0.9,289.65,765.65 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,1.0,290.67,783.47 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,2.0,284.14,815.09 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8380,inf,290.39,793.52 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,0.2,88.9,60.04 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,0.4,176.5,70.24 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,0.6,262.04,77.01 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,0.8,346.01,95.29 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,1.0,427.37,114.16 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,1.2,507.86,138.56 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,1.4,582.58,150.72 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,1.6,655.61,166.64 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,1.8,717.9,216.76 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,2.0,774.3,233.49 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8480+,inf,873.93,245.31 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,0.2,88.92,56.33 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,0.4,175.99,72.72 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,0.6,261.96,84.24 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,0.8,346.78,101.67 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,1.0,427.85,128.33 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,1.2,506.17,150.01 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,1.4,581.72,167.61 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,1.6,651.97,190.91 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,1.8,713.2,222.56 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,2.0,771.17,232.08 -ovms,mistralai/Mistral-7B-v0.1,PT,INT8-CW,Xeon Platinum 8580,inf,839.74,253.74 From dd16602824c66c53935a2d084ab4d7ace36a6414 Mon Sep 17 00:00:00 2001 From: Tomasz Krupa Date: Mon, 21 Oct 2024 07:44:28 +0000 Subject: [PATCH 083/112] [GPU] Weightless caching (#25731) Co-authored-by: Pavel Durandin --- .../openvino/runtime/properties/__init__.py | 1 + .../pyopenvino/core/properties/properties.cpp | 1 + .../tests/test_runtime/test_properties.py | 5 + .../rt_info/weightless_caching_attributes.hpp | 38 ++++ .../op/util/weightless_caching_attributes.cpp | 9 + .../pass/serialization/deterministicity.cpp | 1 + src/frontends/ir/src/ir_deserializer.cpp | 8 + .../include/openvino/runtime/properties.hpp | 6 + .../include/intel_gpu/primitives/data.hpp | 84 +++++-- 
src/plugins/intel_gpu/src/graph/program.cpp | 11 + .../intel_gpu/src/plugin/compiled_model.cpp | 26 +-- src/plugins/intel_gpu/src/plugin/plugin.cpp | 7 +- .../intel_gpu/src/plugin/program_builder.cpp | 10 + .../src/runtime/execution_config.cpp | 1 + .../tests/functional/behavior/model_cache.cpp | 210 ++++++++++++++++++ 15 files changed, 388 insertions(+), 30 deletions(-) create mode 100644 src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp create mode 100644 src/core/src/op/util/weightless_caching_attributes.cpp create mode 100644 src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp diff --git a/src/bindings/python/src/openvino/runtime/properties/__init__.py b/src/bindings/python/src/openvino/runtime/properties/__init__.py index caaa93f37223b0..3269ea42e32ac2 100644 --- a/src/bindings/python/src/openvino/runtime/properties/__init__.py +++ b/src/bindings/python/src/openvino/runtime/properties/__init__.py @@ -29,6 +29,7 @@ from openvino._pyopenvino.properties import execution_devices from openvino._pyopenvino.properties import loaded_from_cache from openvino._pyopenvino.properties import cache_encryption_callbacks +from openvino._pyopenvino.properties import weights_path # Submodules from openvino.runtime.properties import hint diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp index 470161d9779558..a6b30bd773001f 100644 --- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp +++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp @@ -43,6 +43,7 @@ void regmodule_properties(py::module m) { OPENVINO_SUPPRESS_DEPRECATED_END wrap_property_RW(m_properties, ov::force_tbb_terminate, "force_tbb_terminate"); wrap_property_RW(m_properties, ov::enable_mmap, "enable_mmap"); + wrap_property_RW(m_properties, ov::weights_path, "weights_path"); wrap_property_RO(m_properties, ov::supported_properties, "supported_properties"); wrap_property_RO(m_properties, ov::available_devices, "available_devices"); diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py index e8d3162c362f4f..32eb48f6765f41 100644 --- a/src/bindings/python/tests/test_runtime/test_properties.py +++ b/src/bindings/python/tests/test_runtime/test_properties.py @@ -266,6 +266,11 @@ def test_properties_ro(ov_property_ro, expected_value): ), (props.force_tbb_terminate, "FORCE_TBB_TERMINATE", ((True, True), (False, False))), (props.enable_mmap, "ENABLE_MMAP", ((True, True), (False, False))), + ( + props.weights_path, + "WEIGHTS_PATH", + (("./model.bin", "./model.bin"),), + ), (hints.inference_precision, "INFERENCE_PRECISION_HINT", ((Type.f32, Type.f32),)), ( hints.model_priority, diff --git a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp new file mode 100644 index 00000000000000..fedcb030fb52cf --- /dev/null +++ b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/core_visibility.hpp" +#include "openvino/core/runtime_attribute.hpp" + +namespace ov { + +/** + * @brief Holds weightless caching attributes of a single constant. 
+ * + * WeightlessCacheAttribute class represents runtime info attribute that holds + * the values of original size of the constant in bytes and the binary offset of the + * constant's data in the weights file used by the weightless caching mechanism. It's + * not copyable in case the data was changed (the original node was replaced by a new + * one produced during the transformation pipeline) - in that case weightless caching + * can't be used for that constant. + */ +class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute { +public: + OPENVINO_RTTI("WeightlessCacheAttribute"); + + WeightlessCacheAttribute() = delete; + + WeightlessCacheAttribute(size_t original_size, size_t bin_offset) + : original_size(original_size), + bin_offset(bin_offset) {} + + bool is_copyable() const override; + + size_t original_size; + size_t bin_offset; +}; + +} // namespace ov diff --git a/src/core/src/op/util/weightless_caching_attributes.cpp b/src/core/src/op/util/weightless_caching_attributes.cpp new file mode 100644 index 00000000000000..7c540f8a3bef02 --- /dev/null +++ b/src/core/src/op/util/weightless_caching_attributes.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" + +bool ov::WeightlessCacheAttribute::is_copyable() const { + return false; +} diff --git a/src/core/tests/pass/serialization/deterministicity.cpp b/src/core/tests/pass/serialization/deterministicity.cpp index 5bcfbf97b77890..8441da501eb9bf 100644 --- a/src/core/tests/pass/serialization/deterministicity.cpp +++ b/src/core/tests/pass/serialization/deterministicity.cpp @@ -193,6 +193,7 @@ TEST_P(SerializationDeterministicityInputOutputTest, FromOvModel) { auto& expected1 = modelRef; ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1, irVersion).run_on_model(modelRef); auto expected2 = ov::test::readModel(m_out_xml_path_1, m_out_bin_path_1); + ov::pass::Serialize(m_out_xml_path_2, m_out_bin_path_2, irVersion).run_on_model(expected2); EXPECT_EQ(input0Name, expected1->input(0).get_node()->get_friendly_name()); diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 68900b150514bc..f9ddcf1e8c14a6 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -9,6 +9,7 @@ #include "openvino/core/except.hpp" #include "openvino/core/meta_data.hpp" +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/loop.hpp" @@ -944,6 +945,13 @@ std::shared_ptr ov::XmlDeserializer::create_node(const std::vector(pugixml::get_uint64_attr(dn, "size")), + static_cast(pugixml::get_uint64_attr(dn, "offset"))); + } } ovNode->set_friendly_name(params.name); diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 621c0074fc9d1e..627314748bbe9c 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -1345,4 +1345,10 @@ static constexpr Property affinity{"AFFINITY"}; */ static constexpr Property, PropertyMutability::RO> execution_devices{"EXECUTION_DEVICES"}; +/** + * @brief Path to the file with model's weights. + * + * @note This property is used for weightless caching. Only used when the ov::CacheMode property is set to "OPTIMIZE_SIZE". 
+ */ +static constexpr Property weights_path{"WEIGHTS_PATH"}; } // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp index 7bc020c2529a88..461f063ec26bc5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp @@ -3,9 +3,13 @@ // #pragma once -#include "primitive.hpp" -#include "intel_gpu/runtime/memory.hpp" +#include + #include "intel_gpu/runtime/engine.hpp" +#include "intel_gpu/runtime/memory.hpp" +#include "openvino/runtime/shared_buffer.hpp" +#include "openvino/util/mmap_object.hpp" +#include "primitive.hpp" namespace cldnn { @@ -29,6 +33,9 @@ struct data : public primitive_base { /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. memory::ptr mem; + size_t original_size = SIZE_MAX; + size_t bin_offset = SIZE_MAX; + size_t hash() const override { size_t seed = primitive::hash(); seed = hash_combine(seed, id); @@ -46,20 +53,30 @@ struct data : public primitive_base { size_t data_size = mem->size(); ob << make_data(&data_size, sizeof(size_t)); - if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { - ob << make_data(mem->buffer_ptr(), data_size); + bool is_cache_without_weights = bin_offset != SIZE_MAX && data_size == original_size; + + if (is_cache_without_weights) { + ob << true; + ob << bin_offset; } else { - std::vector _buf; - _buf.resize(data_size); - stream* strm = reinterpret_cast(ob.get_stream()); - mem->copy_to(*strm, _buf.data()); - ob << make_data(_buf.data(), data_size); + ob << false; + if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { + ob << make_data(mem->buffer_ptr(), data_size); + } else { + std::vector _buf; + _buf.resize(data_size); + stream* strm = reinterpret_cast(ob.get_stream()); + mem->copy_to(*strm, _buf.data()); + ob << make_data(_buf.data(), data_size); + } } } void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); + } + void load_weights(BinaryInputBuffer& ib, std::shared_ptr mapped_weights) { layout output_layout = layout(); ib >> output_layout; @@ -71,14 +88,39 @@ struct data : public primitive_base { mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false); + bool is_cache_without_weights; + ib >> is_cache_without_weights; + if (is_cache_without_weights && mapped_weights == nullptr) { + OPENVINO_THROW("mmap object is null"); + } + + std::shared_ptr>> shared_buf; + if (is_cache_without_weights) { + ib >> bin_offset; + original_size = data_size; + + shared_buf = std::make_shared>>( + mapped_weights->data() + bin_offset, + data_size, + mapped_weights); + } + if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { - ib >> make_data(mem->buffer_ptr(), data_size); + if (is_cache_without_weights) { + std::memcpy(reinterpret_cast(mem->buffer_ptr()), shared_buf->get_ptr(), data_size); + } else { + ib >> make_data(mem->buffer_ptr(), data_size); + } } else { const size_t DATA_BLOCK_SIZE = 2 * 1024 * 1024; auto& strm = ib.get_engine().get_service_stream(); if (data_size < DATA_BLOCK_SIZE || output_layout.format.is_image_2d()) { std::vector _buf(data_size); - ib >> make_data(_buf.data(), data_size); + if (is_cache_without_weights) { + std::memcpy(reinterpret_cast(_buf.data()), shared_buf->get_ptr(), data_size); + } else { + ib >> 
make_data(_buf.data(), data_size); + } mem->copy_from(strm, _buf.data()); } else { std::vector _buf1(DATA_BLOCK_SIZE); @@ -86,21 +128,33 @@ struct data : public primitive_base { bool buf_flag = true; event::ptr ev1, ev2; ev1 = ev2 = nullptr; - size_t dst_offset = 0; while (dst_offset < data_size) { const bool is_blocking = false; const size_t src_offset = 0; - size_t copy_size = (data_size > (dst_offset + DATA_BLOCK_SIZE)) ? DATA_BLOCK_SIZE : (data_size - dst_offset); + size_t copy_size = + (data_size > (dst_offset + DATA_BLOCK_SIZE)) ? DATA_BLOCK_SIZE : (data_size - dst_offset); if (buf_flag) { - ib >> make_data(_buf1.data(), copy_size); + if (is_cache_without_weights) { + std::memcpy(reinterpret_cast(_buf1.data()), + shared_buf->get_ptr() + dst_offset, + copy_size); + } else { + ib >> make_data(_buf1.data(), copy_size); + } if (ev2 != nullptr) { ev2->wait(); ev2 = nullptr; } ev1 = mem->copy_from(strm, _buf1.data(), src_offset, dst_offset, copy_size, is_blocking); } else { - ib >> make_data(_buf2.data(), copy_size); + if (is_cache_without_weights) { + std::memcpy(reinterpret_cast(_buf2.data()), + shared_buf->get_ptr() + dst_offset, + copy_size); + } else { + ib >> make_data(_buf2.data(), copy_size); + } if (ev1 != nullptr) { ev1->wait(); ev1 = nullptr; diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index d4461b8aad9107..1e2e84043dc82b 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -1720,6 +1720,7 @@ void program::cancel_compilation_context() { void program::save(cldnn::BinaryOutputBuffer& ob) const { std::map> mutable_datas_ptrs; ob << nodes_map.size(); + for (auto& node : nodes_map) { ob.setKernelImplParams(node.second->get_kernel_impl_params().get()); @@ -1732,6 +1733,7 @@ void program::save(cldnn::BinaryOutputBuffer& ob) const { node.second->as().typed_desc()->mem = data_node.get_attached_memory_ptr(); } } + ob << true; ob << node.second->desc; @@ -1835,6 +1837,12 @@ void program::save(cldnn::BinaryOutputBuffer& ob) const { void program::load(cldnn::BinaryInputBuffer& ib) { init_program(); + std::shared_ptr mapped_memory = nullptr; + std::string weights_path = _config.get_property(ov::weights_path); + if (!weights_path.empty()) { + mapped_memory = ov::load_mmap_object(weights_path); + } + size_t num_nodes; ib >> num_nodes; bool is_valid_data_node; @@ -1845,6 +1853,9 @@ void program::load(cldnn::BinaryInputBuffer& ib) { std::shared_ptr prim; ib >> prim; + if (auto data_prim = dynamic_cast(prim.get())) { + data_prim->load_weights(ib, mapped_memory); + } get_or_create(prim); } diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index b9729ca7bf0f20..15ff4447b4bafe 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -42,18 +42,15 @@ CompiledModel::CompiledModel(std::shared_ptr model, const std::shared_ptr& plugin, RemoteContextImpl::Ptr context, const ExecutionConfig& config) - : ov::ICompiledModel(model, - plugin, - context, - create_task_executor(plugin, config), - nullptr) - , m_context(context) - , m_config(config) - , m_wait_executor(std::make_shared(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"})) - , m_model_name(model->get_friendly_name()) - , m_inputs(ov::ICompiledModel::inputs()) - , m_outputs(ov::ICompiledModel::outputs()) - , m_loaded_from_cache(false) { + : ov::ICompiledModel(model, plugin, 
context, create_task_executor(plugin, config), nullptr), + m_context(context), + m_config(config), + m_wait_executor(std::make_shared( + ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"})), + m_model_name(model->get_friendly_name()), + m_inputs(ov::ICompiledModel::inputs()), + m_outputs(ov::ICompiledModel::outputs()), + m_loaded_from_cache(false) { auto graph_base = std::make_shared(model, m_context, m_config, 0); for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) { auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n); @@ -170,7 +167,10 @@ std::shared_ptr CompiledModel::create_infer_request() co // [ ov::Node::Input/ ov::Node::Output ] // [ ov::intel_gpu::Graph ] void CompiledModel::export_model(std::ostream& model) const { - if (m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) + // If ov::CacheMode::OPTIMIZE_SIZE is set, do the export iff it's possible to do weightless caching + // which requires the weights_path. + if (m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && + m_config.get_property(ov::weights_path).empty()) return; OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model"); diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 4ea7851b3f8c58..2d29601ef0b69d 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -308,10 +308,13 @@ std::shared_ptr Plugin::import_model(std::istream& model, config.set_user_property(_orig_config); config.apply_user_properties(context_impl->get_engine().get_device_info()); - if (config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) + cldnn::BinaryInputBuffer ib(model, context_impl->get_engine()); + + if (config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE && + config.get_property(ov::weights_path).empty()) { return nullptr; + } - cldnn::BinaryInputBuffer ib(model, context_impl->get_engine()); return std::make_shared(ib, shared_from_this(), context_impl, config, loaded_from_cache); } diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index aae9b163b4f6bf..510d715e7ac805 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/split.hpp" #include "openvino/op/variadic_split.hpp" @@ -304,6 +305,15 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrorigin_op_name = op.get_friendly_name(); prim->origin_op_type_name = op.get_type_name(); + if (auto data_prim = dynamic_cast(prim.get())) { + auto rt_info = op.get_rt_info(); + auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); + if (weightless_cache_attr != rt_info.end()) { + data_prim->bin_offset = weightless_cache_attr->second.as().bin_offset; + data_prim->original_size = weightless_cache_attr->second.as().original_size; + } + } + bool should_profile = prim->type != cldnn::mutable_data::type_id() && prim->type != cldnn::data::type_id(); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index a498dad24aa2f5..9c24fae1d6729a 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp 
+++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -60,6 +60,7 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), + std::make_tuple(ov::weights_path, ""), // Legacy API properties std::make_tuple(ov::intel_gpu::nv12_two_inputs, false), diff --git a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp new file mode 100644 index 00000000000000..573d275da84e51 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp @@ -0,0 +1,210 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "base/ov_behavior_test_utils.hpp" +#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/file_utils.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" +#include "common_test_utils/subgraph_builders/2_input_subtract.hpp" +#include "common_test_utils/subgraph_builders/concat_with_params.hpp" +#include "common_test_utils/subgraph_builders/conv_bias.hpp" +#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp" +#include "common_test_utils/subgraph_builders/conv_pool_relu_no_reshapes.hpp" +#include "common_test_utils/subgraph_builders/conv_pool_relu_non_zero.hpp" +#include "common_test_utils/subgraph_builders/convert_transpose.hpp" +#include "common_test_utils/subgraph_builders/detection_output.hpp" +#include "common_test_utils/subgraph_builders/kso_func.hpp" +#include "common_test_utils/subgraph_builders/matmul_bias.hpp" +#include "common_test_utils/subgraph_builders/multi_single_conv.hpp" +#include "common_test_utils/subgraph_builders/multiple_input_outpput_double_concat.hpp" +#include "common_test_utils/subgraph_builders/nested_branch_conv_concat.hpp" +#include "common_test_utils/subgraph_builders/nested_split_conv_concat.hpp" +#include "common_test_utils/subgraph_builders/read_concat_split_assign.hpp" +#include "common_test_utils/subgraph_builders/single_concat_with_constant.hpp" +#include "common_test_utils/subgraph_builders/single_conv.hpp" +#include "common_test_utils/subgraph_builders/single_split.hpp" +#include "common_test_utils/subgraph_builders/split_concat.hpp" +#include "common_test_utils/subgraph_builders/split_conv_concat.hpp" +#include "common_test_utils/subgraph_builders/split_multi_conv_concat.hpp" +#include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp" +#include "common_test_utils/test_common.hpp" +#include "openvino/pass/serialize.hpp" + +namespace { +class CheckWeightlessCacheAccuracy : public ::testing::Test { +protected: + std::shared_ptr model; + std::string xml_path; + std::string bin_path; + std::string cache_path; + + void SetUp() override; + void TearDown() override; + void run(); +}; + +void CheckWeightlessCacheAccuracy::SetUp() { + std::string filePrefix = ov::test::utils::generateTestFilePrefix(); + xml_path = filePrefix + ".xml"; + bin_path = filePrefix + ".bin"; + cache_path = filePrefix + ".blob"; +} + +void CheckWeightlessCacheAccuracy::TearDown() { + std::remove(xml_path.c_str()); + std::remove(bin_path.c_str()); + std::remove(cache_path.c_str()); +} + +void CheckWeightlessCacheAccuracy::run() { + ov::AnyMap config = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), ov::weights_path(bin_path) }; + auto core = ov::test::utils::PluginCache::get().core(); + 
ov::pass::Serialize(xml_path, bin_path).run_on_model(model); + + ov::CompiledModel compiled_model; + OV_ASSERT_NO_THROW(compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config)); + + auto ofstr = std::ofstream(cache_path, std::ofstream::binary); + OV_ASSERT_NO_THROW(compiled_model.export_model(ofstr)); + ofstr.close(); + + auto ifstr = std::ifstream(cache_path, std::ifstream::binary); + ov::CompiledModel imported_model; + OV_ASSERT_NO_THROW(imported_model = core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config)); + ifstr.close(); + + auto orig_req = compiled_model.create_infer_request(); + auto new_req = imported_model.create_infer_request(); + + for (size_t param_idx = 0; param_idx < model->get_parameters().size(); ++param_idx) { + auto input = model->get_parameters().at(param_idx); + auto tensor = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input->get_shape()); + orig_req.set_tensor(input, tensor); + new_req.set_tensor(input, tensor); + } + + OV_ASSERT_NO_THROW(orig_req.infer()); + OV_ASSERT_NO_THROW(new_req.infer()); + + auto result_vector = model->get_results(); + for (auto& res : result_vector) { + auto orig_out = orig_req.get_tensor(res); + auto new_out = new_req.get_tensor(res); + ov::test::utils::compare(orig_out, new_out); + } +} + +TEST_F(CheckWeightlessCacheAccuracy, 2InputSubtract) { + model = ov::test::utils::make_2_input_subtract(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, ConcatWithParams) { + model = ov::test::utils::make_concat_with_params(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, ConvBias) { + model = ov::test::utils::make_conv_bias(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, ConvPoolRelu) { + model = ov::test::utils::make_conv_pool_relu(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, ConvPoolReluNoReshapes) { + model = ov::test::utils::make_conv_pool_relu_no_reshapes(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, ConvPoolReluNonZero) { + model = ov::test::utils::make_conv_pool_relu_non_zero(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, ConvertTranspose) { + model = ov::test::utils::make_convert_transpose(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, DetectionOutput) { + model = ov::test::utils::make_detection_output(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, KsoFunction) { + model = ov::test::utils::make_kso_function(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, MatmulBias) { + model = ov::test::utils::make_matmul_bias(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, MultiSingleConv) { + model = ov::test::utils::make_multi_single_conv(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, MultipleInputOutputDoubleConcat) { + model = ov::test::utils::make_multiple_input_output_double_concat(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, NestedBranchConvConcat) { + model = ov::test::utils::make_nested_branch_conv_concat(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, NestedSplitConvConcat) { + model = ov::test::utils::make_nested_split_conv_concat(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, ReadConcatSplitAssign) { + model = ov::test::utils::make_read_concat_split_assign(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, SingleConcatWithConstant) { + model = ov::test::utils::make_single_concat_with_constant(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, SingleConv) { + model = ov::test::utils::make_single_conv(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, SingleSplit) { + 
model = ov::test::utils::make_single_split(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, SplitConcat) { + model = ov::test::utils::make_split_concat(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, SplitConvConcat) { + model = ov::test::utils::make_split_conv_concat(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, SplitMultiConvConcat) { + model = ov::test::utils::make_split_multi_conv_concat(); + run(); +} + +TEST_F(CheckWeightlessCacheAccuracy, TiWithLstmCell) { + model = ov::test::utils::make_ti_with_lstm_cell(); + run(); +} + +} // namespace From f3911616becfd47e22376b59f0cb0e103231d82e Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Mon, 21 Oct 2024 09:49:52 +0200 Subject: [PATCH 084/112] [DOCS] torch.compile examples (#27107) --- .../openvino-workflow/torch-compile.rst | 180 ++++++++++++++++++ 1 file changed, 180 insertions(+) diff --git a/docs/articles_en/openvino-workflow/torch-compile.rst b/docs/articles_en/openvino-workflow/torch-compile.rst index 6d874ff4d14be3..5bdb51a596d5d8 100644 --- a/docs/articles_en/openvino-workflow/torch-compile.rst +++ b/docs/articles_en/openvino-workflow/torch-compile.rst @@ -20,6 +20,186 @@ By default, Torch code runs in eager-mode, but with the use of ``torch.compile`` How to Use #################### + +.. tab-set:: + + .. tab-item:: Image Generation + + .. tab-set:: + + .. tab-item:: Stable-Diffusion-2 + + .. code-block:: py + :force: + + import torch + from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler + + model_id = "stabilityai/stable-diffusion-2-1" + + # Use the DPMSolverMultistepScheduler (DPM-Solver++) scheduler here instead + pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16) + pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) + + + pipe.text_encoder = torch.compile(pipe.text_encoder, backend="openvino") #Optional + + pipe.unet = torch.compile(pipe.unet, backend="openvino") + + pipe.vae.decode = torch.compile(pipe.vae.decode, backend="openvino") #Optional + + prompt = "a photo of an astronaut riding a horse on mars" + image = pipe(prompt).images[0] + + image.save("astronaut_rides_horse.png") + + + .. tab-item:: Stable-Diffusion-3 + + .. code-block:: py + + import torch + from diffusers import StableDiffusion3Pipeline + + pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float32) + + + pipe.transformer = torch.compile(pipe.transformer, backend="openvino") + + image = pipe( + "A cat holding a sign that says hello world", + negative_prompt="", + num_inference_steps=28, + guidance_scale=7.0, + ).images[0] + + image.save('out.png') + + .. tab-item:: Stable-Diffusion-XL + + .. 
code-block:: py + + import torch + from diffusers import UNet2DConditionModel, DiffusionPipeline, LCMScheduler + + unet = UNet2DConditionModel.from_pretrained("latent-consistency/lcm-sdxl", torch_dtype=torch.float16, variant="fp16") + pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", unet=unet, torch_dtype=torch.float16, variant="fp16") + pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config) + + + pipe.text_encoder = torch.compile(pipe.text_encoder, backend="openvino") #Optional + + pipe.unet = torch.compile(pipe.unet, backend="openvino") + + pipe.vae.decode = torch.compile(pipe.vae.decode, backend="openvino") #Optional + + prompt = "a close-up picture of an old man standing in the rain" + image = pipe(prompt, num_inference_steps=5, guidance_scale=8.0).images[0] + image.save("result.png") + + .. tab-item:: Text Generation + + .. tab-set:: + + .. tab-item:: Llama-3.2-1B + + .. code-block:: py + + import torch + from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + + model_name_or_path = "meta-llama/Llama-3.2-1B-Instruct" + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype=torch.float32) + model = AutoModelForCausalLM.from_pretrained( + model_name_or_path, + trust_remote_code=True, + device_map='cpu', + torch_dtype=torch.float32 + ) + + prompt = "Tell me about AI" + + + model.forward = torch.compile(model.forward, backend="openvino", options={'aot_autograd': True}) + + pipe = pipeline( + "text-generation", + model=model, + tokenizer=tokenizer, + max_new_tokens=64 + ) + result = pipe(prompt) + print(result[0]['generated_text']) + + + .. tab-item:: Llama-2-7B-GPTQ + + .. code-block:: py + + import torch + from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline + + model_name_or_path = "TheBloke/Llama-2-7B-GPTQ" + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype=torch.float32) + model = AutoModelForCausalLM.from_pretrained( + model_name_or_path, + trust_remote_code=True, + device_map='cpu', + torch_dtype=torch.float32 + ) + + prompt = "Tell me about AI" + + + model.forward = torch.compile(model.forward, backend="openvino", options={'aot_autograd': True}) + + pipe = pipeline( + "text-generation", + model=model, + tokenizer=tokenizer, + max_new_tokens=64 + ) + result = pipe(prompt) + print(result[0]['generated_text']) + + + .. tab-item:: Chatglm-4-GPTQ + + .. 
code-block:: py + + import torch + from transformers import AutoModelForCausalLM, AutoTokenizer + + query = "tell me about AI“ + + tokenizer = AutoTokenizer.from_pretrained("mcavus/glm-4v-9b-gptq-4bit-dynamo", trust_remote_code=True) + inputs = tokenizer.apply_chat_template([{"role": "user", "content": query}], + add_generation_prompt=True, + tokenize=True, + return_tensors="pt", + return_dict=True + ) + model = AutoModelForCausalLM.from_pretrained( + "mcavus/glm-4v-9b-gptq-4bit-dynamo", + torch_dtype=torch.float32, + low_cpu_mem_usage=True, + trust_remote_code=True + ) + + + model.transformer.encoder.forward = torch.compile(model.transformer.encoder.forward, backend="openvino", options={"aot_autograd":True}) + + gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1} + with torch.no_grad(): + outputs = model.generate(**inputs, **gen_kwargs) + outputs = outputs[:, inputs['input_ids'].shape[1]:] + print(tokenizer.decode(outputs[0], skip_special_tokens=True)) + + + + + + + + + + + + + + + + To use ``torch.compile``, you need to define the ``openvino`` backend in your PyTorch application. This way Torch FX subgraphs will be directly converted to OpenVINO representation without any additional PyTorch-based tracing/scripting. From 308b420dde9216bab4ee2d70d1d2afc7a95b77c6 Mon Sep 17 00:00:00 2001 From: yuanxion <96522341+yuanxion@users.noreply.github.com> Date: Mon, 21 Oct 2024 15:56:17 +0800 Subject: [PATCH 085/112] [GPU] Fix different element types of MatMul dequantization scales issue (#27077) ### Details: - MatMul dequantization Convert both dequantization scale variables (mulConst1 & mulConst2) to f32 instead of just one (mulConst2), to avoid different data type complaint issue (f16 & f32). ### Tickets: - 151988 --------- Signed-off-by: yuan.xiong --- .../src/mat_mul.cpp | 2 +- .../mat_mul_with_constant_transformation.cpp | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/common/low_precision_transformations/src/mat_mul.cpp b/src/common/low_precision_transformations/src/mat_mul.cpp index 15afe2408cc459..705f3d400a098c 100644 --- a/src/common/low_precision_transformations/src/mat_mul.cpp +++ b/src/common/low_precision_transformations/src/mat_mul.cpp @@ -160,7 +160,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ov::pass::p } const auto newMulConst = NetworkHelper::toScalarIfPossible(fold( - mulConst1, + foldConvert(mulConst1, element::f32), foldConvert(mulConst2, element::f32))); const auto newMultiply = std::make_shared>( diff --git a/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp b/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp index 454802c965f945..8425db398085ae 100644 --- a/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp +++ b/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp @@ -157,6 +157,22 @@ std::vector testValues = { {}, {}}}, + // test: multiply with f16 constant + {LayerTransformation::createParamsU8I8(), + {ov::element::u8, + {ov::element::f32, {}, ov::builder::subgraph::DequantizationOperations::Multiply{0.02f}.setConstantPrecision(ov::element::f16)}, + {std::vector(1024 * 1024, 1.f), ov::element::i8, ov::Shape{1024, 1024}}, + {}, + {ov::element::f32, {}, {0.1f}}, + }, + {ov::element::u8, + {}, + {std::vector(1024 * 1024, 1.f), ov::element::i8, ov::Shape{1024, 1024}}, + ov::element::u8, + {{}, {}, {0.02f * 0.1f}}, + {}, + {}}}, + // supported 3D: U8 
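All of these examples reduce to the same one-line pattern: wrap the module (or just its ``forward``) with ``torch.compile`` and select the ``openvino`` backend. A minimal self-contained sketch, assuming only that ``torch`` and the ``openvino`` package are installed (the toy model and shapes are placeholders):

```python
import torch
import torch.nn as nn

# A stand-in module; in practice this is the network to accelerate.
model = nn.Sequential(nn.Linear(128, 256), nn.ReLU(), nn.Linear(256, 10)).eval()

# TorchDynamo captures Torch FX subgraphs and hands them to OpenVINO.
compiled_model = torch.compile(model, backend="openvino")

with torch.no_grad():
    # The first call triggers graph capture and OpenVINO compilation;
    # later calls with the same input shapes reuse the compiled graph.
    out = compiled_model(torch.randn(1, 128))
```
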
From 308b420dde9216bab4ee2d70d1d2afc7a95b77c6 Mon Sep 17 00:00:00 2001
From: yuanxion <96522341+yuanxion@users.noreply.github.com>
Date: Mon, 21 Oct 2024 15:56:17 +0800
Subject: [PATCH 085/112] [GPU] Fix different element types of MatMul
 dequantization scales issue (#27077)

### Details:
- MatMul dequantization: convert both dequantization scale variables
  (mulConst1 & mulConst2) to f32 instead of just one (mulConst2), to avoid a
  data type mismatch between f16 and f32.

### Tickets:
- 151988

---------

Signed-off-by: yuan.xiong
---
 .../src/mat_mul.cpp                           |  2 +-
 .../mat_mul_with_constant_transformation.cpp  | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/common/low_precision_transformations/src/mat_mul.cpp b/src/common/low_precision_transformations/src/mat_mul.cpp
index 15afe2408cc459..705f3d400a098c 100644
--- a/src/common/low_precision_transformations/src/mat_mul.cpp
+++ b/src/common/low_precision_transformations/src/mat_mul.cpp
@@ -160,7 +160,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ov::pass::p
     }
 
     const auto newMulConst = NetworkHelper::toScalarIfPossible(fold(
-        mulConst1,
+        foldConvert(mulConst1, element::f32),
         foldConvert(mulConst2, element::f32)));
 
     const auto newMultiply = std::make_shared>(
diff --git a/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp b/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp
index 454802c965f945..8425db398085ae 100644
--- a/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp
+++ b/src/common/low_precision_transformations/tests/mat_mul_with_constant_transformation.cpp
@@ -157,6 +157,22 @@ std::vector testValues = {
      {},
      {}}},
 
+    // test: multiply with f16 constant
+    {LayerTransformation::createParamsU8I8(),
+     {ov::element::u8,
+      {ov::element::f32, {}, ov::builder::subgraph::DequantizationOperations::Multiply{0.02f}.setConstantPrecision(ov::element::f16)},
+      {std::vector(1024 * 1024, 1.f), ov::element::i8, ov::Shape{1024, 1024}},
+      {},
+      {ov::element::f32, {}, {0.1f}},
+     },
+     {ov::element::u8,
+      {},
+      {std::vector(1024 * 1024, 1.f), ov::element::i8, ov::Shape{1024, 1024}},
+      ov::element::u8,
+      {{}, {}, {0.02f * 0.1f}},
+      {},
+      {}}},
+
     // supported 3D: U8 & I8 with Dq on weights
     {LayerTransformation::createParamsU8I8(),
      {

From 2cb8222dd7bf443096f80e09bcba8766c223f680 Mon Sep 17 00:00:00 2001
From: Egor Duplenskii
Date: Mon, 21 Oct 2024 10:38:52 +0200
Subject: [PATCH 086/112] [CPU] Use actual input shape to init desc for
 MemoryInputSDPA (#27143)

An output shape was previously used to create an input descriptor for some
reason

---
 src/plugins/intel_cpu/src/nodes/memory.cpp | 52 +++------------------
 src/plugins/intel_cpu/src/nodes/memory.hpp |  2 -
 2 files changed, 7 insertions(+), 47 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp
index 88693ebfa49fdf..756fbc5b578f61 100644
--- a/src/plugins/intel_cpu/src/nodes/memory.cpp
+++ b/src/plugins/intel_cpu/src/nodes/memory.cpp
@@ -427,29 +427,20 @@ void MemoryInputBase::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    auto&& shape = getOutputShapeAtPort(0);
     auto precision = getOriginalOutputPrecisionAtPort(0);
     auto&& descCreators = ov::intel_cpu::BlockedDescCreator::getCommonCreators();
-
     NodeConfig config;
     if (!getParentEdges().empty()) {
-        PortConfig inPortConfig;
-
-        inPortConfig.inPlace(-1);
-        inPortConfig.constant(false);
-        inPortConfig.setMemDesc(descCreators.at(LayoutType::ncsp)->createSharedDesc(precision, shape));
-
-        config.inConfs.push_back(std::move(inPortConfig));
+        const auto& inputShape = getInputShapeAtPort(0);
+        config.inConfs.emplace_back(descCreators.at(LayoutType::ncsp)->createSharedDesc(precision, inputShape));
     }
 
-    PortConfig outPortConfig;
-
-    outPortConfig.inPlace(0);
-    outPortConfig.constant(false);
-    outPortConfig.setMemDesc(descCreators.at(LayoutType::ncsp)->createSharedDesc(precision, shape));
-
-    config.outConfs.push_back(std::move(outPortConfig));
+    const auto& outputShape = getOutputShapeAtPort(0);
+    config.outConfs.emplace_back(
+        descCreators.at(LayoutType::ncsp)->createSharedDesc(precision, outputShape),
+        BlockedMemoryDesc::FULL_MASK,
+        0);
 
     supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
 }
@@ -759,35 +750,6 @@ void MemoryInputSDPA::createPrimitive() {
     OPENVINO_ASSERT(m_child_port_idx != -1, getName(), " should be connected to SDPA node.");
 }
 
-void MemoryInputSDPA::initSupportedPrimitiveDescriptors() {
-    if (!supportedPrimitiveDescriptors.empty())
-        return;
-
-    auto&& shape = getOutputShapeAtPort(0);
-    auto precision = getOriginalOutputPrecisionAtPort(0);
-    auto&& descCreators = ov::intel_cpu::BlockedDescCreator::getCommonCreators();
-    NodeConfig config;
-    if (!getParentEdges().empty()) {
-        PortConfig inPortConfig;
-        inPortConfig.inPlace(-1);
-        inPortConfig.constant(false);
-        inPortConfig.setMemDesc(descCreators.at(LayoutType::ncsp)->createSharedDesc(precision, shape));
-        config.inConfs.push_back(std::move(inPortConfig));
-    }
-
-    PortConfig outPortConfig;
-    outPortConfig.inPlace(0);
-    outPortConfig.constant(false);
-    // layout for fake memory obj, the child sdpa also does not use it
-    outPortConfig.setMemDesc(descCreators.at(LayoutType::ncsp)->createSharedDesc(precision, shape));
-    config.outConfs.push_back(std::move(outPortConfig));
-    supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
-}
-
-void MemoryInputSDPA::initOptimalPrimitiveDescriptor() {
-    Node::initOptimalPrimitiveDescriptor();
-}
-
 void MemoryInputSDPA::assignStateHook() {
     auto currentState = getAssignedState();
     auto sdpaNode = m_sdpaNode.lock();
diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp
b/src/plugins/intel_cpu/src/nodes/memory.hpp index c5a83cfa5cad1a..c158d738a36148 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -204,8 +204,6 @@ class MemoryInputSDPA : public MemoryInputBase { static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void createPrimitive() override; - void initSupportedPrimitiveDescriptors() override; - void initOptimalPrimitiveDescriptor() override; void resolveInPlaceEdges(Edge::LOOK look) override; MemStatePtr makeState() const override; From b785e6eec95ad4a3da8a89bff56994522fa66ee4 Mon Sep 17 00:00:00 2001 From: Mingyu Kim Date: Mon, 21 Oct 2024 18:00:46 +0900 Subject: [PATCH 087/112] [GPU] Enable dynamic quantization gs32 as default for non-systolic (#27119) ### Details: - It is applied only to int4 compressed model, non-systolic path - Though it is a global configuration, systolic hardware will ignore it ### Tickets: - 151708 --- .../include/intel_gpu/runtime/debug_configuration.hpp | 1 + .../fully_connected_kernel_bf_tiled.cpp | 2 +- .../intel_gpu/src/plugin/transformations_pipeline.cpp | 2 +- .../intel_gpu/src/runtime/debug_configuration.cpp | 2 +- src/plugins/intel_gpu/src/runtime/execution_config.cpp | 5 ++--- .../tests/unit/fusions/fully_connected_fusion_test.cpp | 2 ++ .../tests/unit/test_cases/fully_connected_gpu_test.cpp | 10 +++++++--- 7 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index fbc8ae84c36a29..c65aa3e5894cb8 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -175,6 +175,7 @@ class debug_configuration { } dump_prof_data_iter_params; static std::ostream* verbose_stream; + static const int DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET = -2; }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index c4115d74f54a92..b26b11ce97df6a 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -55,7 +55,7 @@ static size_t get_dynamic_quantize_group_size(const fully_connected_params& para auto dynamic_quantization_group_size = params.dynamic_quantization_group_size; GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size) { + GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { dynamic_quantization_group_size = debug_config->dynamic_quantize_group_size; // Specify which Fully-connected layer would be dynamic-quantized diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index f173e378fca3f9..b75519ac40e678 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -872,7 +872,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size); 
- if (device_info.supports_immad) { // XXX: 1048576 is considered per-token + if (device_info.supports_immad) { pass_config->set_callback([=](const_node_ptr& root) -> bool { if (root->get_input_node_shared_ptr(0)->get_element_type() == ov::element::Type_t::f32) { GPU_DEBUG_TRACE << root->get_friendly_name() << " Dynamic quantization is turned off because input type is not supported" << std::endl; diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index dcbabff548cc5d..5f943564d6f50e 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -253,7 +253,7 @@ debug_configuration::debug_configuration() , disable_runtime_skip_reorder(0) , disable_primitive_fusing(0) , disable_fake_alignment(0) - , dynamic_quantize_group_size(0) + , dynamic_quantize_group_size(DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) , disable_horizontal_fc_fusion(0) { #ifdef GPU_DEBUG_CONFIG get_gpu_debug_env_var("Help", help); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 9c24fae1d6729a..7661444cc4fd7b 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -46,7 +46,6 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE), std::make_tuple(ov::hint::num_requests, 0), std::make_tuple(ov::hint::enable_cpu_pinning, false), - std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), @@ -58,7 +57,7 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::internal::query_model_ratio, 1.0f), std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), - std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), + std::make_tuple(ov::hint::dynamic_quantization_group_size, 32), std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), std::make_tuple(ov::weights_path, ""), @@ -204,7 +203,7 @@ void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) { set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true)); } - GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size) { + GPU_DEBUG_IF(debug_config->dynamic_quantize_group_size != debug_config->DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) { if (debug_config->dynamic_quantize_group_size == -1) set_property(ov::hint::dynamic_quantization_group_size(UINT64_MAX)); else diff --git a/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp index 3743298a3c981a..5e9b5134fb3802 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp @@ -666,6 +666,7 @@ TEST_P(fc_compressed_int8_bias_dynamic_onednn, basic) { bool is_dynamic = true; cfg_not_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic)); + cfg_not_fused.set_property(ov::hint::dynamic_quantization_group_size(0)); tolerance = 1.0f; execute(p, false, is_dynamic); } @@ -705,6 +706,7 @@ TEST_P(fc_compressed_int8_bias_prod_unfused_dynamic_onednn, basic) { 
bool is_dynamic = true; cfg_not_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic)); + cfg_not_fused.set_property(ov::hint::dynamic_quantization_group_size(0)); tolerance = 1.0f; execute(p, false, is_dynamic); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index 0ef7b6a5ca088b..dde1b6215148b3 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1590,6 +1590,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1615,6 +1616,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1698,9 +1700,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - if (is_dyn_quan) { - config.set_property(ov::hint::dynamic_quantization_group_size(0)); - } + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1728,6 +1728,8 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::optimize_data(true)); if (is_dyn_quan) { config.set_property(ov::hint::dynamic_quantization_group_size(32)); + } else { + config.set_property(ov::hint::dynamic_quantization_group_size(0)); } network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1868,6 +1870,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl = { in_layout.format, "", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim1", fc_impl }, { "fc_prim2", fc_impl } })); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1896,6 +1899,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); From 
2e25c873f66477c0676354511dfb4c58b13b05c4 Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Mon, 21 Oct 2024 02:03:06 -0700 Subject: [PATCH 088/112] [GPU] Fix not to check _dynamic_dims_mask when get_from_padded_pool() (#27120) ### Details: - Fix not to check _dynamic_dims_mask when get_from_padded_pool() ### Tickets: - 154329 - 155099 - 154137 --- src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp | 2 +- src/plugins/intel_gpu/src/runtime/memory_pool.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index 82cf01ab9522b1..62e4c08a90f004 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -183,7 +183,7 @@ struct padding { } friend bool operator<(const padding& lhs, const padding& rhs) { - OPENVINO_ASSERT(!lhs.is_dynamic() && !rhs.is_dynamic(), "[GPU] padding compare is called for dynamic shape"); + // Compare only actual padding size not _dynamic_dims_mask if (lhs._lower_size < rhs._lower_size) return true; else if (lhs._lower_size > rhs._lower_size) return false; if (lhs._upper_size < rhs._upper_size) return true; diff --git a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp index 9dee7c4487002e..1d34cfcde18a63 100644 --- a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp +++ b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp @@ -306,7 +306,7 @@ memory::ptr memory_pool::get_memory(const layout& layout, } if (do_reuse) { // reusable within the same network - if (!layout.format.is_image() && layout.data_padding == padding{{0, 0, 0, 0}, 0}) { + if (!layout.format.is_image() && !layout.data_padding) { // non-padded buffers return get_from_non_padded_pool(layout, prim_id, unique_id, network_id, restrictions, type, reset, is_dynamic); } else if (!layout.format.is_image()) { From 34398738424a9908f10891b80459ce582c71a1e6 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 21 Oct 2024 13:04:07 +0400 Subject: [PATCH 089/112] [GPU] Disable onednn pool in some cases due to the bug (#27115) ### Tickets: - *CVS-155035* --- .../src/graph/impls/onednn/pooling_onednn.hpp | 2 +- .../src/graph/impls/registry/pooling_impls.cpp | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp index 26cecbb659e475..343fe66771de25 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/pooling_onednn.hpp @@ -14,7 +14,7 @@ namespace onednn { struct PoolingImplementationManager : public ImplementationManager { OV_GPU_PRIMITIVE_IMPL("onednn::pool") - PoolingImplementationManager(shape_types shape_type) : ImplementationManager(impl_types::onednn, shape_type) {} + PoolingImplementationManager(shape_types shape_type, ValidateFunc vf = nullptr) : ImplementationManager(impl_types::onednn, shape_type, vf) {} std::unique_ptr create_impl(const program_node& node, const kernel_impl_params& params) const override; bool validate_impl(const program_node& node) const override { diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/pooling_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/pooling_impls.cpp index 191edc050cd694..9958404b14bfee 100644 --- 
a/src/plugins/intel_gpu/src/graph/impls/registry/pooling_impls.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/registry/pooling_impls.cpp
@@ -17,7 +17,18 @@ using namespace cldnn;
 
 const std::vector>& Registry::get_implementations() {
     static const std::vector> impls = {
-        OV_GPU_CREATE_INSTANCE_ONEDNN(onednn::PoolingImplementationManager, shape_types::static_shape)
+        OV_GPU_CREATE_INSTANCE_ONEDNN(onednn::PoolingImplementationManager, shape_types::static_shape, [](const program_node& node) {
+            const auto& in_layout = node.get_input_layout(0);
+            const auto& out_layout = node.get_output_layout(0);
+            // Disable this case due to sporadic hang for the following case:
+            // onednn_verbose,primitive,exec,gpu:0,pooling,jit:ir,forward_inference,src_u8::blocked:acdb::f0 dst_u8::blocked:abcd::f0
+            // ws_undef::undef:::,attr-scratchpad:user attr-post-ops:eltwise_linear:1.52456,alg:pooling_avg_include_padding,
+            // mb1ic96_ih56oh28kh2sh2dh0ph0_iw56ow28kw2sw2dw0pw0,0.0400391
+            // issue: 12579
+            if (in_layout.format == format::byxf && out_layout.format == format::bfyx && ov::element::Type(in_layout.data_type).is_integral_number())
+                return false;
+            return true;
+        })
         OV_GPU_GET_INSTANCE_OCL(pooling, shape_types::static_shape)
     };
 

From 924b311dc3a5228ae17ced7bfe9013344b007f1b Mon Sep 17 00:00:00 2001
From: captainneil
Date: Mon, 21 Oct 2024 17:31:22 +0800
Subject: [PATCH 090/112] [Conan Build] Fix Debug Build (#27150)

### Details:
- *Fix Conan Debug Build*

A failed compilation looks like this:
```
cmake -G "Visual Studio 17 2022" -DCMAKE_TOOLCHAIN_FILE="generators/conan_toolchain.cmake" -DCMAKE_INSTALL_PREFIX="F:/.conan2/p/b/openvdff378fa94719/p" -DENABLE_INTEL_CPU="ON" -DENABLE_INTEL_GPU="ON" -DENABLE_ONEDNN_FOR_GPU="ON" -DENABLE_INTEL_GNA="OFF" -DENABLE_AUTO="ON" -DENABLE_MULTI="ON" -DENABLE_AUTO_BATCH="ON" -DENABLE_HETERO="ON" -DENABLE_OV_IR_FRONTEND="ON" -DENABLE_OV_PADDLE_FRONTEND="ON" -DENABLE_OV_TF_FRONTEND="ON" -DENABLE_OV_TF_LITE_FRONTEND="ON" -DENABLE_OV_ONNX_FRONTEND="ON" -DENABLE_OV_PYTORCH_FRONTEND="ON" -DENABLE_SYSTEM_TBB="ON" -DENABLE_TBBBIND_2_5="OFF" -DENABLE_SYSTEM_PUGIXML="ON" -DENABLE_SYSTEM_PROTOBUF="ON" -DENABLE_SYSTEM_SNAPPY="ON" -DENABLE_SYSTEM_FLATBUFFERS="ON" -DENABLE_SYSTEM_OPENCL="ON" -DENABLE_GAPI_PREPROCESSING="ON" -DBUILD_SHARED_LIBS="ON" -DCPACK_GENERATOR="CONAN" -DENABLE_PROFILING_ITT="OFF" -DENABLE_PYTHON="OFF" -DENABLE_PROXY="OFF" -DENABLE_WHEEL="OFF" -DENABLE_CPPLINT="OFF" -DENABLE_NCC_STYLE="OFF" -DENABLE_SAMPLES="OFF" -DENABLE_TEMPLATE="OFF" -DCMAKE_POLICY_DEFAULT_CMP0091="NEW" "F:/.conan2/p/openvac7fc2c3b20db/s/src" --fresh
-- Using Conan toolchain: F:/.conan2/p/b/openvdff378fa94719/b/build/generators/conan_toolchain.cmake
-- Conan toolchain: Including user_toolchain: F:/.conan2/profiles/disable_vcpkg.cmake
-- Conan toolchain: Including user_toolchain: F:/.conan2/profiles/limit_sdkver.cmake
-- Conan user toolchain: CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION_MAXIMUM=10.0.22621.0
-- Conan toolchain: CMAKE_GENERATOR_TOOLSET=v142
-- Conan toolchain: Setting CMAKE_MSVC_RUNTIME_LIBRARY=$<$:MultiThreadedDebugDLL>
-- Conan toolchain: C++ Standard 17 with extensions OFF
-- Conan toolchain: Setting BUILD_SHARED_LIBS = ON
-- Selecting Windows SDK version 10.0.22621.0 to target Windows 10.0.22631.
-- The C compiler identification is MSVC 19.29.30154.0 -- The CXX compiler identification is MSVC 19.29.30154.0 -- Detecting C compiler ABI info -- Detecting C compiler ABI info - done -- Check for working C compiler: C:/Program Files/Microsoft Visual Studio/2022/Professional/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/cl.exe - skipped -- Detecting C compile features -- Detecting C compile features - done -- Detecting CXX compiler ABI info -- Detecting CXX compiler ABI info - done -- Check for working CXX compiler: C:/Program Files/Microsoft Visual Studio/2022/Professional/VC/Tools/MSVC/14.29.30133/bin/HostX64/x64/cl.exe - skipped -- Detecting CXX compile features -- Detecting CXX compile features - done -- OpenVINO version is 2023.2.0 (Build 000) -- Performing Test CMAKE_HAVE_LIBC_PTHREAD -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed -- Looking for pthread_create in pthreads -- Looking for pthread_create in pthreads - not found -- Looking for pthread_create in pthread -- Looking for pthread_create in pthread - not found -- Found Threads: TRUE -- Performing Test SUGGEST_OVERRIDE_SUPPORTED -- Performing Test SUGGEST_OVERRIDE_SUPPORTED - Failed -- Performing Test UNUSED_BUT_SET_VARIABLE_SUPPORTED -- Performing Test UNUSED_BUT_SET_VARIABLE_SUPPORTED - Failed -- OpenVINO Runtime enabled features: -- -- CI_BUILD_NUMBER: 2023.2.0-000-- -- CPACK_GENERATOR = CONAN -- ENABLE_LTO = OFF -- OS_FOLDER = OFF -- USE_BUILD_TYPE_SUBFOLDER = OFF -- CMAKE_COMPILE_WARNING_AS_ERROR = OFF -- ENABLE_QSPECTRE = OFF -- ENABLE_INTEGRITYCHECK = OFF -- ENABLE_SANITIZER = OFF -- ENABLE_UB_SANITIZER = OFF -- ENABLE_THREAD_SANITIZER = OFF -- ENABLE_COVERAGE = OFF -- ENABLE_SSE42 = ON -- ENABLE_AVX2 = ON -- ENABLE_AVX512F = ON -- BUILD_SHARED_LIBS = ON -- ENABLE_LIBRARY_VERSIONING = OFF -- ENABLE_FASTER_BUILD = OFF -- ENABLE_CPPLINT = OFF -- ENABLE_CPPLINT_REPORT = OFF -- ENABLE_CLANG_FORMAT = OFF -- ENABLE_NCC_STYLE = OFF -- ENABLE_UNSAFE_LOCATIONS = OFF -- ENABLE_FUZZING = OFF -- ENABLE_PROXY = OFF -- ENABLE_INTEL_CPU = ON -- ENABLE_ARM_COMPUTE_CMAKE = OFF -- ENABLE_TESTS = OFF -- ENABLE_INTEL_GPU = ON -- ENABLE_ONEDNN_FOR_GPU = ON -- ENABLE_DEBUG_CAPS = OFF -- ENABLE_GPU_DEBUG_CAPS = OFF -- ENABLE_CPU_DEBUG_CAPS = OFF -- ENABLE_PROFILING_ITT = OFF -- ENABLE_PROFILING_FILTER = ALL -- ENABLE_PROFILING_FIRST_INFERENCE = ON -- SELECTIVE_BUILD = OFF -- ENABLE_DOCS = OFF -- ENABLE_PKGCONFIG_GEN = OFF -- THREADING = TBB -- ENABLE_TBBBIND_2_5 = OFF -- ENABLE_TBB_RELEASE_ONLY = OFF -- ENABLE_INTEL_GNA = OFF -- ENABLE_INTEL_GNA_DEBUG = OFF -- ENABLE_V7_SERIALIZE = OFF -- ENABLE_IR_V7_READER = OFF -- ENABLE_GAPI_PREPROCESSING = ON -- ENABLE_MULTI = ON -- ENABLE_AUTO = ON -- ENABLE_AUTO_BATCH = ON -- ENABLE_HETERO = ON -- ENABLE_TEMPLATE = OFF -- ENABLE_PLUGINS_XML = OFF -- GAPI_TEST_PERF = OFF -- ENABLE_FUNCTIONAL_TESTS = OFF -- ENABLE_SAMPLES = OFF -- ENABLE_OV_ONNX_FRONTEND = ON -- ENABLE_OV_PADDLE_FRONTEND = ON -- ENABLE_OV_IR_FRONTEND = ON -- ENABLE_OV_PYTORCH_FRONTEND = ON -- ENABLE_OV_IR_FRONTEND = ON -- ENABLE_OV_TF_FRONTEND = ON -- ENABLE_OV_TF_LITE_FRONTEND = ON -- ENABLE_SNAPPY_COMPRESSION = ON -- ENABLE_STRICT_DEPENDENCIES = OFF -- ENABLE_SYSTEM_TBB = ON -- ENABLE_SYSTEM_PUGIXML = ON -- ENABLE_SYSTEM_FLATBUFFERS = ON -- ENABLE_SYSTEM_OPENCL = ON -- ENABLE_SYSTEM_PROTOBUF = ON -- ENABLE_SYSTEM_SNAPPY = ON -- ENABLE_PYTHON_PACKAGING = OFF -- ENABLE_OPENVINO_DEBUG = OFF -- -- CMAKE_VERSION ......................... 3.29.8 -- OpenVINO_SOURCE_DIR ................... 
F:/.conan2/p/openvac7fc2c3b20db/s/src -- OpenVINO_BINARY_DIR ................... F:/.conan2/p/b/openvdff378fa94719/b/build -- CMAKE_GENERATOR ....................... Visual Studio 17 2022 -- CPACK_GENERATOR ....................... CONAN -- CMAKE_C_COMPILER_ID ................... MSVC -- CMAKE_CXX_COMPILER_ID ................. MSVC -- CMAKE_CXX_STANDARD .................... 17 -- CMAKE_CONFIGURATION_TYPES ............. Debug Release MinSizeRel RelWithDebInfo -- CMAKE_GENERATOR_PLATFORM .............. x64 -- CMAKE_GENERATOR_PLATFORM .............. x64 -- CMAKE_GENERATOR_PLATFORM .............. x64 -- CMAKE_GENERATOR_TOOLSET ............... v142 -- CMAKE_TOOLCHAIN_FILE .................. F:/.conan2/p/b/openvdff378fa94719/b/build/generators/conan_toolchain.cmake -- Conan: Target declared 'pugixml::pugixml' -- Conan: Component target declared 'protobuf::libprotobuf' -- Conan: Component target declared 'protobuf::libprotoc' -- Conan: Target declared 'protobuf::protobuf' -- Conan: Target declared 'ZLIB::ZLIB' -- Conan: Including build module from 'F:/.conan2/p/b/protoa6c757f4d3132/p/lib/cmake/protobuf/protobuf-generate.cmake' -- Conan: Including build module from 'F:/.conan2/p/b/protoa6c757f4d3132/p/lib/cmake/protobuf/protobuf-module.cmake' -- Conan: Including build module from 'F:/.conan2/p/b/protoa6c757f4d3132/p/lib/cmake/protobuf/protobuf-options.cmake' -- Conan: Including build module from 'F:/.conan2/p/b/protoa6c757f4d3132/p/lib/cmake/protobuf/protobuf-conan-protoc-target.cmake' -- Conan: Component target declared 'flatbuffers::libflatbuffers' -- Conan: Target declared 'flatbuffers::flatbuffers' -- Conan: Including build module from 'F:/.conan2/p/b/flatb71a17782f7317/p/lib/cmake/FlatcTargets.cmake' -- Conan: Including build module from 'F:/.conan2/p/b/flatb71a17782f7317/p/lib/cmake/BuildFlatBuffers.cmake' -- Conan: Component target declared 'Snappy::snappy' -- Cannot locate shared library: tbb_debug -- Cannot locate shared library: tbb_debug -- TBB (2021.10.0) is found at F:/.conan2/p/b/openvdff378fa94719/b/build/generators CMake Error at src/cmake/ov_parallel.cmake:75 (message): Failed to detect TBB library location Call Stack (most recent call first): src/cmake/install_tbb.cmake:18 (_ov_get_tbb_location) src/cmake/install_tbb.cmake:37 (_ov_detect_dynamic_tbbbind_2_5) src/CMakeLists.txt:11 (include) ``` --- src/cmake/ov_parallel.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmake/ov_parallel.cmake b/src/cmake/ov_parallel.cmake index 1c10f1c121d8bc..110e7fe185f63f 100644 --- a/src/cmake/ov_parallel.cmake +++ b/src/cmake/ov_parallel.cmake @@ -23,7 +23,7 @@ function(_ov_get_tbb_location tbb_target _tbb_lib_location_var) get_target_property(_imported_configs ${target} IMPORTED_CONFIGURATIONS) if(NOT _imported_configs) # if IMPORTED_CONFIGURATIONS property is not set, then set a common list - set(_imported_configs RELEASE NONE) + set(_imported_configs RELEASE DEBUG NONE) if(NOT OV_GENERATOR_MULTI_CONFIG) string(TOUPPER ${CMAKE_BUILD_TYPE} _build_type) list(APPEND _imported_configs ${_build_type}) From c9deb2128ddad68d7a7abea64643527b708f75ad Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Mon, 21 Oct 2024 11:36:47 +0200 Subject: [PATCH 091/112] [IR FE] Ignore unrecognized xml rt_info entries (#27118) ### Details: - Ignores unrecognized `` entries instead of throwing ### Tickets: - CVS-155326 --- src/frontends/ir/src/ir_deserializer.cpp | 13 +++---------- src/frontends/ir/tests/rt_info_deserialization.cpp | 4 ++++ 2 files changed, 7 insertions(+), 10 
deletions(-) diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index f9ddcf1e8c14a6..7c8b6e9d4b97ab 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -968,16 +968,9 @@ std::shared_ptr ov::XmlDeserializer::create_node(const std::vector - if (!getStrAttribute(item, "name", attribute_name)) { - std::stringstream ss; - item.print(ss); - OPENVINO_THROW("rt_info attribute has no \"name\" field: ", ss.str()); - } - if (!getStrAttribute(item, "version", attribute_version)) { - std::stringstream ss; - item.print(ss); - OPENVINO_THROW("rt_info attribute: ", attribute_name, " has no \"version\" field: ", ss.str()); - } + if (!getStrAttribute(item, "name", attribute_name) || !getStrAttribute(item, "version", attribute_version)) + continue; + const auto& type_info = ov::DiscreteTypeInfo(attribute_name.c_str(), attribute_version.c_str()); auto attr = attrs_factory.create_by_type_info(type_info); if (!attr.empty()) { diff --git a/src/frontends/ir/tests/rt_info_deserialization.cpp b/src/frontends/ir/tests/rt_info_deserialization.cpp index 4313b4d19be515..466db1291e674a 100644 --- a/src/frontends/ir/tests/rt_info_deserialization.cpp +++ b/src/frontends/ir/tests/rt_info_deserialization.cpp @@ -405,11 +405,15 @@ TEST_F(RTInfoDeserialization, node_v11) { + + + + 1 22 From a3c07d582de7cae5e901184b5492f42f84a905a6 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Mon, 21 Oct 2024 14:02:09 +0400 Subject: [PATCH 092/112] [GPU] Added empty LoRA adapters support for onednn case (#27111) ### Tickets: - *[152852](https://jira.devtools.intel.com/browse/CVS-152852)* --- .../src/graph/impls/onednn/gemm_onednn.cpp | 30 +++++++++++++++---- .../impls/onednn/primitive_onednn_base.h | 6 +++- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp index 637a391b7f9e65..767128a5be2950 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp @@ -31,9 +31,13 @@ struct gemm_onednn : typed_primitive_onednn_impl { auto dnnl_engine = engine.get_onednn_engine(); { + dnnl::memory input1_mem; auto& weights = instance.input_memory(1); auto offset = onednn::get_offset(instance.get_input_layout(1), _pd.dnnl::primitive_desc_base::weights_desc(0)); - args.insert({DNNL_ARG_WEIGHTS, weights.get_onednn_memory(_pd.weights_desc(0), offset)}); + if (instance.get_input_layout(1).count() != 0) { + input1_mem = weights.get_onednn_memory(_pd.weights_desc(0), offset); + } + args.insert({DNNL_ARG_WEIGHTS, input1_mem}); } if (instance.inputs_memory_count() == 3) { @@ -86,11 +90,16 @@ struct gemm_onednn : typed_primitive_onednn_impl { const auto& in0_l = in_layouts[0]; const auto& in1_l = in_layouts[1]; - size_t in0_batched_size = in0_l.count() / (in0_l.spatial(0) * in0_l.spatial(1)); - size_t in1_batched_size = in1_l.count() / (in1_l.spatial(0) * in1_l.spatial(1)); - size_t out_batched_size = out_l.count() / (out_l.spatial(0) * out_l.spatial(1)); + bool batched_dims_can_be_removed = false; + + if (in0_l.count() != 0 && in1_l.count() != 0) { + size_t in0_batched_size = in0_l.count() / (in0_l.spatial(0) * in0_l.spatial(1)); + size_t in1_batched_size = in1_l.count() / (in1_l.spatial(0) * in1_l.spatial(1)); + size_t out_batched_size = out_l.count() / (out_l.spatial(0) * out_l.spatial(1)); + + batched_dims_can_be_removed = in0_batched_size == 1 && 
in1_batched_size == 1 && out_batched_size == 1; + } - auto batched_dims_can_be_removed = in0_batched_size == 1 && in1_batched_size == 1 && out_batched_size == 1; if (gemm_with_bias) { const auto& bias_l = in_layouts[2]; size_t bias_batched_size = bias_l.count() / (bias_l.spatial(0) * bias_l.spatial(1)); @@ -434,6 +443,17 @@ struct gemm_onednn : typed_primitive_onednn_impl { return cldnn::make_unique(engine, config, attr, *prim_desc); } + + event::ptr execute_impl(const std::vector& events, typed_primitive_inst& instance) override { + if (instance.get_input_layout(0).count() == 0 || + instance.get_input_layout(1).count() == 0) { + stream& stream = instance.get_network().get_stream(); + stream.enqueue_barrier(); + return instance.output_memory_ptr()->fill(stream, false); + } + + return parent::execute_impl(events, instance); + } }; std::unique_ptr GemmImplementationManager::create_impl(const program_node& node, const kernel_impl_params& params) const { diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 96834b6a03c35e..6a8f2cb57d275b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -455,9 +455,13 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { auto dnnl_engine = engine.get_onednn_engine(); { + dnnl::memory input_mem; auto& input = instance.input_memory(0); auto offset = onednn::get_offset(instance.get_input_layout(0), _pd.dnnl::primitive_desc_base::src_desc(0)); - args.insert({DNNL_ARG_SRC, input.get_onednn_memory(_pd.dnnl::primitive_desc_base::src_desc(0), offset)}); + if (instance.get_input_layout(0).count() != 0) { + input_mem = input.get_onednn_memory(_pd.dnnl::primitive_desc_base::src_desc(0), offset); + } + args.insert({DNNL_ARG_SRC, input_mem}); } { From 85253c4f6717d3b512821b784bd575fa92a777fd Mon Sep 17 00:00:00 2001 From: Georgy Krivoruchko Date: Mon, 21 Oct 2024 14:18:48 +0400 Subject: [PATCH 093/112] [ONNX] Update ONNX version for vcpkg (#27155) ### Details: - Delayed update ONNX version for vcpkg due to delay in the original repository ### Tickets: - N/A --- vcpkg.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcpkg.json b/vcpkg.json index 4956cee14cae9d..7214195df49506 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -80,7 +80,7 @@ "dependencies": [ { "name": "onnx", - "version>=": "1.15.0" + "version>=": "1.16.2" }, { "name": "protobuf", From 1f41cbae5d7c4a12da3e23dd1f0a33db44c9f900 Mon Sep 17 00:00:00 2001 From: Liubov Talamanova Date: Mon, 21 Oct 2024 13:25:37 +0100 Subject: [PATCH 094/112] Update NNCF WC documentation (#27101) Co-authored-by: Alexander Kozlov Co-authored-by: Tatiana Savina --- .../weight-compression.rst | 46 ++++++++++++++++--- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst index 6348ca897c5ea5..47cfed977dc3df 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst @@ -161,15 +161,16 @@ trade-offs after optimization: `Larger Group Size`: Results in faster inference and a smaller model, but might compromise accuracy. 
-* ``ratio`` controls the ratio between INT4 and INT8_ASYM compressed layers in the model.
+* ``ratio`` controls the ratio between the layers compressed to the precision defined
+  by ``mode`` and the rest of the layers that will be kept in the ``backup_mode`` in the optimized model.
   Ratio is a decimal between 0 and 1. For example, 0.8 means that 80% of layers will be
-  compressed to INT4, while the rest will be compressed to INT8_ASYM precision. The default
-  value for ratio is 1.
+  compressed to the precision defined by ``mode``, while the rest will be compressed to
+  ``backup_mode`` precision. The default value for ratio is 1.
 
-  `Higher Ratio (more INT4)`: Reduces the model size and increase inference speed but
+  `Higher Ratio (more layers set to mode precision)`: Reduces the model size and increases inference speed but
   might lead to higher accuracy degradation.
 
-  `Lower Ratio (more INT8_ASYM)`: Maintains better accuracy but results in a larger model size
+  `Lower Ratio (more layers set to backup_mode precision)`: Maintains better accuracy but results in a larger model size
   and potentially slower inference.
 
   In this example, 90% of the model's layers are quantized to INT4 asymmetrically with
 
 4 bits. The method can sometimes result in reduced accuracy when used with
 Dynamic Quantization of activations. Requires dataset.
 
+* ``gptq`` - boolean parameter that enables the GPTQ method for more accurate INT4 weight
+  quantization. Requires dataset.
+
 * ``dataset`` - calibration dataset for data-aware weight compression. It is required
-  for some compression options, for example, ``scale_estimation`` or ``awq``. Some types
+  for some compression options, for example, ``scale_estimation``, ``gptq`` or ``awq``. Some types
   of ``sensitivity_metric`` can use data for precision selection.
 
 * ``sensitivity_metric`` - controls the metric to estimate the sensitivity of compressing
 
 * ``all_layers`` - boolean parameter that enables INT4 weight quantization of all
   Fully-Connected and Embedding layers, including the first and last layers in the model.
 
+* ``lora_correction`` - boolean parameter that enables the LoRA Correction Algorithm
+  to further improve the accuracy of INT4 compressed models on top of other
+  algorithms - AWQ and Scale Estimation.
+
+* ``backup_mode`` - defines a backup precision for mixed-precision weight compression.
+  There are three modes: INT8_ASYM, INT8_SYM, and NONE, which retains
+  the original floating-point precision of the model weights (``INT8_ASYM`` is the default value).
+
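+For illustration, the options above combine into a single ``nncf.compress_weights``
+call. A minimal data-free sketch (``model`` is assumed to be an already loaded
+``openvino.Model``; the exact numbers are placeholders):
+
+.. code-block:: python
+
+   import nncf
+
+   # 90% of eligible layers go to INT4 (asymmetric) with per-group scales over
+   # 128 weights each; the remaining layers fall back to the backup precision.
+   compressed_model = nncf.compress_weights(
+       model,
+       mode=nncf.CompressWeightsMode.INT4_ASYM,
+       ratio=0.9,
+       group_size=128,
+   )
+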
+
+**Use synthetic data for LLM weight compression**
+
+It is possible to generate a synthetic dataset using the `nncf.data.generate_text_data` method for
+data-aware weight compression. The method takes a language model (e.g. from `optimum.intel.openvino`)
+and a tokenizer (e.g. from `transformers`) as input and returns the list of strings generated by the model.
+Note that dataset generation takes time and depends on various conditions, like the model size,
+requested dataset length or environment setup. Also, since the dataset is generated by the model output,
+it does not guarantee significant accuracy improvement after compression. This method is recommended
+only when a better dataset is not available. Refer to the
+`example `__
+for details of the usage.
+
+.. code-block:: python
+
+   from nncf import Dataset
+   from nncf.data import generate_text_data
+
+   # Example: Generating synthetic dataset
+   synthetic_data = generate_text_data(model, tokenizer)
+   nncf_dataset = nncf.Dataset(synthetic_data, transform_fn)
+
 For data-aware weight compression refer to the following
 `example `__.

From d34cddac9676c943f9351ad31dadf20b06e5c812 Mon Sep 17 00:00:00 2001
From: Alexey Smirnov
Date: Mon, 21 Oct 2024 16:30:05 +0100
Subject: [PATCH 095/112] [NPUW] Support mixed precision models (#27130)

Separated change from https://github.com/openvinotoolkit/openvino/pull/26263

---
 .../al/include/intel_npu/al/config/npuw.hpp   |  2 +-
 .../al/include/npuw_private_properties.hpp    |  2 +-
 .../npuw/partitioning/online/compiler.cpp     |  2 -
 .../plugin/npuw/partitioning/online/group.cpp | 12 ++++
 .../plugin/npuw/partitioning/online/group.hpp |  6 ++
 .../npuw/partitioning/online/repeated.hpp     | 15 ++++-
 .../npuw/partitioning/online/snapshot.cpp     | 64 +++++++++++++++++++
 .../npuw/partitioning/online/snapshot.hpp     |  6 +-
 .../plugin/npuw/partitioning/partitioning.cpp |  5 +-
 9 files changed, 105 insertions(+), 9 deletions(-)

diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp
index b0ecf3cd45d152..f315d333d67ae4 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp
@@ -35,7 +35,7 @@ DEFINE_OPT(NPUW_ONLINE_AVOID, std::string, "", npuw::partitioning::online::avoid
 DEFINE_OPT(NPUW_ONLINE_ISOLATE, std::string, "", npuw::partitioning::online::isolate, CompileTime);
 DEFINE_OPT(NPUW_ONLINE_NO_FOLD, std::string, "", npuw::partitioning::online::nofold, CompileTime);
 DEFINE_OPT(NPUW_ONLINE_MIN_SIZE, std::size_t, 10, npuw::partitioning::online::min_size, CompileTime);
-DEFINE_OPT(NPUW_ONLINE_KEEP_BLOCKS, std::size_t, 10, npuw::partitioning::online::keep_blocks, CompileTime);
+DEFINE_OPT(NPUW_ONLINE_KEEP_BLOCKS, std::size_t, 5, npuw::partitioning::online::keep_blocks, CompileTime);
 DEFINE_OPT(NPUW_ONLINE_KEEP_BLOCK_SIZE, std::size_t, 10, npuw::partitioning::online::keep_block_size, CompileTime);
 DEFINE_OPT(NPUW_ONLINE_DUMP_PLAN, std::string, "", npuw::partitioning::online::dump_plan, CompileTime);
 DEFINE_OPT(NPUW_PLAN, std::string, "", npuw::partitioning::plan, CompileTime);
diff --git a/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp b/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp
index 834f90db9cf9ef..a3eb4ecfa8cb63 100644
--- a/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp
+++ b/src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp
@@ -123,7 +123,7 @@ static constexpr ov::Property min_size{"NPUW_ONLINE_MIN_SIZE"};
 * Used to control fusion term criteria in online partitioning.
 * Only compatible with online partitioning.
 * Possible values: Integer > 0.
- * Default value: 10.
+ * Default value: 5.
*/ static constexpr ov::Property keep_blocks{"NPUW_ONLINE_KEEP_BLOCKS"}; diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp index a06a6f3bd1ced5..173091011d38fe 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp @@ -73,8 +73,6 @@ std::vector getAvoids(::intel_npu::Config& cfg) { std::string avoids_opt = cfg.getString<::intel_npu::NPUW_ONLINE_AVOID>(); if (avoids_opt.empty()) { - LOG_VERB(::intel_npu::NPUW_ONLINE_AVOID().key() - << " property is not set. NPU device will be prioritized for every subgraph."); return {}; } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp index cfa9e451ffb149..2b2878481f1330 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp @@ -292,6 +292,10 @@ void Group::takeFlags(const Group::GPtr& gptr_other) { m_reptrack[layer].push_back(rep); } } + // Update weights precisions + for (const auto& wp : gptr_other->m_consts_precision) { + m_consts_precision.push_back(wp); + } // Update avoids for (const auto& device : gptr_other->avoidedTargets()) { avoid(device); @@ -417,6 +421,14 @@ std::unordered_set Group::interconnect(const Group::GPtr& gptr_pro return ics; } +void Group::addWeightsPrecision(const std::vector& prec) { + m_consts_precision.insert(m_consts_precision.end(), prec.begin(), prec.end()); +} + +const std::vector& Group::getConstsPrecision() const { + return m_consts_precision; +} + std::string Group::specialTags() const { std::string tags = ""; diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp index 538eeb03bc851c..17527033173a82 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp @@ -81,6 +81,8 @@ class Group : public std::enable_shared_from_this { const std::set& avoidedTargets() const; const std::string& isolatedTag() const; std::string specialTags() const; + void addWeightsPrecision(const std::vector& prec); + const std::vector& getConstsPrecision() const; private: void includeExtraLayers(detail::OVNodeSet& input_layers, @@ -105,6 +107,10 @@ class Group : public std::enable_shared_from_this { std::set m_avoided_devices; std::string m_isol_tag = ""; + // Structure to keep track of mixed precision within initial model + // Note: partitioning is stable so keep it in a single vector + std::vector m_consts_precision; + // Unique repeated tag std::shared_ptr m_repeated = nullptr; // For each layer inside group, store it's history of repeated groups diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/repeated.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/repeated.hpp index fe34063fda211d..43eebc5f17ddb0 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/repeated.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/repeated.hpp @@ -66,13 +66,24 @@ struct hash +struct hash> { + inline size_t operator()(const std::vector& vec) const { + std::size_t seed = vec.size(); + for (const auto& s : vec) { + seed ^= s.hash() + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; + } 
+}; + template <> struct hash, std::string>> { inline size_t operator()(const std::tuple, std::string>& t) const { std::size_t seed = std::hash()(std::get<0>(t)) + 0x9e3779b9; - seed ^= std::hash()(std::get<2>(t)) + 0x9e3779b9; + seed ^= std::hash()(std::get<2>(t)) + 0x9e3779b9 + (seed << 6) + (seed >> 2); for (const auto& s : std::get<1>(t)) { - seed ^= std::hash()(s) + 0x9e3779b9; + seed ^= std::hash()(s) + 0x9e3779b9 + (seed << 6) + (seed >> 2); } return seed; } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp index 4cdc92ffc92d25..c8a27c47665021 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp @@ -45,11 +45,35 @@ bool isOp(const std::shared_ptr& node) { } return true; } + +std::vector getConstsPrecision(const std::shared_ptr& node) { + NPUW_ASSERT(!ov::op::util::is_constant(node) && !ov::op::util::is_parameter(node) && + !ov::op::util::is_output(node)); + + std::vector precisions; + + for (size_t i = 0; i < node->inputs().size(); ++i) { + auto target_input = node->get_input_source_output(i); + auto ov_node_parent = target_input.get_node()->shared_from_this(); + + if (ov::is_type(ov_node_parent)) { + auto target_op_input = ov_node_parent->get_input_source_output(0); + auto parent_op_node = target_op_input.get_node()->shared_from_this(); + + if (ov::op::util::is_constant(parent_op_node)) { + precisions.push_back(parent_op_node->get_element_type()); + } + } + } + + return precisions; +} } // namespace detail } // namespace online } // namespace npuw } // namespace ov +using ov::npuw::online::detail::getConstsPrecision; using ov::npuw::online::detail::isOp; void Snapshot::buildGraph() { @@ -68,6 +92,7 @@ void Snapshot::buildGraph() { auto nh = m_graph->create(); auto group = std::make_shared(ov_node, gid, nh, m_graph, shared_from_this()); + group->addWeightsPrecision(getConstsPrecision(ov_node)); m_graph->meta(nh).set(group); m_node_to_gr->emplace(std::make_pair(ov_node, group)); ++gid; @@ -126,6 +151,44 @@ void Snapshot::buildGraph() { LOG_INFO("DONE."); } +void Snapshot::splitMixedPrecision() { + LOG_INFO("Online partitioning: executing splitMixedPrecision pass..."); + LOG_BLOCK(); + + auto reptag_to_gset = repeating(); + // Iterate over repeated blocks + for (const auto& elem : reptag_to_gset) { + auto reptag = elem.first; + auto gset = elem.second; + + // Fill a map of ordered consts precisions to a Group + std::unordered_map, GPtrSet> prec_to_new_gset; + for (const auto& gptr : gset) { + prec_to_new_gset[gptr->getConstsPrecision()].insert(gptr); + } + + // In case all precisions match - skip + if (prec_to_new_gset.size() == 1) { + continue; + } + + // Otherwise need to split repeated block based on consts precisions + for (const auto& elem : prec_to_new_gset) { + // Assign new reptags - basically create a new repeated block + std::shared_ptr rep = std::make_shared(); + + LOG_VERB("Identified mixed precision, splitting a new repeated block of " << elem.second.size() + << " groups."); + + for (const auto& gptr : elem.second) { + gptr->setRepeated(rep); + } + } + } + + LOG_INFO("DONE"); +} + void Snapshot::singleGroup() { LOG_INFO("Online partitioning: executing singleGroup pass..."); LOG_BLOCK(); @@ -458,6 +521,7 @@ void Snapshot::repeatedBlocks(Snapshot::CB&& on_done) { return; // FROM top-level repeat! 
} }); + splitMixedPrecision(); cleanUpUniques(); LOG_INFO("Number of groups after compiler pass: " << graphSize()); diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp index 6da1a6d98939bb..0ce6766d45850f 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp @@ -18,8 +18,11 @@ namespace online { namespace detail { // At partitioning level we exclude some "non-Ops" to not interfere with the passes. -// We include some of them back to properly link everything at plugin level +// We include some of them back to properly link everything at plugin level. bool isOp(const std::shared_ptr& node); +// Find Const->Convert->Node if any and return Const precisions. +// Used for mixed-precision models to properly identify repeated blocks. +std::vector getConstsPrecision(const std::shared_ptr& node); } // namespace detail // Core part of the partitioning algorithm which implements a list of graph passes. @@ -69,6 +72,7 @@ class Snapshot : public std::enable_shared_from_this { void identifyUniques(); void mergeUniques(); void mergeTriangles(); + void splitMixedPrecision(); void cleanUpUniques(); void afterUniques(); void markInternalCompute(); diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp index f12350e8952eaa..6c7f996acca22f 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp @@ -111,7 +111,7 @@ ov::npuw::Ensemble load_groups(const std::shared_ptr& model, const st std::ifstream ifs(path_to_plan); if (!ifs) { - LOG_ERROR("Couldn't open " << ::intel_npu::NPUW_PLAN().key() << "pointing to " << path_to_plan << "!"); + LOG_ERROR("Couldn't open " << ::intel_npu::NPUW_PLAN().key() << " pointing to " << path_to_plan << "!"); return {}; } @@ -276,6 +276,7 @@ class Partitioner { if (!ov::is_type(node_ptr)) { OPENVINO_THROW("NPUW: trying to get a unique name of a non-Constant node"); } + // FIXME: cache this return node_ptr->get_friendly_name() + " with meta " + ov::npuw::online::util::getMetaDesc(node_ptr) + " with output " + (*node_ptr->output(0).get_target_inputs().begin()).get_node()->description(); } @@ -2160,7 +2161,7 @@ ov::npuw::Partitioning ov::npuw::getPartitioning(const std::shared_ptr(); if (file_path.empty()) { - LOG_WARN("No " << ::intel_npu::NPUW_PLAN().key() << " property is provided! Using online partitioning."); + LOG_INFO("No " << ::intel_npu::NPUW_PLAN().key() << " property is provided! 
Using online partitioning."); ens = ov::npuw::online::buildPartitioning(model, cfg); } else { ens = load_groups(model, file_path); From 5f4a445c8d588b37f978cd812661b439da2d63e8 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Mon, 21 Oct 2024 19:41:24 +0400 Subject: [PATCH 096/112] [CPU] CACHE_DIR hash optimization (#25624) ### Details: - *JIT implementation of the hash function in the ConstantWriter* ### Tickets: - *127331* --- src/core/CMakeLists.txt | 5 +- .../dev_api/openvino/runtime/compute_hash.hpp | 20 + src/core/reference/CMakeLists.txt | 3 - .../reference/utils}/jit_generator.hpp | 70 +- .../reference/utils/registers_pool.hpp | 247 +++++ src/core/reference/src/op/convert.cpp | 6 +- .../src/{op => utils}/jit_generator.cpp | 19 +- .../reference/src/utils/registers_pool.cpp | 106 ++ src/core/src/pass/serialize.cpp | 105 +- src/core/src/runtime/compute_hash.cpp | 918 ++++++++++++++++++ 10 files changed, 1410 insertions(+), 89 deletions(-) create mode 100644 src/core/dev_api/openvino/runtime/compute_hash.hpp rename src/core/reference/{src/op => include/openvino/reference/utils}/jit_generator.hpp (59%) create mode 100644 src/core/reference/include/openvino/reference/utils/registers_pool.hpp rename src/core/reference/src/{op => utils}/jit_generator.cpp (91%) create mode 100644 src/core/reference/src/utils/registers_pool.cpp create mode 100644 src/core/src/runtime/compute_hash.cpp diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index bc42ffca8a3cf6..5ea4a21b705489 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -49,6 +49,9 @@ target_include_directories(openvino_core_dev INTERFACE $ $) +target_include_directories(openvino_core_dev SYSTEM INTERFACE + $:$>>) + target_link_libraries(openvino_core_dev INTERFACE openvino::itt openvino::util) set_target_properties(openvino_core_dev PROPERTIES EXPORT_NAME core::dev) @@ -81,7 +84,7 @@ if(ENABLE_SYSTEM_PUGIXML) set_target_properties(openvino_core_obj PROPERTIES NO_SYSTEM_FROM_IMPORTED ON) endif() -target_compile_definitions(openvino_core_obj PRIVATE IMPLEMENT_OPENVINO_API) +target_compile_definitions(openvino_core_obj PRIVATE IMPLEMENT_OPENVINO_API XBYAK_NO_OP_NAMES XBYAK64) ov_build_target_faster(openvino_core_obj UNITY diff --git a/src/core/dev_api/openvino/runtime/compute_hash.hpp b/src/core/dev_api/openvino/runtime/compute_hash.hpp new file mode 100644 index 00000000000000..47a90d589be4ee --- /dev/null +++ b/src/core/dev_api/openvino/runtime/compute_hash.hpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace ov { +namespace runtime { + +/** + * @brief Computes the hash value for the input data + * @param src A pointer to the input data + * @param size The length of the input data in bytes + */ +size_t compute_hash(const void* src, size_t size); + +} // namespace runtime +} // namespace ov diff --git a/src/core/reference/CMakeLists.txt b/src/core/reference/CMakeLists.txt index f7874964233cf5..b62cf02f23f4f1 100644 --- a/src/core/reference/CMakeLists.txt +++ b/src/core/reference/CMakeLists.txt @@ -50,9 +50,6 @@ target_include_directories(${TARGET_NAME} PUBLIC $ $) -target_include_directories(${TARGET_NAME} SYSTEM PRIVATE - $:$>>) - find_package(Threads REQUIRED) target_link_libraries(${TARGET_NAME} PRIVATE Threads::Threads openvino::core::dev) diff --git a/src/core/reference/src/op/jit_generator.hpp b/src/core/reference/include/openvino/reference/utils/jit_generator.hpp similarity index 59% rename from 
src/core/reference/src/op/jit_generator.hpp rename to src/core/reference/include/openvino/reference/utils/jit_generator.hpp index b4b9cd7a60c23f..539f686020049c 100644 --- a/src/core/reference/src/op/jit_generator.hpp +++ b/src/core/reference/include/openvino/reference/utils/jit_generator.hpp @@ -15,7 +15,6 @@ namespace ov { namespace reference { namespace jit { -#ifdef XBYAK64 static const Xbyak::Operand::Code abi_save_gpr_regs[] = { Xbyak::Operand::RBX, Xbyak::Operand::RBP, @@ -23,28 +22,42 @@ static const Xbyak::Operand::Code abi_save_gpr_regs[] = { Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15, -# ifdef _WIN32 +#ifdef _WIN32 Xbyak::Operand::RDI, Xbyak::Operand::RSI, -# endif +#endif }; -# ifdef _WIN32 -# define abi_param1 Xbyak::Reg64(Xbyak::Operand::RCX) // RCX -# else -# define abi_param1 Xbyak::Reg64(Xbyak::Operand::RDI) // RDI -# endif -#endif // XBYAK64 +#ifdef _WIN32 +# define abi_param1 Xbyak::Reg64(Xbyak::Operand::RCX) // RCX +#else +# define abi_param1 Xbyak::Reg64(Xbyak::Operand::RDI) // RDI +#endif -class Generator : public Xbyak::CodeGenerator { - static constexpr size_t xmm_len = 16; +typedef enum { + isa_any, + sse42, + avx, + avx2, + avx512_common, + avx512_core, + avx512_core_vnni, + avx512_mic, + avx512_mic_4ops, + avx512_core_bf16, + avx512_vpopcnt, + fp16, + pclmulqdq, + vpclmulqdq +} cpu_isa_t; +class Generator : public Xbyak::CodeGenerator { #ifdef _WIN32 - static constexpr size_t xmm_to_preserve_start = 6; - static constexpr size_t xmm_to_preserve = 10; + static constexpr size_t xmm_to_preserve_start = 6llu; + static constexpr size_t xmm_to_preserve = 10llu; #else - static constexpr size_t xmm_to_preserve_start = 0; - static constexpr size_t xmm_to_preserve = 0; + static constexpr size_t xmm_to_preserve_start = 0lu; + static constexpr size_t xmm_to_preserve = 0lu; #endif static const size_t num_abi_save_gpr_regs = sizeof(abi_save_gpr_regs) / sizeof(abi_save_gpr_regs[0]); @@ -52,29 +65,19 @@ class Generator : public Xbyak::CodeGenerator { const Xbyak::Reg64 reg_EVEX_max_8b_offt; static constexpr int EVEX_max_8b_offt = 0x200; + size_t m_vlen = ymm_len; public: - const Xbyak::Reg64 param = abi_param1; + static constexpr size_t xmm_len = 16lu; + static constexpr size_t ymm_len = 32lu; + static constexpr size_t zmm_len = 64lu; - typedef enum { - isa_any, - sse42, - avx, - avx2, - avx512_common, - avx512_core, - avx512_core_vnni, - avx512_mic, - avx512_mic_4ops, - avx512_core_bf16, - avx512_vpopcnt, - fp16 - } cpu_isa_t; + const Xbyak::Reg64 param = abi_param1; static bool mayiuse(const cpu_isa_t cpu_isa); static bool is_x64(); - Generator(void* code_ptr = nullptr, size_t code_size = 16 * 1024); + Generator(cpu_isa_t isa = avx2, void* code_ptr = nullptr, size_t code_size = 16lu * 1024lu); void preamble(); void postamble(); @@ -85,7 +88,12 @@ class Generator : public Xbyak::CodeGenerator { template void copy(const Xbyak::Reg64& dst, const Xbyak::Reg64& src, const Xbyak::Reg64& size); + + size_t get_vlen() { + return m_vlen; + } }; + } // namespace jit } // namespace reference } // namespace ov diff --git a/src/core/reference/include/openvino/reference/utils/registers_pool.hpp b/src/core/reference/include/openvino/reference/utils/registers_pool.hpp new file mode 100644 index 00000000000000..62dfe01ec4ef1d --- /dev/null +++ b/src/core/reference/include/openvino/reference/utils/registers_pool.hpp @@ -0,0 +1,247 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + 
+#include "openvino/core/except.hpp" +#include "openvino/reference/utils/jit_generator.hpp" +namespace ov { +namespace reference { +namespace jit { + +class RegistersPool { +public: + using Ptr = std::shared_ptr; + using WeakPtr = std::weak_ptr; + static constexpr int any_idx = -1; + + template + class Reg { + friend class RegistersPool; + + public: + Reg() {} + Reg(const RegistersPool::Ptr& regPool) { + initialize(regPool); + } + Reg(const RegistersPool::Ptr& regPool, int requested_idx) { + initialize(regPool, requested_idx); + } + ~Reg() { + release(); + } + Reg& operator=(Reg&& other) noexcept { + release(); + reg = other.reg; + regPool = std::move(other.regPool); + return *this; + } + Reg(Reg&& other) noexcept : reg(other.reg), regPool(std::move(other.regPool)) {} + operator TReg&() { + ensure_valid(); + return reg; + } + operator const TReg&() const { + ensure_valid(); + return reg; + } + operator Xbyak::RegExp() const { + ensure_valid(); + return reg; + } + int getIdx() const { + ensure_valid(); + return reg.getIdx(); + } + friend Xbyak::RegExp operator+(const Reg& lhs, const Xbyak::RegExp& rhs) { + lhs.ensure_valid(); + return lhs.operator Xbyak::RegExp() + rhs; + } + void release() { + if (auto pool = regPool.lock()) { + pool->return_to_pool(reg); + regPool.reset(); + } + } + bool is_initialized() const { + return !regPool.expired(); + } + + private: + void ensure_valid() const { + if (!is_initialized()) { + OPENVINO_THROW("RegistersPool::Reg is either not initialized or released"); + } + } + + void initialize(const RegistersPool::Ptr& pool, int requested_idx = any_idx) { + release(); + reg = TReg(pool->template get_free(requested_idx)); + regPool = pool; + } + + private: + TReg reg; + RegistersPool::WeakPtr regPool; + }; + + virtual ~RegistersPool() { + check_unique_and_update(false); + } + + template + static Ptr create(std::initializer_list regsToExclude); + + static Ptr create(cpu_isa_t isa, std::initializer_list regsToExclude); + + template + size_t count_free() const { + if (std::is_base_of::value) { + return m_simd_set.count_unused(); + } else if (std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value) { + return m_general_set.count_unused(); + } else if (std::is_same::value) { + return count_unused_opmask(); + } + } + +protected: + class PhysicalSet { + public: + PhysicalSet(int size) : m_is_free_index_vector(size, true) {} + + void set_as_used(size_t reg_idx); + + void set_as_unused(size_t reg_idx); + + size_t get_unused(size_t requested_idx); + + void exclude(Xbyak::Reg reg) { + m_is_free_index_vector.at(reg.getIdx()) = false; + } + + size_t count_unused() const; + + private: + size_t get_first_free_index(); + + private: + std::vector m_is_free_index_vector; + }; + + virtual int get_free_opmask(int requested_idx) { + OPENVINO_THROW("get_free_opmask: The Opmask is not supported in current instruction set"); + } + virtual void return_opmask_to_pool(int idx) { + OPENVINO_THROW("return_opmask_to_pool: The Opmask is not supported in current instruction set"); + } + virtual size_t count_unused_opmask() const { + OPENVINO_THROW("count_unused_opmask: The Opmask is not supported in current instruction set"); + } + + RegistersPool(int simd_registers_number); + + RegistersPool(std::initializer_list regsToExclude, int simd_registers_number); + +private: + template + int get_free(int requested_idx) { + if (std::is_base_of::value) { + auto idx = m_simd_set.get_unused(requested_idx); + m_simd_set.set_as_used(idx); + return static_cast(idx); + } else 
if (std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value) { + auto idx = m_general_set.get_unused(requested_idx); + m_general_set.set_as_used(idx); + return static_cast(idx); + } else if (std::is_same::value) { + return get_free_opmask(requested_idx); + } + } + + template + void return_to_pool(const TReg& reg) { + if (std::is_base_of::value) { + m_simd_set.set_as_unused(reg.getIdx()); + } else if (std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value) { + m_general_set.set_as_unused(reg.getIdx()); + } else if (std::is_same::value) { + return_opmask_to_pool(reg.getIdx()); + } + } + + void check_unique_and_update(bool isCtor = true); + + PhysicalSet m_general_set; + PhysicalSet m_simd_set; +}; + +template +class IsaRegistersPool : public RegistersPool { +public: + IsaRegistersPool(std::initializer_list regsToExclude) : RegistersPool(regsToExclude, 32) {} +}; + +template <> +class IsaRegistersPool : public RegistersPool { +public: + IsaRegistersPool() : RegistersPool(32) { + m_opmask_set.exclude( + Xbyak::Opmask(0)); // the Opmask(0) has special meaning for some instructions, like gather instruction + } + + IsaRegistersPool(std::initializer_list regsToExclude) : RegistersPool(regsToExclude, 32) { + for (auto& reg : regsToExclude) { + if (reg.isOPMASK()) { + m_opmask_set.exclude(reg); + } + } + } + + int get_free_opmask(int requested_idx) override { + auto idx = static_cast(m_opmask_set.get_unused(requested_idx)); + m_opmask_set.set_as_used(idx); + return idx; + } + + void return_opmask_to_pool(int idx) override { + m_opmask_set.set_as_unused(idx); + } + + size_t count_unused_opmask() const override { + return m_opmask_set.count_unused(); + } + +protected: + PhysicalSet m_opmask_set{8}; +}; + +template +RegistersPool::Ptr RegistersPool::create(std::initializer_list regsToExclude) { + return std::make_shared>(regsToExclude); +} + +inline RegistersPool::Ptr RegistersPool::create(cpu_isa_t isa, std::initializer_list regsToExclude) { +#define ISA_SWITCH_CASE(isa) \ + case isa: \ + return std::make_shared>(regsToExclude); + switch (isa) { + ISA_SWITCH_CASE(avx2) + ISA_SWITCH_CASE(avx512_core) + default: + OPENVINO_THROW("Invalid isa argument in RegistersPool::create(): ", isa); + } +#undef ISA_SWITCH_CASE +} + +} // namespace jit +} // namespace reference +} // namespace ov diff --git a/src/core/reference/src/op/convert.cpp b/src/core/reference/src/op/convert.cpp index 5054121b5615c0..034734afd8fd2a 100644 --- a/src/core/reference/src/op/convert.cpp +++ b/src/core/reference/src/op/convert.cpp @@ -7,7 +7,7 @@ #include "openvino/reference/utils/convert_util.hpp" #ifdef OV_CORE_USE_XBYAK_JIT -# include "jit_generator.hpp" +# include "openvino/reference/utils/jit_generator.hpp" #endif #ifdef OV_CORE_USE_INTRINSICS @@ -256,7 +256,7 @@ class jit_convert_array : public jit::Generator { template static fn_t get() { - if (is_x64() && mayiuse(avx) && mayiuse(avx2) && mayiuse(fp16)) { + if (is_x64() && mayiuse(jit::avx) && mayiuse(jit::avx2) && mayiuse(jit::fp16)) { static const jit_convert_array::context_t context{{sizeof(src_t), &jit::Generator::copy}, {sizeof(dst_t), &jit::Generator::copy}, jit_convert_vec, @@ -460,7 +460,7 @@ class jit_count_out_of_range : public jit::Generator { template static fn_t get() { - if (is_x64() && mayiuse(avx2)) { + if (is_x64() && mayiuse(jit::avx2)) { static const jit_count_out_of_range::context_t context{ {sizeof(data_t), &jit::Generator::copy}, jit_count_out_of_range_vec_prepare, diff --git 
a/src/core/reference/src/op/jit_generator.cpp b/src/core/reference/src/utils/jit_generator.cpp similarity index 91% rename from src/core/reference/src/op/jit_generator.cpp rename to src/core/reference/src/utils/jit_generator.cpp index 7d7da06d5da8d5..39dc31c0033f9f 100644 --- a/src/core/reference/src/op/jit_generator.cpp +++ b/src/core/reference/src/utils/jit_generator.cpp @@ -11,9 +11,10 @@ # endif # include -# include "jit_generator.hpp" +# include "openvino/core/except.hpp" # include "openvino/core/type/bfloat16.hpp" # include "openvino/core/type/float16.hpp" +# include "openvino/reference/utils/jit_generator.hpp" namespace ov { namespace reference { @@ -51,6 +52,10 @@ bool Generator::mayiuse(const cpu_isa_t cpu_isa) { return true && cpu.has(Cpu::tAVX512_VPOPCNTDQ); case fp16: return cpu.has(Cpu::tF16C); + case cpu_isa_t::pclmulqdq: + return cpu.has(Cpu::tPCLMULQDQ); + case cpu_isa_t::vpclmulqdq: + return cpu.has(Cpu::tVPCLMULQDQ); case isa_any: return true; } @@ -60,10 +65,18 @@ bool Generator::mayiuse(const cpu_isa_t cpu_isa) { bool Generator::is_x64() { return sizeof(void*) == 8; } -Generator::Generator(void* code_ptr, size_t code_size) +Generator::Generator(cpu_isa_t isa, void* code_ptr, size_t code_size) : Xbyak::CodeGenerator(code_size, code_ptr), size_of_abi_save_regs(num_abi_save_gpr_regs * rax.getBit() / 8 + xmm_to_preserve * xmm_len), - reg_EVEX_max_8b_offt(rbp) {} + reg_EVEX_max_8b_offt(rbp) { + if (isa == avx512_core) { + m_vlen = zmm_len; + } else if (isa == avx2) { + m_vlen = ymm_len; + } else { + OPENVINO_THROW("Unsupported isa: ", isa); + } +} void Generator::preamble() { if (xmm_to_preserve) { diff --git a/src/core/reference/src/utils/registers_pool.cpp b/src/core/reference/src/utils/registers_pool.cpp new file mode 100644 index 00000000000000..413fdcc3ed83cf --- /dev/null +++ b/src/core/reference/src/utils/registers_pool.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/visibility.hpp" + +#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) +# include "openvino/reference/utils/registers_pool.hpp" + +namespace ov { +namespace reference { +namespace jit { + +RegistersPool::RegistersPool(int simd_registers_number) : m_general_set(16), m_simd_set(simd_registers_number) { + check_unique_and_update(); + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RSP)); + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RAX)); + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RCX)); + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RDI)); + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RBP)); +} + +RegistersPool::RegistersPool(std::initializer_list regsToExclude, int simd_registers_number) + : m_general_set(16), + m_simd_set(simd_registers_number) { + check_unique_and_update(); + for (auto& reg : regsToExclude) { + if (reg.isXMM() || reg.isYMM() || reg.isZMM()) { + m_simd_set.exclude(reg); + } else if (reg.isREG()) { + m_general_set.exclude(reg); + } + } + m_general_set.exclude(Xbyak::Reg64(Xbyak::Operand::RSP)); +} + +void RegistersPool::check_unique_and_update(bool is_ctor) { + static thread_local bool is_created = false; + if (is_ctor) { + if (is_created) { + OPENVINO_THROW("There should be only one instance of RegistersPool per thread"); + } + is_created = true; + } else { + is_created = false; + } +} + +void RegistersPool::PhysicalSet::set_as_used(size_t reg_idx) { + if (reg_idx >= m_is_free_index_vector.size()) { + OPENVINO_THROW("reg_idx is out of bounds in 
RegistersPool::PhysicalSet::set_as_used()"); + } + if (!m_is_free_index_vector[reg_idx]) { + OPENVINO_THROW("Inconsistency in RegistersPool::PhysicalSet::set_as_used()"); + } + m_is_free_index_vector[reg_idx] = false; +} + +void RegistersPool::PhysicalSet::set_as_unused(size_t reg_idx) { + if (reg_idx >= m_is_free_index_vector.size()) { + OPENVINO_THROW("reg_idx is out of bounds in RegistersPool::PhysicalSet::set_as_used()"); + } + if (m_is_free_index_vector[reg_idx]) { + OPENVINO_THROW("Inconsistency in RegistersPool::PhysicalSet::set_as_unused()"); + } + m_is_free_index_vector[reg_idx] = true; +} + +size_t RegistersPool::PhysicalSet::get_unused(size_t requested_idx) { + if (requested_idx == static_cast(any_idx)) { + return get_first_free_index(); + } else { + if (requested_idx >= m_is_free_index_vector.size()) { + OPENVINO_THROW("requested_idx is out of bounds in RegistersPool::PhysicalSet::get_unused()"); + } + if (!m_is_free_index_vector[requested_idx]) { + OPENVINO_THROW("The register with index #", requested_idx, " already used in the RegistersPool"); + } + return requested_idx; + } +} + +size_t RegistersPool::PhysicalSet::count_unused() const { + size_t count = 0; + for (const auto& isFree : m_is_free_index_vector) { + if (isFree) { + ++count; + } + } + return count; +} + +size_t RegistersPool::PhysicalSet::get_first_free_index() { + for (size_t c = 0; c < m_is_free_index_vector.size(); ++c) { + if (m_is_free_index_vector[c]) { + return c; + } + } + OPENVINO_THROW("Not enough registers in the RegistersPool"); +} + +} // namespace jit +} // namespace reference +} // namespace ov + +#endif // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64 diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index 409dcad066d7a6..3af6d2c4b5313f 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -23,6 +23,7 @@ #include "openvino/pass/constant_folding.hpp" #include "openvino/reference/convert.hpp" #include "openvino/runtime/aligned_buffer.hpp" +#include "openvino/runtime/compute_hash.hpp" #include "openvino/runtime/string_aligned_buffer.hpp" #include "openvino/util/file_util.hpp" #include "pugixml.hpp" @@ -30,6 +31,18 @@ #include "transformations/rt_info/disable_fp16_compression.hpp" #include "transformations/rt_info/primitives_priority_attribute.hpp" +namespace ov { +class OstreamHashWrapperBin final : public std::streambuf { + uint64_t m_res = 0lu; + +public: + uint64_t getResult() const { + return m_res; + } + std::streamsize xsputn(const char* s, std::streamsize n) override; +}; +} // namespace ov + namespace { // helpers template std::string join(const Container& c, const char* glue = ", ") { @@ -69,23 +82,6 @@ std::string translate_type_name(const std::string& name) { return name; } -size_t hash_combine(const void* v, int64_t size) { - constexpr auto cel_size = sizeof(size_t); - auto seed = static_cast(size); - const auto data = static_cast(v); - const auto d_end = std::next(data, size / cel_size); - // The constant value used as a magic number has been - // traditionally used e.g. in boost library's hash_combine. - // It happens to be derived from the golden ratio. 
- for (auto d = data; d != d_end; ++d) { - seed ^= *d + 0x9e3779b9 + (seed << 6) + (seed >> 2); - } - size_t last_bytes{0}; - std::memcpy(&last_bytes, d_end, size % cel_size); - seed ^= last_bytes + 0x9e3779b9 + (seed << 6) + (seed >> 2); - return seed; -} - class ConstantWriter { public: using FilePosition = int64_t; @@ -95,16 +91,18 @@ class ConstantWriter { ConstantWriter(std::ostream& bin_data, bool enable_compression = true) : m_binary_output(bin_data), m_enable_compression(enable_compression), - m_blob_offset(bin_data.tellp()) {} + m_blob_offset(bin_data.tellp()) { + m_write_hash_value = (dynamic_cast(bin_data.rdbuf())) ? true : false; + } FilePosition write(const char* ptr, size_t size, - size_t* new_size, + size_t& new_size, bool compress_to_fp16 = false, ov::element::Type src_type = ov::element::dynamic) { const FilePosition write_pos = m_binary_output.tellp(); const auto offset = write_pos - m_blob_offset; - *new_size = size; + new_size = size; if (!m_enable_compression) { if (!compress_to_fp16) { @@ -112,7 +110,7 @@ class ConstantWriter { } else { OPENVINO_ASSERT(size % src_type.size() == 0); auto fp16_buffer = compress_data_to_fp16(ptr, size, src_type, new_size); - m_binary_output.write(fp16_buffer.get(), *new_size); + m_binary_output.write(fp16_buffer.get(), new_size); } return offset; } else { @@ -132,18 +130,24 @@ class ConstantWriter { // the same hash for {2, 2} and {0, 128} arrays. // But even strong hashing algorithms sometimes give collisions. // Therefore we always have to compare values when finding a match in the hash multimap. - const HashValue hash = hash_combine(ptr_to_write, *new_size); + const HashValue hash = ov::runtime::compute_hash(ptr_to_write, new_size); + auto found = m_hash_to_file_positions.find(hash); // iterate over all matches of the key in the multimap while (found != m_hash_to_file_positions.end()) { - if (memcmp(ptr, found->second.second, size) == 0) + if (memcmp(ptr, found->second.second, size) == 0) { return found->second.first; + } found++; } // Since fp16_compressed data will be disposed at exit point and since we cannot reread it from the ostream, // we store pointer to the original uncompressed blob. 
m_hash_to_file_positions.insert({hash, {offset, static_cast(ptr)}}); - m_binary_output.write(ptr_to_write, *new_size); + if (m_write_hash_value) { + m_binary_output.write(reinterpret_cast(&hash), sizeof(uint64_t)); + } else { + m_binary_output.write(ptr_to_write, new_size); + } } return offset; } @@ -152,17 +156,17 @@ class ConstantWriter { static std::unique_ptr compress_data_to_fp16(const char* ptr, size_t size, ov::element::Type src_type, - size_t* compressed_size) { + size_t& compressed_size) { auto num_src_elements = size / src_type.size(); - *compressed_size = num_src_elements * ov::element::f16.size(); + compressed_size = num_src_elements * ov::element::f16.size(); if (src_type == ov::element::f32) { - auto new_ptr = std::unique_ptr(new char[*compressed_size]); + auto new_ptr = std::unique_ptr(new char[compressed_size]); auto dst_data = reinterpret_cast(new_ptr.get()); auto src_data = reinterpret_cast(ptr); ov::reference::convert_from_f32_to_f16_with_clamp(src_data, dst_data, num_src_elements); return new_ptr; } else if (src_type == ov::element::f64) { - auto new_ptr = std::unique_ptr(new char[*compressed_size]); + auto new_ptr = std::unique_ptr(new char[compressed_size]); auto dst_data = reinterpret_cast(new_ptr.get()); auto src_data = reinterpret_cast(ptr); @@ -188,6 +192,7 @@ class ConstantWriter { ConstWritePositions m_hash_to_file_positions; std::ostream& m_binary_output; bool m_enable_compression; + bool m_write_hash_value = false; FilePosition m_blob_offset; // blob offset inside output stream }; @@ -531,7 +536,7 @@ class XmlSerializer : public ov::AttributeVisitor { int64_t offset = m_constant_write_handler.write(reinterpret_cast(header_ptr.get()), header_size, - &inter_size, + inter_size, m_compress_to_fp16, m_output_element_type); new_size += inter_size; @@ -554,7 +559,7 @@ class XmlSerializer : public ov::AttributeVisitor { m_constant_write_handler.write(raw_string_ptr, raw_string_size, - &inter_size, + inter_size, m_compress_to_fp16, m_output_element_type); new_size += inter_size; @@ -568,7 +573,7 @@ class XmlSerializer : public ov::AttributeVisitor { size_t new_size; int64_t offset = m_constant_write_handler.write(static_cast(a->get()->get_ptr()), size, - &new_size, + new_size, m_compress_to_fp16, m_output_element_type); @@ -1393,10 +1398,19 @@ bool pass::StreamSerialize::run_on_model(const std::shared_ptr& model /// -------- Hash calculation pass ------------- namespace { -template -static uint64_t hash_combine(uint64_t seed, const T& a) { - // Hash combine formula from boost - return seed ^ (std::hash()(a) + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +// Hash combine formula from boost for uint64_t. +inline uint64_t hash_combine(uint64_t h, uint64_t k) { + constexpr uint64_t m = 0xc6a4a7935bd1e995; + constexpr int r = 47; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + + return h + 0xe6546b64; } class OstreamHashWrapper final : public std::streambuf { @@ -1408,28 +1422,23 @@ class OstreamHashWrapper final : public std::streambuf { } std::streamsize xsputn(const char* s, std::streamsize n) override { - // Reinterpret data as uint32_t and accumulate in uint64_t to avoid overflow fluctuations in parallel_sum. 
- auto* int_sum = reinterpret_cast(s); - const uint64_t n32 = n / sizeof(uint32_t); - - m_res += parallel_sum(n32, uint64_t(0lu), [&](size_t k) -> uint32_t { - return int_sum[k]; - }); - - const uint64_t rest = n % sizeof(uint32_t); - for (uint64_t i = 0lu; i < rest; i++) { - m_res += s[n - rest + i]; - } + uint64_t h = ov::runtime::compute_hash(s, n); + m_res = hash_combine(m_res, h); return n; } }; } // namespace +std::streamsize OstreamHashWrapperBin::xsputn(const char* s, std::streamsize n) { + m_res = hash_combine(m_res, *reinterpret_cast(s)); + return n; +} + bool pass::Hash::run_on_model(const std::shared_ptr& model) { RUN_ON_MODEL_SCOPE(Hash); OstreamHashWrapper xmlHash; - OstreamHashWrapper binHash; + OstreamHashWrapperBin binHash; std::ostream xml(&xmlHash); std::ostream bin(&binHash); diff --git a/src/core/src/runtime/compute_hash.cpp b/src/core/src/runtime/compute_hash.cpp new file mode 100644 index 00000000000000..c1a5a40c8638de --- /dev/null +++ b/src/core/src/runtime/compute_hash.cpp @@ -0,0 +1,918 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// The CRC computation is used for x86. +// The calculations were taken from the article +// "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction - Intel (December, 2009)". + +#include "openvino/runtime/compute_hash.hpp" + +#include +#include +#include + +#include "openvino/core/visibility.hpp" + +#if !defined(OS_CHROMEOS) && (defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)) +# define OV_CORE_USE_XBYAK_JIT +#endif + +#ifdef OV_CORE_USE_XBYAK_JIT +# include "openvino/core/parallel.hpp" +# include "openvino/reference/utils/registers_pool.hpp" +#endif // OV_CORE_USE_XBYAK_JIT + +namespace ov { +namespace runtime { + +#ifdef OV_CORE_USE_XBYAK_JIT + +using namespace ov::reference::jit; + +namespace jit { + +# define GET_OFF(field) offsetof(ComputeHashCallArgs, field) +# define getReg64() RegistersPool::Reg(m_registers_pool) +# define getVmm() RegistersPool::Reg(m_registers_pool) +# define getXmm() RegistersPool::Reg(m_registers_pool) + +enum KernelType { SINGLE_THREAD = 0, FIRST_THREAD, N_THREAD, FINAL_FOLD }; + +struct ComputeHashCompileParams { + KernelType type; +}; + +struct ComputeHashCallArgs { + const void* src_ptr = nullptr; + void* dst_ptr = nullptr; + const void* k_ptr = nullptr; + void* intermediate_ptr = nullptr; + uint64_t work_amount = 0lu; + uint64_t size = 0lu; + uint64_t threads_num = 1lu; +}; + +typedef void (*hash_kernel)(const ComputeHashCallArgs*); + +static const uint8_t SHUF_MASK[16] = {0b00001111, + 0b00001110, + 0b00001101, + 0b00001100, + 0b00001011, + 0b00001010, + 0b00001001, + 0b00001000, + 0b00000111, + 0b00000110, + 0b00000101, + 0b00000100, + 0b00000011, + 0b00000010, + 0b00000001, + 0b00000000}; + +constexpr uint64_t CRC_VAL = 0xffffffffffffffff; + +// POLYNOM(x) = 0x42F0E1EBA9EA3693 +constexpr uint64_t K_2 = 0x05f5c3c7eb52fab6; // x^(64*2) +constexpr uint64_t P_1 = 0x578d29d06cc4f872; // floor(x^128/P(x))-x^64 +constexpr uint64_t P_2 = 0x42f0e1eba9ea3693; // P(x)-x^64 +static const uint64_t K_PULL[] = { + K_2, // x^(64*2) + 0x4eb938a7d257740e, // x^(64*3) + 0x571bee0a227ef92b, // x^(64*4) + 0x44bef2a201b5200c, // x^(64*5) + 0x54819d8713758b2c, // x^(64*6) + 0x4a6b90073eb0af5a, // x^(64*7) + 0x5f6843ca540df020, // x^(64*8) + 0xddf4b6981205b83f, // x^(64*9) + 0x097c516e98bd2e73, // x^(64*10) + 0x0b76477b31e22e7b, // x^(64*11) + 0x9af04e1eff82d0dd, // x^(64*12) + 0x6e82e609297f8fe8, // x^(64*13) + 0xe464f4df5fb60ac1, // 
x^(64*14) + 0xb649c5b35a759cf2, // x^(64*15) + 0x05cf79dea9ac37d6, // x^(64*16) + 0x001067e571d7d5c2 // x^(64*17) +}; + +constexpr uint64_t K_2_3_OFF = 0lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_4_5_OFF = 1lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_6_7_OFF = 2lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_8_9_OFF = 3lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_10_11_OFF = 4lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_12_13_OFF = 5lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_14_15_OFF = 6lu * 2lu * sizeof(uint64_t); +constexpr uint64_t K_16_17_OFF = 7lu * 2lu * sizeof(uint64_t); + +class HashBase : public Generator { +protected: + void (*ker_fn)(const ComputeHashCallArgs*); + +public: + HashBase(cpu_isa_t isa) : Generator(isa) {} + + virtual void generate() = 0; + + void operator()(const ComputeHashCallArgs* args) { + ker_fn(args); + } + + virtual void create_kernel() { + generate(); + ker_fn = (decltype(ker_fn))getCode(); + OPENVINO_ASSERT(ker_fn, "[ CORE ] Could not generate kernel code."); + } +}; + +template +class ComputeHash : public HashBase { +public: + explicit ComputeHash(const ComputeHashCompileParams& jcp) : HashBase(isa), m_jcp(jcp) { + if (!mayiuse(cpu_isa_t::pclmulqdq)) { + OPENVINO_THROW( + "The current CPU does not support pclmulqdq instruction, which is required for the CRC algorithm."); + } + if (mayiuse(cpu_isa_t::vpclmulqdq)) { + is_vpclmulqdq = true; + } + } + + void generate() override { + m_registers_pool = RegistersPool::create(isa, {rax, rcx, rsp, rdi, k0}); + + r64_src_ptr = getReg64(); + r64_dst_ptr = getReg64(); + r64_work_amount = getReg64(); + r64_k_ptr = getReg64(); + r64_aux = getReg64(); + v_k_2_3 = getVmm(); + v_shuf_mask = getVmm(); + auto v_dst = getVmm(); + + this->preamble(); + + initialize(v_dst); + bulk_fold(v_dst); + join(v_dst); + fold_to_128(v_dst); + fold_to_64(v_dst); + + this->postamble(); + m_registers_pool.reset(); + } + + static std::shared_ptr create(const ComputeHashCompileParams& params) { + auto kernel = std::make_shared(params); + OPENVINO_ASSERT(kernel, "[ CORE ] Could not create ComputeHash kernel."); + kernel->create_kernel(); + + return kernel; + } + +private: + using Vmm = typename std::conditional::type; + bool is_vpclmulqdq = false; + + ComputeHashCompileParams m_jcp; + RegistersPool::Ptr m_registers_pool; + + const Xbyak::Reg64 r64_params = abi_param1; + + RegistersPool::Reg r64_src_ptr; + RegistersPool::Reg r64_dst_ptr; + RegistersPool::Reg r64_work_amount; + RegistersPool::Reg r64_k_ptr; + RegistersPool::Reg r64_aux; + + // Vector registers + RegistersPool::Reg v_k_2_3; + RegistersPool::Reg v_shuf_mask; + + void initialize(const Vmm& v_dst); + + void bulk_fold(const Vmm& v_dst); + + void join(const Vmm& v_dst); + + void fold_to_128(const Vmm& v_dst); + + void fold_to_64(const Vmm& v_dst); + + void uni_vpxorq(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_src_0, const Xbyak::Xmm& v_src_1); + + void uni_vmovdqu64(const Xbyak::Xmm& v_dst, const Xbyak::Operand& v_src_0); + + void uni_vmovdqu64(const Xbyak::Address& v_dst, const Xbyak::Xmm& v_src_0); + + void uni_vbroadcasti64x2(const Xbyak::Ymm& v_dst, const Xbyak::Address& v_src_0); + + void partial_load(const Xbyak::Xmm& xmm_dst, const Xbyak::Address& src_addr, const Xbyak::Reg64& r64_load_num); + + void partial_load(const Xbyak::Ymm& ymm_dst, const Xbyak::Address& src_addr, const Xbyak::Reg64& r64_load_num); +}; + +template <> +void ComputeHash::uni_vpxorq(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src_0, + const Xbyak::Xmm& v_src_1) { + 
vpxorq(v_dst, v_src_0, v_src_1); +} +template +void ComputeHash::uni_vpxorq(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_src_0, const Xbyak::Xmm& v_src_1) { + vpxor(v_dst, v_src_0, v_src_1); +} +template <> +void ComputeHash::uni_vmovdqu64(const Xbyak::Xmm& v_dst, const Xbyak::Operand& v_src_0) { + vmovdqu64(v_dst, v_src_0); +} +template +void ComputeHash::uni_vmovdqu64(const Xbyak::Xmm& v_dst, const Xbyak::Operand& v_src_0) { + vmovdqu(v_dst, v_src_0); +} +template <> +void ComputeHash::uni_vmovdqu64(const Xbyak::Address& v_dst, const Xbyak::Xmm& v_src_0) { + vmovdqu64(v_dst, v_src_0); +} +template +void ComputeHash::uni_vmovdqu64(const Xbyak::Address& v_dst, const Xbyak::Xmm& v_src_0) { + vmovdqu(v_dst, v_src_0); +} +template <> +void ComputeHash::uni_vbroadcasti64x2(const Xbyak::Ymm& v_dst, const Xbyak::Address& v_src_0) { + vbroadcasti64x2(v_dst, v_src_0); +} +template +void ComputeHash::uni_vbroadcasti64x2(const Xbyak::Ymm& v_dst, const Xbyak::Address& v_src_0) { + vbroadcasti128(v_dst, v_src_0); +} +template <> +void ComputeHash::partial_load(const Xbyak::Xmm& xmm_dst, + const Xbyak::Address& src_addr, + const Xbyak::Reg64& r64_load_num) { + Xbyak::Label l_mv_mask; + auto rOnes = getReg64(); + auto k_load_mask = RegistersPool::Reg(m_registers_pool); + + mov(rOnes, 0xFFFFFFFFFFFFFFFF); + cmp(r64_load_num, 0x3f); + jg(l_mv_mask); + + shlx(rOnes, rOnes, r64_load_num); + not_(rOnes); + + L(l_mv_mask); + kmovq(k_load_mask, rOnes); + + vmovdqu8(Vmm(xmm_dst.getIdx()) | k_load_mask | T_z, ptr[r64_src_ptr]); +} +template +void ComputeHash::partial_load(const Xbyak::Xmm& xmm_dst, + const Xbyak::Address& src_addr, + const Xbyak::Reg64& r64_load_num) { + Xbyak::Label l_partial, l_end; + + cmp(r64_load_num, xmm_len); + jl(l_partial, T_NEAR); + uni_vmovdqu64(xmm_dst, ptr[src_addr.getRegExp()]); + jmp(l_end, T_NEAR); + + L(l_partial); + { + uni_vpxorq(xmm_dst, xmm_dst, xmm_dst); + for (size_t j = 0lu; j < xmm_len - 1; j++) { + cmp(r64_load_num, static_cast(j)); + jle(l_end, T_NEAR); + pinsrb(xmm_dst, ptr[src_addr.getRegExp() + j], static_cast(j)); + } + } + + L(l_end); +} +template <> +void ComputeHash::partial_load(const Xbyak::Ymm& xmm_dst, + const Xbyak::Address& src_addr, + const Xbyak::Reg64& r64_load_num) { + partial_load(Xbyak::Xmm(xmm_dst.getIdx()), src_addr, r64_load_num); +} +template +void ComputeHash::partial_load(const Xbyak::Ymm& ymm_dst, + const Xbyak::Address& src_addr, + const Xbyak::Reg64& r64_load_num) { + Xbyak::Label l_xmm, l_partial, l_end; + auto xmm_dst = Xbyak::Xmm(ymm_dst.getIdx()); + + cmp(r64_load_num, ymm_len); + jl(l_xmm, T_NEAR); + uni_vmovdqu64(ymm_dst, ptr[src_addr.getRegExp()]); + jmp(l_end, T_NEAR); + + L(l_xmm); + uni_vpxorq(ymm_dst, ymm_dst, ymm_dst); + cmp(r64_load_num, xmm_len); + jl(l_partial, T_NEAR); + uni_vmovdqu64(xmm_dst, ptr[src_addr.getRegExp()]); + je(l_end, T_NEAR); + + { + Xbyak::Label l_rest_loop, l_perm; + + vperm2i128(ymm_dst, ymm_dst, ymm_dst, 0x1); + for (size_t j = 0lu; j < xmm_len - 1lu; j++) { + cmp(r64_load_num, static_cast(xmm_len + j)); + jle(l_perm, T_NEAR); + pinsrb(xmm_dst, ptr[src_addr.getRegExp() + xmm_len + j], static_cast(j)); + } + L(l_perm); + vperm2i128(ymm_dst, ymm_dst, ymm_dst, 0x1); + } + jmp(l_end, T_NEAR); + + L(l_partial); + { + for (size_t j = 0lu; j < xmm_len - 1; j++) { + cmp(r64_load_num, static_cast(j)); + jle(l_end, T_NEAR); + pinsrb(xmm_dst, ptr[src_addr.getRegExp() + j], static_cast(j)); + } + } + + L(l_end); +} + +template +void ComputeHash::initialize(const Vmm& v_dst) { + mov(r64_src_ptr, ptr[r64_params + 
GET_OFF(src_ptr)]); + mov(r64_dst_ptr, ptr[r64_params + GET_OFF(dst_ptr)]); + mov(r64_k_ptr, ptr[r64_params + GET_OFF(k_ptr)]); + mov(r64_work_amount, ptr[r64_params + GET_OFF(work_amount)]); + + uni_vbroadcasti64x2(v_k_2_3, ptr[r64_k_ptr + K_2_3_OFF]); + + mov(r64_aux, reinterpret_cast(SHUF_MASK)); + uni_vbroadcasti64x2(v_shuf_mask, ptr[r64_aux]); + + if (m_jcp.type == SINGLE_THREAD || m_jcp.type == FIRST_THREAD) { + auto xmm_dst = Xbyak::Xmm(v_dst.getIdx()); + auto xmm_aux = getXmm(); + + // Initial CRC + mov(r64_aux, ptr[r64_params + GET_OFF(size)]); + vpinsrq(xmm_aux, xmm_aux, r64_aux, 0x0); + mov(r64_aux, CRC_VAL); + vpinsrq(xmm_aux, xmm_aux, r64_aux, 0x1); + + // First xor with source. + partial_load(v_dst, ptr[r64_src_ptr], r64_work_amount); + vpshufb(v_dst, v_dst, v_shuf_mask); + pxor(xmm_dst, xmm_aux); // The SSE version is used to avoid zeroing out the rest of the Vmm. + if (m_jcp.type == SINGLE_THREAD) { + add(r64_src_ptr, xmm_len); + } + } else if (m_jcp.type == N_THREAD) { + uni_vmovdqu64(v_dst, ptr[r64_src_ptr]); + vpshufb(v_dst, v_dst, v_shuf_mask); + } + if (m_jcp.type == SINGLE_THREAD || m_jcp.type == FIRST_THREAD || m_jcp.type == N_THREAD) { + sub(r64_work_amount, xmm_len); + } +} + +template <> +void ComputeHash::bulk_fold(const Vmm& v_dst) { + if (m_jcp.type != SINGLE_THREAD && m_jcp.type != FIRST_THREAD && m_jcp.type != N_THREAD) { + return; + } + Xbyak::Label l_fold_loop, l_end; + cmp(r64_work_amount, static_cast(get_vlen() * 2lu - xmm_len)); + jl(l_end, T_NEAR); + + auto v_src_0 = getVmm(); + auto v_dst_0 = getVmm(); + auto v_dst_1 = getVmm(); + auto v_dst_2 = getVmm(); + auto& v_dst_3 = v_dst; + auto v_k_loop = getVmm(); + auto v_aux_0 = getVmm(); + + auto xmm_src_0 = Xbyak::Xmm(v_src_0.getIdx()); + auto xmm_src_1 = getXmm(); + auto xmm_dst_0 = Xbyak::Xmm(v_dst_0.getIdx()); + auto xmm_dst_1 = Xbyak::Xmm(v_dst_1.getIdx()); + auto xmm_dst_2 = Xbyak::Xmm(v_dst_2.getIdx()); + auto xmm_dst_3 = Xbyak::Xmm(v_dst_3.getIdx()); + auto xmm_k_loop = Xbyak::Xmm(v_k_loop.getIdx()); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + auto xmm_aux_0 = Xbyak::Xmm(v_aux_0.getIdx()); + + RegistersPool::Reg r64_bulk_step; + if (m_jcp.type == FIRST_THREAD || m_jcp.type == N_THREAD) { + r64_bulk_step = getReg64(); + mov(r64_bulk_step, ptr[r64_params + GET_OFF(threads_num)]); + sal(r64_bulk_step, static_cast(std::log2(get_vlen()))); // * vlen + } + + if (m_jcp.type == SINGLE_THREAD) { + uni_vbroadcasti64x2(v_k_loop, ptr[r64_k_ptr + K_8_9_OFF]); + } else { + uni_vbroadcasti64x2(v_k_loop, ptr[r64_k_ptr + K_16_17_OFF]); + } + + uni_vmovdqu64(v_dst_0, v_dst); + + if (!is_vpclmulqdq) { + vextracti64x2(xmm_dst_1, v_dst_0, 0x1); + vextracti64x2(xmm_dst_2, v_dst_0, 0x2); + vextracti64x2(xmm_dst_3, v_dst_0, 0x3); + } + + if (m_jcp.type == FIRST_THREAD || m_jcp.type == N_THREAD) { + add(r64_src_ptr, r64_bulk_step); + prefetcht2(ptr[r64_src_ptr + 16384]); + } else { + add(r64_src_ptr, static_cast(get_vlen() - xmm_len)); + prefetcht2(ptr[r64_src_ptr + 4096]); + } + prefetcht1(ptr[r64_src_ptr + 1024]); + prefetcht0(ptr[r64_src_ptr + 64]); + + sub(r64_work_amount, static_cast(get_vlen() * 2lu - xmm_len)); + + L(l_fold_loop); + { + uni_vmovdqu64(v_src_0, ptr[r64_src_ptr]); + vpshufb(v_src_0, v_src_0, v_shuf_mask); + + if (m_jcp.type == FIRST_THREAD || m_jcp.type == N_THREAD) { + add(r64_src_ptr, r64_bulk_step); + prefetcht2(ptr[r64_src_ptr + 16384]); + } else { + add(r64_src_ptr, static_cast(get_vlen())); + prefetcht2(ptr[r64_src_ptr + 4096]); + } + prefetcht1(ptr[r64_src_ptr + 1024]); + 
prefetcht0(ptr[r64_src_ptr + 64]); + + if (is_vpclmulqdq) { + vpclmulqdq(v_aux_0, v_dst_0, v_k_loop, 0b00000000); + vpclmulqdq(v_dst_0, v_dst_0, v_k_loop, 0b00010001); + uni_vpxorq(v_aux_0, v_aux_0, v_src_0); + uni_vpxorq(v_dst_0, v_dst_0, v_aux_0); + } else { + // 0 + vpclmulqdq(xmm_aux_0, xmm_dst_0, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_0, xmm_dst_0, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_0); + uni_vpxorq(xmm_dst_0, xmm_dst_0, xmm_aux_0); + + // 1 + vextracti64x2(xmm_src_1, v_src_0, 0x1); + vpclmulqdq(xmm_aux_0, xmm_dst_1, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_1, xmm_dst_1, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_1); + uni_vpxorq(xmm_dst_1, xmm_dst_1, xmm_aux_0); + + // 2 + vextracti64x2(xmm_src_1, v_src_0, 0x2); + vpclmulqdq(xmm_aux_0, xmm_dst_2, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_2, xmm_dst_2, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_1); + uni_vpxorq(xmm_dst_2, xmm_dst_2, xmm_aux_0); + + // 3 + vextracti64x2(xmm_src_1, v_src_0, 0x3); + vpclmulqdq(xmm_aux_0, xmm_dst_3, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_3, xmm_dst_3, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_1); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_aux_0); + } + + sub(r64_work_amount, static_cast(get_vlen())); + jge(l_fold_loop, T_NEAR); + } + add(r64_work_amount, static_cast(get_vlen())); + + if (m_jcp.type == SINGLE_THREAD) { + if (is_vpclmulqdq) { + vextracti64x2(xmm_dst_1, v_dst_0, 0x1); + vextracti64x2(xmm_dst_2, v_dst_0, 0x2); + vextracti64x2(xmm_dst_3, v_dst_0, 0x3); + } + + vpclmulqdq(xmm_aux_0, xmm_dst_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00000000); + vpclmulqdq(xmm_dst_0, xmm_dst_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00010001); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_aux_0); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_dst_0); + + vpclmulqdq(xmm_aux_0, xmm_dst_1, ptr[r64_k_ptr + K_4_5_OFF], 0b00000000); + vpclmulqdq(xmm_dst_1, xmm_dst_1, ptr[r64_k_ptr + K_4_5_OFF], 0b00010001); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_aux_0); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_dst_1); + + vpclmulqdq(xmm_aux_0, xmm_dst_2, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_dst_2, xmm_dst_2, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_aux_0); + uni_vpxorq(xmm_dst_3, xmm_dst_3, xmm_dst_2); + } else { + if (is_vpclmulqdq) { + uni_vmovdqu64(ptr[r64_dst_ptr], v_dst_0); + } else { + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 0lu], xmm_dst_0); + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 1lu], xmm_dst_1); + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 2lu], xmm_dst_2); + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 3lu], xmm_dst_3); + } + } + + L(l_end); +} + +template +void ComputeHash::bulk_fold(const Vmm& v_dst) { + if (m_jcp.type != SINGLE_THREAD && m_jcp.type != FIRST_THREAD && m_jcp.type != N_THREAD) { + return; + } + Xbyak::Label l_fold_loop, l_end; + cmp(r64_work_amount, static_cast(get_vlen() * 2lu - xmm_len)); + jl(l_end, T_NEAR); + + auto v_src_0 = getVmm(); + auto v_dst_0 = getVmm(); + auto& v_dst_1 = v_dst; + auto v_aux_0 = getVmm(); + auto v_k_loop = getVmm(); + + auto xmm_src_0 = Xbyak::Xmm(v_src_0.getIdx()); + auto xmm_src_1 = getXmm(); + auto xmm_dst_0 = Xbyak::Xmm(v_dst_0.getIdx()); + auto xmm_dst_1 = Xbyak::Xmm(v_dst_1.getIdx()); + auto xmm_k_loop = Xbyak::Xmm(v_k_loop.getIdx()); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + auto xmm_aux_0 = Xbyak::Xmm(v_aux_0.getIdx()); + + RegistersPool::Reg r64_bulk_step; + if (m_jcp.type == FIRST_THREAD || m_jcp.type == N_THREAD) { + r64_bulk_step = 
getReg64(); + mov(r64_bulk_step, ptr[r64_params + GET_OFF(threads_num)]); + sal(r64_bulk_step, static_cast(std::log2(get_vlen()))); // * vlen + } + + if (m_jcp.type == SINGLE_THREAD) { + uni_vbroadcasti64x2(v_k_loop, ptr[r64_k_ptr + K_4_5_OFF]); + } else { + uni_vbroadcasti64x2(v_k_loop, ptr[r64_k_ptr + K_8_9_OFF]); + } + + uni_vmovdqu64(v_dst_0, v_dst); + + if (!is_vpclmulqdq) { + vextracti128(xmm_dst_1, v_dst_0, 0x1); + } + + if (m_jcp.type == SINGLE_THREAD) { + add(r64_src_ptr, static_cast(get_vlen() - xmm_len)); + } else { + add(r64_src_ptr, r64_bulk_step); + } + prefetcht2(ptr[r64_src_ptr + 4096]); + prefetcht1(ptr[r64_src_ptr + 1024]); + prefetcht0(ptr[r64_src_ptr + 64]); + + sub(r64_work_amount, static_cast(get_vlen() * 2lu - xmm_len)); + + L(l_fold_loop); + { + uni_vmovdqu64(v_src_0, ptr[r64_src_ptr]); + vpshufb(v_src_0, v_src_0, v_shuf_mask); + + if (m_jcp.type == SINGLE_THREAD) { + add(r64_src_ptr, static_cast(get_vlen())); + } else { + add(r64_src_ptr, r64_bulk_step); + } + prefetcht2(ptr[r64_src_ptr + 4096]); + prefetcht1(ptr[r64_src_ptr + 1024]); + prefetcht0(ptr[r64_src_ptr + 64]); + + if (is_vpclmulqdq) { + vpclmulqdq(v_aux_0, v_dst_0, v_k_loop, 0b00000000); + vpclmulqdq(v_dst_0, v_dst_0, v_k_loop, 0b00010001); + uni_vpxorq(v_aux_0, v_aux_0, v_src_0); + uni_vpxorq(v_dst_0, v_dst_0, v_aux_0); + } else { + // 0 + vpclmulqdq(xmm_aux_0, xmm_dst_0, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_0, xmm_dst_0, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_0); + uni_vpxorq(xmm_dst_0, xmm_dst_0, xmm_aux_0); + // 1 + vextracti128(xmm_src_1, v_src_0, 0x1); + vpclmulqdq(xmm_aux_0, xmm_dst_1, xmm_k_loop, 0b00000000); + vpclmulqdq(xmm_dst_1, xmm_dst_1, xmm_k_loop, 0b00010001); + uni_vpxorq(xmm_aux_0, xmm_aux_0, xmm_src_1); + uni_vpxorq(xmm_dst_1, xmm_dst_1, xmm_aux_0); + } + + sub(r64_work_amount, static_cast(get_vlen())); + jge(l_fold_loop, T_NEAR); + } + add(r64_work_amount, static_cast(get_vlen())); + + if (m_jcp.type == SINGLE_THREAD) { + if (is_vpclmulqdq) { + vextracti128(xmm_dst_1, v_dst_0, 0x1); + } + vpclmulqdq(xmm_aux_0, xmm_dst_0, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_dst_0, xmm_dst_0, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_dst_1, xmm_dst_1, xmm_aux_0); + uni_vpxorq(xmm_dst_1, xmm_dst_1, xmm_dst_0); + } else { + if (is_vpclmulqdq) { + uni_vmovdqu64(ptr[r64_dst_ptr], v_dst_0); + } else { + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 0lu], xmm_dst_0); + uni_vmovdqu64(ptr[r64_dst_ptr + xmm_len * 1lu], xmm_dst_1); + } + } + + L(l_end); +} + +template <> +void ComputeHash::join(const Vmm& v_dst) { + if (m_jcp.type != FINAL_FOLD) { + return; + } + + mov(r64_aux, ptr[r64_params + GET_OFF(intermediate_ptr)]); + prefetcht0(ptr[r64_aux + 1024]); + + auto xmm_src_0 = getXmm(); + auto xmm_src_last = Xbyak::Xmm(v_dst.getIdx()); + auto xmm_aux_0 = getXmm(); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + + uni_vmovdqu64(xmm_src_last, ptr[r64_aux + xmm_len * 7]); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_14_15_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_14_15_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_12_13_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_12_13_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, 
xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 2lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_10_11_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_10_11_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 3lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_8_9_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_8_9_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 4lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 5lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_4_5_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_4_5_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 6lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); +} + +template +void ComputeHash::join(const Vmm& v_dst) { + if (m_jcp.type != FINAL_FOLD) { + return; + } + + mov(r64_aux, ptr[r64_params + GET_OFF(intermediate_ptr)]); + prefetcht0(ptr[r64_aux + 1024]); + + auto xmm_src_0 = getXmm(); + auto xmm_src_last = Xbyak::Xmm(v_dst.getIdx()); + auto xmm_aux_0 = getXmm(); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + + uni_vmovdqu64(xmm_src_last, ptr[r64_aux + xmm_len * 3]); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 0lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_6_7_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 1lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, ptr[r64_k_ptr + K_4_5_OFF], 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, ptr[r64_k_ptr + K_4_5_OFF], 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); + + uni_vmovdqu64(xmm_src_0, ptr[r64_aux + xmm_len * 2lu]); + vpclmulqdq(xmm_aux_0, xmm_src_0, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_src_0, xmm_src_0, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_aux_0); + uni_vpxorq(xmm_src_last, xmm_src_last, xmm_src_0); +} + +template +void ComputeHash::fold_to_128(const Vmm& v_dst) { + if (m_jcp.type != SINGLE_THREAD && m_jcp.type != FINAL_FOLD) { + return; + } + Xbyak::Label l_fold_loop, l_end; + cmp(r64_work_amount, xmm_len); + jl(l_end, T_NEAR); + + auto xmm_src = getXmm(); + auto xmm_dst = Xbyak::Xmm(v_dst.getIdx()); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + auto xmm_shuf_mask = Xbyak::Xmm(v_shuf_mask.getIdx()); + auto xmm_aux = getXmm(); + + L(l_fold_loop); + { + uni_vmovdqu64(xmm_src, ptr[r64_src_ptr]); + vpshufb(xmm_src, xmm_src, xmm_shuf_mask); + + vpclmulqdq(xmm_aux, xmm_dst, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_dst, 
xmm_dst, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_dst, xmm_dst, xmm_aux); + uni_vpxorq(xmm_dst, xmm_dst, xmm_src); + + add(r64_src_ptr, xmm_len); + sub(r64_work_amount, xmm_len); + cmp(r64_work_amount, xmm_len); + jge(l_fold_loop, T_NEAR); + } + + L(l_end); +} + +template +void ComputeHash::fold_to_64(const Vmm& v_dst) { + if (m_jcp.type != SINGLE_THREAD && m_jcp.type != FINAL_FOLD) { + return; + } + Xbyak::Label l_fold_to_64; + cmp(r64_work_amount, 0); + jle(l_fold_to_64, T_NEAR); + + auto xmm_src = getXmm(); + auto xmm_dst = Xbyak::Xmm(v_dst.getIdx()); + auto xmm_k_2_3 = Xbyak::Xmm(v_k_2_3.getIdx()); + auto xmm_shuf_mask = Xbyak::Xmm(v_shuf_mask.getIdx()); + auto xmm_aux = getXmm(); + auto xmm_aux_1 = getXmm(); + auto xmm_aux_2 = getXmm(); + + partial_load(xmm_src, ptr[r64_src_ptr], r64_work_amount); + vpshufb(xmm_src, xmm_src, xmm_shuf_mask); + + vpclmulqdq(xmm_aux, xmm_dst, xmm_k_2_3, 0b00000000); + vpclmulqdq(xmm_dst, xmm_dst, xmm_k_2_3, 0b00010001); + uni_vpxorq(xmm_aux, xmm_aux, xmm_src); + uni_vpxorq(xmm_dst, xmm_dst, xmm_aux); + + L(l_fold_to_64); + + mov(r64_aux, K_2); + vpinsrq(xmm_aux, xmm_aux, r64_aux, 0x0); + vpclmulqdq(xmm_aux, xmm_dst, xmm_aux, 0b00000001); + vpslldq(xmm_dst, xmm_dst, 0x8); + uni_vpxorq(xmm_dst, xmm_dst, xmm_aux); + + mov(r64_aux, P_1); + vpinsrq(xmm_aux_2, xmm_aux_2, r64_aux, 0x0); + vpclmulqdq(xmm_aux, xmm_dst, xmm_aux_2, 0b00000001); + mov(r64_aux, 0x0); + vpinsrq(xmm_aux_1, xmm_dst, r64_aux, 0x0); + uni_vpxorq(xmm_aux, xmm_aux, xmm_aux_1); + vpinsrq(xmm_aux_1, xmm_aux, r64_aux, 0x0); + + mov(r64_aux, P_2); + vpinsrq(xmm_aux_2, xmm_aux_2, r64_aux, 0x1); + vpclmulqdq(xmm_aux, xmm_aux, xmm_aux_2, 0b00010001); + uni_vpxorq(xmm_aux, xmm_aux, xmm_aux_1); + uni_vpxorq(xmm_dst, xmm_dst, xmm_aux); + + vpextrq(ptr[r64_dst_ptr], xmm_dst, 0x0); +} + +} // namespace jit +#endif // OV_CORE_USE_XBYAK_JIT + +size_t compute_hash(const void* src, size_t size) { +#ifdef OV_CORE_USE_XBYAK_JIT + if (Generator::mayiuse(avx2)) { + uint64_t result = 0lu; + + // Parallel section + constexpr uint64_t min_wa_per_thread = 131072lu; // 2^17 + const uint64_t size_u64 = static_cast(size); + if (size_u64 >= min_wa_per_thread * 2lu) { + static auto first_thr_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::FIRST_THREAD}) + : jit::ComputeHash::create({jit::FIRST_THREAD}); + static auto n_thr_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::N_THREAD}) + : jit::ComputeHash::create({jit::N_THREAD}); + static auto final_fold_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::FINAL_FOLD}) + : jit::ComputeHash::create({jit::FINAL_FOLD}); + + static const uint64_t max_thr_num = 2lu; + uint64_t thr_num = std::min(size_u64 / min_wa_per_thread, max_thr_num); + const uint64_t el_per_thread = + first_thr_kernel->get_vlen() * ((size_u64 / thr_num) / first_thr_kernel->get_vlen()); + std::vector intermediate(thr_num * first_thr_kernel->get_vlen()); + + parallel_nt_static(static_cast(thr_num), [&](const int ithr, const int nthr) { + uint64_t start = el_per_thread * ithr; + if (start >= size_u64) { + return; + } + uint64_t work_amount = (el_per_thread + start > size_u64) ? 
size_u64 - start : el_per_thread; + + jit::ComputeHashCallArgs args; + + args.src_ptr = reinterpret_cast(src) + first_thr_kernel->get_vlen() * ithr; + args.dst_ptr = &(intermediate[first_thr_kernel->get_vlen() * ithr]); + args.k_ptr = jit::K_PULL; + args.work_amount = work_amount; + args.size = size_u64; + args.threads_num = thr_num; + + if (ithr == 0) { + (*first_thr_kernel)(&args); + } else { + (*n_thr_kernel)(&args); + } + }); + + jit::ComputeHashCallArgs args; + args.work_amount = size_u64 - el_per_thread * thr_num; + args.src_ptr = reinterpret_cast(src) + size_u64 - args.work_amount; + args.dst_ptr = &result; + args.k_ptr = jit::K_PULL; + args.size = size_u64; + args.intermediate_ptr = intermediate.data(); + + (*final_fold_kernel)(&args); + } else { + static auto single_thr_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::SINGLE_THREAD}) + : jit::ComputeHash::create({jit::SINGLE_THREAD}); + + jit::ComputeHashCallArgs args; + args.src_ptr = src; + args.dst_ptr = &result; + args.k_ptr = jit::K_PULL; + args.work_amount = size_u64; + args.size = size_u64; + + (*single_thr_kernel)(&args); + } + + return result; + } + +#endif // OV_CORE_USE_XBYAK_JIT + + constexpr auto cel_size = sizeof(size_t); + size_t seed = size; + const auto data = static_cast(src); + const auto d_end = std::next(data, size / cel_size); + // The constant value used as a magic number has been + // traditionally used e.g. in boost library's hash_combine. + // It happens to be derived from the golden ratio. + for (auto d = data; d != d_end; ++d) { + seed ^= *d + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + size_t last_bytes{0}; + std::memcpy(&last_bytes, d_end, size % cel_size); + seed ^= last_bytes + 0x9e3779b9 + (seed << 6) + (seed >> 2); + + return seed; +} + +} // namespace runtime +} // namespace ov From 8a33df72760f075bb277161f3b5b2ad3768963bf Mon Sep 17 00:00:00 2001 From: "Anastasiya(Asya) Pronina" Date: Mon, 21 Oct 2024 22:34:31 +0100 Subject: [PATCH 097/112] Added i8 for DQMatMulCwi (#27112) ### Details: - *Added i8 for DQMatMulCwi* ### Tickets: - *N/A* --------- Co-authored-by: Dmitry Matveev --- .../intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp index 077fb6d6660132..ddf1449adb9d59 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp @@ -149,8 +149,9 @@ DQMatMulCWi::DQMatMulCWi() { auto qcoeff_shape = matched_qcoeff->output(0).get_shape(); - if (ov::element::i4 == matched_qweight->get_element_type() && qcoeff_shape[1] == 1 && - !matched_matmul->get_transpose_a() && matched_matmul->get_transpose_b()) { + if ((ov::element::i4 == matched_qweight->get_element_type() || + ov::element::i8 == matched_qweight->get_element_type()) && + qcoeff_shape[1] == 1 && !matched_matmul->get_transpose_a() && matched_matmul->get_transpose_b()) { auto matched_node_cvtw = node_to_output.at(qcvtw).get_node_shared_ptr(); auto matched_node_cvtm = node_to_output.at(qcvtm).get_node_shared_ptr(); auto matched_node_muls = node_to_output.at(qmuls).get_node_shared_ptr(); From de0dc7199945e132142cfd7bbd72f0755799eb06 Mon Sep 17 00:00:00 2001 From: Georgy Krivoruchko Date: Tue, 22 Oct 2024 09:02:08 +0400 Subject: [PATCH 098/112] [ONNX] Direct loading from ModelProto object (#27124) ### Details: - Direct 
loading from ModelProto object ### Tickets: - 155265 --- src/frontends/onnx/frontend/src/editor.cpp | 8 +++ src/frontends/onnx/frontend/src/editor.hpp | 9 +++ src/frontends/onnx/frontend/src/frontend.cpp | 31 ++++++++- .../onnx/frontend/src/input_model.cpp | 3 + .../onnx/frontend/src/input_model.hpp | 3 + src/frontends/onnx/tests/CMakeLists.txt | 5 +- src/frontends/onnx/tests/load_from.cpp | 66 +++++++++++++++++++ 7 files changed, 123 insertions(+), 2 deletions(-) diff --git a/src/frontends/onnx/frontend/src/editor.cpp b/src/frontends/onnx/frontend/src/editor.cpp index eaa7b31a61c03f..4ad576cd9d5b96 100644 --- a/src/frontends/onnx/frontend/src/editor.cpp +++ b/src/frontends/onnx/frontend/src/editor.cpp @@ -343,6 +343,14 @@ ONNXModelEditor::ONNXModelEditor(std::istream& model_stream, delete impl; }} {} +ONNXModelEditor::ONNXModelEditor(std::shared_ptr model_proto, frontend::ExtensionHolder extensions) + : m_model_path{""}, + m_mmap_cache{nullptr}, + m_extensions{std::move(extensions)}, + m_pimpl{new ONNXModelEditor::Impl{model_proto}, [](Impl* impl) { + delete impl; + }} {} + const std::string& ONNXModelEditor::model_path() const { return m_model_path; } diff --git a/src/frontends/onnx/frontend/src/editor.hpp b/src/frontends/onnx/frontend/src/editor.hpp index 81d2527c88b9cf..5c7619ed87dbf2 100644 --- a/src/frontends/onnx/frontend/src/editor.hpp +++ b/src/frontends/onnx/frontend/src/editor.hpp @@ -16,6 +16,8 @@ #include "openvino/op/constant.hpp" #include "utils/tensor_external_data.hpp" +using ::ONNX_NAMESPACE::ModelProto; + namespace ov { namespace frontend { namespace onnx { @@ -54,6 +56,13 @@ class ONNXModelEditor final { const bool enable_mmap = false, frontend::ExtensionHolder extensions = {}); + /// \brief Creates an editor from a ModelProto. The model_proto is + /// stored in m_model_proto member variable. + /// + /// \param model_proto A shared pointer on ModelProto object. + /// \param extensions Holder for custom extensions (like custom ops). + ONNXModelEditor(std::shared_ptr model_proto, frontend::ExtensionHolder extensions = {}); + /// \brief Modifies the in-memory representation of the model by setting /// custom input types for all inputs specified in the provided map. /// diff --git a/src/frontends/onnx/frontend/src/frontend.cpp b/src/frontends/onnx/frontend/src/frontend.cpp index d4b83fee20db82..8afc9b661ec28d 100644 --- a/src/frontends/onnx/frontend/src/frontend.cpp +++ b/src/frontends/onnx/frontend/src/frontend.cpp @@ -32,6 +32,8 @@ using namespace ov; using namespace ov::frontend::onnx; using namespace ov::frontend::onnx::common; +using ::ONNX_NAMESPACE::ModelProto; +using ::ONNX_NAMESPACE::Version; ONNX_FRONTEND_C_API ov::frontend::FrontEndVersion get_api_version() { return OV_FRONTEND_API_VERSION; @@ -83,6 +85,17 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector& variants) const #endif return std::make_shared(*stream, enable_mmap, m_extensions); } + // !!! Experimental feature, it may be changed or removed in the future !!! + if (variants[0].is()) { + void* model_proto_addr = reinterpret_cast(variants[0].as()); + FRONT_END_GENERAL_CHECK(model_proto_addr != 0, "Wrong address of a ModelProto object is passed"); + ModelProto* model_proto_ptr = static_cast(model_proto_addr); + FRONT_END_GENERAL_CHECK( + model_proto_ptr->has_ir_version() && model_proto_ptr->ir_version() < Version::IR_VERSION, + "A ModelProto object contains unsupported IR version"); + return std::make_shared(std::make_shared(*model_proto_ptr), m_extensions); + } + // !!! 
End of Experimental feature return nullptr; } @@ -213,7 +226,23 @@ bool FrontEnd::supported_impl(const std::vector& variants) const { StreamRewinder rwd{*stream}; return is_valid_model(*stream); } - + // !!! Experimental feature, it may be changed or removed in the future !!! + if (variants[0].is()) { + void* model_proto_addr = reinterpret_cast(variants[0].as()); + if (model_proto_addr == 0) { + return false; + } + ModelProto* model_proto_ptr = static_cast(model_proto_addr); + try { + if (!model_proto_ptr->has_ir_version() || model_proto_ptr->ir_version() > Version::IR_VERSION) { + return false; + } + } catch (...) { + return false; + } + return true; + } + // !!! End of Experimental feature return false; } diff --git a/src/frontends/onnx/frontend/src/input_model.cpp b/src/frontends/onnx/frontend/src/input_model.cpp index 108690a6d645d9..87f1439eb18b38 100644 --- a/src/frontends/onnx/frontend/src/input_model.cpp +++ b/src/frontends/onnx/frontend/src/input_model.cpp @@ -37,6 +37,9 @@ InputModel::InputModel(std::istream& model_stream, : InputModel(model_stream, ov::util::wstring_to_string(path), enable_mmap, std::move(extensions)) {} #endif +InputModel::InputModel(std::shared_ptr model_proto, frontend::ExtensionHolder extensions) + : m_editor{std::make_shared(model_proto, std::move(extensions))} {} + std::vector InputModel::get_inputs() const { const auto& inputs = m_editor->model_inputs(); std::vector in_places; diff --git a/src/frontends/onnx/frontend/src/input_model.hpp b/src/frontends/onnx/frontend/src/input_model.hpp index 9bf44a5672fb28..246696621f1fd4 100644 --- a/src/frontends/onnx/frontend/src/input_model.hpp +++ b/src/frontends/onnx/frontend/src/input_model.hpp @@ -10,6 +10,8 @@ #include "openvino/frontend/extension/holder.hpp" +using ::ONNX_NAMESPACE::ModelProto; + namespace ov { namespace frontend { namespace onnx { @@ -33,6 +35,7 @@ class InputModel : public ov::frontend::InputModel { const bool enable_mmap = false, ExtensionHolder extensions = {}); #endif + InputModel(std::shared_ptr model_proto, ExtensionHolder extensions = {}); std::vector get_inputs() const override; std::vector get_outputs() const override; diff --git a/src/frontends/onnx/tests/CMakeLists.txt b/src/frontends/onnx/tests/CMakeLists.txt index 599c7c43b05395..9b928773b7d65a 100644 --- a/src/frontends/onnx/tests/CMakeLists.txt +++ b/src/frontends/onnx/tests/CMakeLists.txt @@ -182,8 +182,11 @@ add_custom_command(TARGET ov_onnx_frontend_tests POST_BUILD ${custom_commands} COMMENT "Copy test manifest files to ${TEST_MODEL_ZOO}/onnx") -# process models +# Process models add_dependencies(ov_onnx_frontend_tests test_model_zoo) +# Working with ModelProto +ov_link_system_libraries(ov_onnx_frontend_tests PUBLIC onnx_proto onnx) + add_subdirectory(standalone_build) add_dependencies(ov_onnx_frontend_tests onnx_fe_standalone_build_test) diff --git a/src/frontends/onnx/tests/load_from.cpp b/src/frontends/onnx/tests/load_from.cpp index 617f4a917567d5..547937ac52171f 100644 --- a/src/frontends/onnx/tests/load_from.cpp +++ b/src/frontends/onnx/tests/load_from.cpp @@ -4,6 +4,7 @@ #include "load_from.hpp" #include +#include #include @@ -61,3 +62,68 @@ INSTANTIATE_TEST_SUITE_P(ONNXLoadTest, FrontEndLoadFromTest, ::testing::Values(getTestData()), FrontEndLoadFromTest::getTestCaseName); + +// !!! Experimental feature, it may be changed or removed in the future !!! 
+using ::ONNX_NAMESPACE::ModelProto; +using ::ONNX_NAMESPACE::Version; + +TEST_P(FrontEndLoadFromTest, testLoadFromModelProtoUint64) { + const auto path = + ov::util::path_join({ov::test::utils::getExecutableDirectory(), TEST_ONNX_MODELS_DIRNAME, "abs.onnx"}); + std::ifstream ifs(path, std::ios::in | std::ios::binary); + ASSERT_TRUE(ifs.is_open()) << "Could not open an ifstream for the model path: " << path; + std::vector frontends; + FrontEnd::Ptr fe; + + { + auto model_proto = std::make_shared(); + ASSERT_TRUE(model_proto->ParseFromIstream(&ifs)) << "Could not parse ModelProto from file: " << path; + + uint64_t model_proto_ptr = reinterpret_cast(model_proto.get()); + + ASSERT_NO_THROW(m_frontEnd = m_fem.load_by_model(model_proto_ptr)) + << "Could not create the ONNX FE using a pointer on ModelProto object as uint64_t"; + ASSERT_NE(m_frontEnd, nullptr); + ASSERT_NO_THROW(m_inputModel = m_frontEnd->load(model_proto_ptr)) << "Could not load the model"; + ASSERT_NE(m_inputModel, nullptr); + } + + std::shared_ptr model; + ASSERT_NO_THROW(model = m_frontEnd->convert(m_inputModel)) << "Could not convert the model to OV representation"; + ASSERT_NE(model, nullptr); + + ASSERT_TRUE(model->get_ordered_ops().size() > 0); +} + +TEST_P(FrontEndLoadFromTest, testLoadFromModelProtoUint64_Negative) { + const auto path = + ov::util::path_join({ov::test::utils::getExecutableDirectory(), TEST_ONNX_MODELS_DIRNAME, "abs.onnx"}); + std::ifstream ifs(path, std::ios::in | std::ios::binary); + ASSERT_TRUE(ifs.is_open()) << "Could not open an ifstream for the model path: " << path; + std::vector frontends; + FrontEnd::Ptr fe; + + auto model_proto = std::make_shared(); + ASSERT_TRUE(model_proto->ParseFromIstream(&ifs)) << "Could not parse ModelProto from file: " << path; + + uint64_t model_proto_ptr = reinterpret_cast(model_proto.get()); + + ASSERT_NO_THROW(m_frontEnd = m_fem.load_by_model(model_proto_ptr)) + << "Could not create the ONNX FE using a pointer on ModelProto object as uint64_t"; + ASSERT_NE(m_frontEnd, nullptr); + // Should say unsupported if an address is 0 + ASSERT_FALSE(m_frontEnd->supported(static_cast(0))); + // Should throw an ov::Exception if address is 0 + OV_EXPECT_THROW(m_inputModel = m_frontEnd->load(static_cast(0)), + ov::Exception, + testing::HasSubstr("Wrong address")); + + model_proto->set_ir_version(Version::IR_VERSION + 1); + // Should say unsupported if ModelProto has IR_VERSION higher than supported + ASSERT_FALSE(m_frontEnd->supported(model_proto_ptr)); + // Should throw an ov::Exception if address is 0 + OV_EXPECT_THROW(m_inputModel = m_frontEnd->load(model_proto_ptr), + ov::Exception, + testing::HasSubstr("unsupported IR version")); +} +// !!! End of Experimental feature !!! 
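For reference, the experimental entry point introduced in this patch can be exercised roughly the way the new tests do. A minimal sketch, mirroring `load_from.cpp` above — it assumes an `abs.onnx` file on disk and the ONNX protobuf headers being available; the uint64_t-address contract is experimental and may change, as the banners in the code state:

```cpp
#include <fstream>
#include <memory>
#include <onnx/onnx_pb.h>
#include "openvino/frontend/manager.hpp"

int main() {
    auto model_proto = std::make_shared<ONNX_NAMESPACE::ModelProto>();
    std::ifstream ifs("abs.onnx", std::ios::in | std::ios::binary);
    if (!model_proto->ParseFromIstream(&ifs))
        return 1;

    // The ModelProto is handed over by raw address, cast to uint64_t;
    // this is what supported_impl()/load_impl() above validate.
    const uint64_t model_proto_addr = reinterpret_cast<uint64_t>(model_proto.get());

    ov::frontend::FrontEndManager fem;
    auto fe = fem.load_by_model(model_proto_addr);  // selects the ONNX frontend
    auto input_model = fe->load(model_proto_addr);
    std::shared_ptr<ov::Model> model = fe->convert(input_model);
    return model != nullptr ? 0 : 1;
}
```

Note that `load_impl` copies the ModelProto into the created input model (`std::make_shared<ModelProto>(*model_proto_ptr)`), so the caller's object does not need to outlive the `load()` call.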
From d6dc4952c9c7f1a53dd2f0f64fd2f24e0b8ceb16 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 22 Oct 2024 09:13:11 +0400 Subject: [PATCH 099/112] [PT FE] Fix aten.index.Tensor for indices with None values (#27122) **Details:** Support Stable Diffusion in ExportedProgram format **Ticket:** 149983 --------- Signed-off-by: Kazantsev, Roman --- src/frontends/pytorch/src/op/index.cpp | 200 +----------------- .../src/transforms/aten_index_replacer.cpp | 182 +--------------- src/frontends/pytorch/src/utils.cpp | 193 +++++++++++++++++ src/frontends/pytorch/src/utils.hpp | 9 + .../pytorch_tests/test_index_tensor.py | 49 +++++ .../pytorch_tests/test_upsample.py | 3 + 6 files changed, 272 insertions(+), 364 deletions(-) create mode 100644 tests/layer_tests/pytorch_tests/test_index_tensor.py diff --git a/src/frontends/pytorch/src/op/index.cpp b/src/frontends/pytorch/src/op/index.cpp index a1e286cad93adc..880e0acee0f983 100644 --- a/src/frontends/pytorch/src/op/index.cpp +++ b/src/frontends/pytorch/src/op/index.cpp @@ -26,191 +26,6 @@ namespace op { using namespace ov::op; -namespace { -Output flatten(ov::pass::NodeRegistry& rg, const Output& value, size_t axis) { - // First dimension of output tensor is the product of [d_0, ... d_{axis-1}] dimensions of - // input tensor. The last dimension is the product of the rest of input tensor dimensions: - // [d_{axis}, ..., d_n] - Output output_shape; - if (axis == 0) { - output_shape = v0::Constant::create(element::i32, Shape{2}, {1, -1}); - } else if (axis == 1) { - output_shape = v0::Constant::create(element::i32, Shape{2}, {0, -1}); - } else { - const auto value_shape = rg.make(value, element::i32); - const auto value_rank = rg.make(value_shape, element::i32); - const auto axis_node = v0::Constant::create(element::i32, Shape{1}, {axis}); - auto start = v0::Constant::create(element::i32, Shape{1}, {0}); - auto step = v0::Constant::create(element::i32, Shape{1}, {1}); - const auto first_part_dims = rg.make(value_shape, start, axis_node, step); - auto zero = v0::Constant::create(element::i32, {}, {0}); - auto first_part_dims_length = rg.make(first_part_dims, zero, true); - - auto remaining_part_length = v0::Constant::create(element::i32, {1}, {-1}); - - output_shape = rg.make(OutputVector{first_part_dims_length, remaining_part_length}, 0); - } - return rg.make(value, output_shape, true); -} - -OutputVector index_on_list(ov::pass::NodeRegistry& rg, - const Output& data, - std::deque> ids, - int64_t rank) { - // Multiple tensors as indices. Each tensor could either be - // 1. prim::Constant() - // representing ":" in python indexing. E.g. tensor[:, :] - // 2. prim::Constant[value=...] or tensor output - // representing advanced indexing. E.g. tensor[[0, 1], [2, 0]]. - // For more info on advanced indexing, - // check https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing - - // Consider a general case of - // t: [x_1, y_1, y_2, ..., x_m, ..., y_n] - // where t is a tensor of rank m+n, {x_i} are axes where tensor index is provided, and {y_i} are axes for - // ":". Same results can be achieved through transposing t into - // t: [x_1, x_2, ..., x_m, y_1, y_2, ..., y_n] - // and use gather - // t: [x_1 * x_2 * ... * x_m, y_1 * y_2 * ... * y_n] - // tensor index = \sum_{i=1}^m (ind_i * \prod_{j=i+1}^m (x_j)) - // After gather, reshape and transpose back. - std::vector advanced_ids; - std::vector is_masked_bool; - OutputVector masked_indicies; - // for case when index is bool e.g. 
x[x>0], replace index with non_zero - for (size_t i = 0; i < ids.size(); i++) { - // skip dimensions where index is None - bool is_none = false; - if (!ids[i].get_node_shared_ptr()) { - is_none = true; - } - if (auto const_input = cast_fw_node(ids[i].get_node_shared_ptr(), "prim::Constant")) { - const auto& attrs = const_input->get_attrs(); - if (attrs.find("none_value") != attrs.end()) { - is_none = true; - } - } - if (is_none) { - masked_indicies.push_back(ids[i]); - is_masked_bool.push_back(false); - continue; - } - auto id_dtype = ids[i].get_element_type(); - if (id_dtype == element::boolean || id_dtype == element::u8) { - auto idx = rg.make(ids[i], element::u8); - auto nonzero = rg.make(idx, element::i32); - auto input_order = v0::Constant::create(element::i32, Shape{2}, {1, 0}); - auto masked_id = rg.make(nonzero, input_order); - masked_indicies.push_back(masked_id); - is_masked_bool.push_back(true); - } else { - masked_indicies.push_back(ids[i]); - is_masked_bool.push_back(false); - } - advanced_ids.push_back(i); - } - - // all indicies prim::Constant(None), return input as is - if (advanced_ids.size() == 0) { - return {data}; - } - // perform gather for single element case - if (advanced_ids.size() == 1) { - auto index = masked_indicies[advanced_ids[0]]; - if (is_masked_bool[advanced_ids[0]]) { - auto gather = rg.make(data, index); - return {gather}; - } - index = rg.make(index, element::i32); - auto dim = v0::Constant::create(element::i32, Shape{}, {advanced_ids[0]}); - auto gather = rg.make(data, index, dim); - return {gather}; - } - auto adv_idx_count = advanced_ids.size(); - auto input_shape = rg.make(data, element::i32); - auto zero = v0::Constant::create(element::i32, Shape{}, {0}); - auto input_dims = rg.make(input_shape, zero, rank); - std::vector non_used_dims; - for (auto i = 0; i < rank; i++) { - if (std::find(advanced_ids.begin(), advanced_ids.end(), i) == advanced_ids.end()) { - non_used_dims.push_back(i); - } - } - std::vector permutation_dims; - permutation_dims.insert(permutation_dims.end(), advanced_ids.begin(), advanced_ids.end()); - permutation_dims.insert(permutation_dims.end(), non_used_dims.begin(), non_used_dims.end()); - auto transpose_dims = v0::Constant::create(element::i32, Shape{permutation_dims.size()}, permutation_dims); - auto transposed_input = rg.make(data, transpose_dims); - auto flatten_input = flatten(rg, transposed_input, adv_idx_count); - auto cum_adv_index = masked_indicies[advanced_ids.back()]; - cum_adv_index = rg.make(cum_adv_index, element::i32); - auto multiplier = input_dims->output(advanced_ids.back()); - for (int i = static_cast(adv_idx_count) - 2; i > -1; i--) { - auto input_id = advanced_ids[i]; - auto m_idx = rg.make(masked_indicies[input_id], element::i32); - auto adv_index = rg.make(m_idx, multiplier); - cum_adv_index = rg.make(cum_adv_index, adv_index); - multiplier = rg.make(multiplier, input_dims->output(input_id)); - } - std::shared_ptr gather = rg.make(flatten_input, cum_adv_index, zero); - OutputVector concat_dims; - // check if all advanced indices are consecutive. 
- std::vector consequence_dims; - auto cum_adv_index_shape_tensor = rg.make(cum_adv_index, element::i32); - for (size_t i = advanced_ids[0]; i <= advanced_ids[advanced_ids.back()]; i++) { - consequence_dims.push_back(i); - } - // unfold regular index axes - if (advanced_ids == consequence_dims) { - OutputVector folded_adv_idx_shape_vector; - auto minus_one = v0::Constant::create(element::i32, Shape{1}, {-1}); - folded_adv_idx_shape_vector.push_back(minus_one); - for (auto i : non_used_dims) { - folded_adv_idx_shape_vector.push_back(input_dims->output(i)); - } - auto folded_adv_idx_shape = rg.make(folded_adv_idx_shape_vector, 0); - gather = rg.make(gather, folded_adv_idx_shape, false); - std::vector adv_idx_permute; - for (size_t i = 1; i < advanced_ids[0] + 1; i++) { - adv_idx_permute.push_back(i); - } - adv_idx_permute.push_back(0); - for (size_t i = advanced_ids[0] + 1; i < (rank - adv_idx_count + 1); i++) { - adv_idx_permute.push_back(i); - } - // Transpose folded advanced indexed axis to its original location. - auto permute_indicies = v0::Constant::create(element::i32, Shape{adv_idx_permute.size()}, adv_idx_permute); - gather = rg.make(gather, permute_indicies); - // unfold advanced index axes - for (size_t i = 0; i < advanced_ids[0]; i++) { - concat_dims.push_back(input_dims->output(i)); - } - concat_dims.push_back(cum_adv_index_shape_tensor); - for (auto i : non_used_dims) { - if (i < advanced_ids[0]) { - continue; - } - concat_dims.push_back(input_dims->output(i)); - } - - } else { - size_t i = 0; - auto one = v0::Constant::create(element::i32, Shape{1}, {1}); - while (i < non_used_dims.size() && non_used_dims[i] < advanced_ids[0]) { - concat_dims.push_back(one); - i++; - } - concat_dims.push_back(cum_adv_index_shape_tensor); - for (; i < non_used_dims.size(); i++) { - concat_dims.push_back(input_dims->output(non_used_dims[i])); - } - } - auto final_shape = rg.make(concat_dims, 0); - gather = rg.make(gather, final_shape, false); - return {gather}; -} -} // namespace - OutputVector translate_index(const NodeContext& context) { num_inputs_check(context, 2, 2); auto x = context.get_input(0); @@ -225,9 +40,12 @@ OutputVector translate_index(const NodeContext& context) { auto rank = x.get_partial_shape().rank(); // index transformation supports only tensors with static rank PYTORCH_OP_CONVERSION_CHECK(rank.is_static(), "Dynamic rank for aten::index input is not supported."); - auto res = index_on_list(rg, x, list_elems, rank.get_length()); + OutputVector ids{list_elems.begin(), list_elems.end()}; + ov::Output res; + bool use_input_as_output = true; + index_tensor_on_list(rg, x, ids, rank.get_length(), res, use_input_as_output); context.mark_nodes(rg.get()); - return res; + return {res}; } auto index_ov_type = indices.get_element_type(); if (index_ov_type.is_dynamic()) { @@ -267,9 +85,13 @@ OutputVector translate_index_fx(const NodeContext& context) { } // index transformation supports only tensors with static rank PYTORCH_OP_CONVERSION_CHECK(rank.is_static(), "Dynamic rank for aten::index input is not supported."); - auto res = index_on_list(rg, x, list_elems, rank.get_length()); + + OutputVector ids{list_elems.begin(), list_elems.end()}; + ov::Output res; + bool use_input_as_output = true; + index_tensor_on_list(rg, x, ids, rank, res, use_input_as_output); context.mark_nodes(rg.get()); - return res; + return {res}; }; } // namespace op diff --git a/src/frontends/pytorch/src/transforms/aten_index_replacer.cpp b/src/frontends/pytorch/src/transforms/aten_index_replacer.cpp index 
39a9bc710ca08d..9294409a565691 100644 --- a/src/frontends/pytorch/src/transforms/aten_index_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/aten_index_replacer.cpp @@ -34,34 +34,6 @@ namespace pass { using namespace ov::op; -namespace { -Output flatten(ov::pass::NodeRegistry& rg, const Output& value, size_t axis) { - // First dimension of output tensor is the product of [d_0, ... d_{axis-1}] dimensions of - // input tensor. The last dimension is the product of the rest of input tensor dimensions: - // [d_{axis}, ..., d_n] - Output output_shape; - if (axis == 0) { - output_shape = v0::Constant::create(element::i32, Shape{2}, {1, -1}); - } else if (axis == 1) { - output_shape = v0::Constant::create(element::i32, Shape{2}, {0, -1}); - } else { - const auto value_shape = rg.make(value, element::i32); - const auto value_rank = rg.make(value_shape, element::i32); - const auto axis_node = v0::Constant::create(element::i32, Shape{1}, {axis}); - auto start = v0::Constant::create(element::i32, Shape{1}, {0}); - auto step = v0::Constant::create(element::i32, Shape{1}, {1}); - const auto first_part_dims = rg.make(value_shape, start, axis_node, step); - auto zero = v0::Constant::create(element::i32, {}, {0}); - auto first_part_dims_length = rg.make(first_part_dims, zero, true); - - auto remaining_part_length = v0::Constant::create(element::i32, {1}, {-1}); - - output_shape = rg.make(OutputVector{first_part_dims_length, remaining_part_length}, 0); - } - return rg.make(value, output_shape, true); -} -}; // namespace - AtenIndexToSelect::AtenIndexToSelect() { auto index_op = ov::pass::pattern::wrap_type(); @@ -75,162 +47,22 @@ AtenIndexToSelect::AtenIndexToSelect() { auto indicies = index_op->input_value(1).get_node_shared_ptr(); auto list_indicies = cast_fw_node(indicies, "prim::ListConstruct"); if (list_indicies) { - // Multiple tensors as indices. Each tensor could either be - // 1. prim::Constant() - // representing ":" in python indexing. E.g. tensor[:, :] - // 2. prim::Constant[value=...] or tensor output - // representing advanced indexing. E.g. tensor[[0, 1], [2, 0]]. - // For more info on advanced indexing, - // check https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing - - // Consider a general case of - // t: [x_1, y_1, y_2, ..., x_m, ..., y_n] - // where t is a tensor of rank m+n, {x_i} are axes where tensor index is provided, and {y_i} are axes for - // ":". Same results can be achieved through transposing t into - // t: [x_1, x_2, ..., x_m, y_1, y_2, ..., y_n] - // and use gather - // t: [x_1 * x_2 * ... * x_m, y_1 * y_2 * ... * y_n] - // tensor index = \sum_{i=1}^m (ind_i * \prod_{j=i+1}^m (x_j)) - // After gather, reshape and transpose back. auto ids = list_indicies->input_values(); - std::vector advanced_ids; - std::vector is_masked_bool; - OutputVector masked_indicies; - // for case when index is bool e.g. 
x[x>0], replace index with non_zero - for (size_t i = 0; i < ids.size(); i++) { - auto const_input = cast_fw_node(ids[i].get_node_shared_ptr(), "prim::Constant"); - - // skip dimensions where index is None - if (const_input) { - const auto& attrs = const_input->get_attrs(); - if (attrs.find("none_value") != attrs.end()) { - masked_indicies.push_back(ids[i]); - is_masked_bool.push_back(false); - continue; - } - } - auto id_dtype = ids[i].get_element_type(); - if (id_dtype == element::boolean || id_dtype == element::u8) { - auto idx = rg.make(ids[i], element::u8); - auto nonzero = rg.make(idx, element::i32); - auto input_order = v0::Constant::create(element::i32, Shape{2}, {1, 0}); - auto masked_id = rg.make(nonzero, input_order); - masked_indicies.push_back(masked_id); - is_masked_bool.push_back(true); - } else { - masked_indicies.push_back(ids[i]); - is_masked_bool.push_back(false); - } - advanced_ids.push_back(i); - } - - // all indicies prim::Constant(None), return input as is - if (advanced_ids.size() == 0) { - index_op->output(0).replace(index_op->get_input_source_output(0)); - return true; - } - // perform gather for single element case - if (advanced_ids.size() == 1) { - auto index = masked_indicies[advanced_ids[0]]; - if (is_masked_bool[advanced_ids[0]]) { - auto gather = rg.make(input_node, index); - copy_runtime_info_and_name(index_op, rg.get()); - replace_node(index_op, gather); - return true; - } - index = rg.make(index, element::i32); - auto dim = v0::Constant::create(element::i32, Shape{}, {advanced_ids[0]}); - auto gather = rg.make(input_node, index, dim); - copy_runtime_info_and_name(index_op, rg.get()); - replace_node(index_op, gather); - return true; - } - auto adv_idx_count = advanced_ids.size(); auto rank = input_node.get_partial_shape().rank(); // index transformation supports only tensors with static rank - if (rank.is_dynamic()) { + ov::Output new_output; + bool use_input_as_output = true; + if (!index_tensor_on_list(rg, input_node, ids, rank, new_output, use_input_as_output)) { add_exception_to_fw_node(index_op, "aten::index: dynamic rank for aten::index input is not supported."); return false; } - auto input_shape = rg.make(input_node, element::i32); - auto zero = v0::Constant::create(element::i32, Shape{}, {0}); - auto input_dims = rg.make(input_shape, zero, rank.get_length()); - std::vector non_used_dims; - for (auto i = 0; i < rank.get_length(); i++) { - if (std::find(advanced_ids.begin(), advanced_ids.end(), i) == advanced_ids.end()) { - non_used_dims.push_back(i); - } - } - std::vector permutation_dims; - permutation_dims.insert(permutation_dims.end(), advanced_ids.begin(), advanced_ids.end()); - permutation_dims.insert(permutation_dims.end(), non_used_dims.begin(), non_used_dims.end()); - auto transpose_dims = v0::Constant::create(element::i32, Shape{permutation_dims.size()}, permutation_dims); - auto transposed_input = rg.make(input_node, transpose_dims); - auto flatten_input = flatten(rg, transposed_input, adv_idx_count); - auto cum_adv_index = masked_indicies[advanced_ids[adv_idx_count - 1]]; - cum_adv_index = rg.make(cum_adv_index, element::i32); - auto multiplier = input_dims->output(advanced_ids[adv_idx_count - 1]); - for (int i = static_cast(adv_idx_count) - 2; i > -1; i--) { - auto input_id = advanced_ids[i]; - auto m_idx = rg.make(masked_indicies[input_id], element::i32); - auto adv_index = rg.make(m_idx, multiplier); - cum_adv_index = rg.make(cum_adv_index, adv_index); - multiplier = rg.make(multiplier, input_dims->output(input_id)); - } - 
std::shared_ptr gather = rg.make(flatten_input, cum_adv_index, zero); - OutputVector concat_dims; - // check if all advanced indices are consecutive. - std::vector consequence_dims; - auto cum_adv_index_shape_tensor = rg.make(cum_adv_index, element::i32); - for (size_t i = advanced_ids[0]; i <= advanced_ids[advanced_ids.size() - 1]; i++) { - consequence_dims.push_back(i); - } - // unfold regular index axes - if (advanced_ids == consequence_dims) { - OutputVector folded_adv_idx_shape_vector; - auto minus_one = v0::Constant::create(element::i32, Shape{1}, {-1}); - folded_adv_idx_shape_vector.push_back(minus_one); - for (auto i : non_used_dims) { - folded_adv_idx_shape_vector.push_back(input_dims->output(i)); - } - auto folded_adv_idx_shape = rg.make(folded_adv_idx_shape_vector, 0); - gather = rg.make(gather, folded_adv_idx_shape, false); - std::vector adv_idx_permute; - for (size_t i = 1; i < advanced_ids[0] + 1; i++) { - adv_idx_permute.push_back(i); - } - adv_idx_permute.push_back(0); - for (size_t i = advanced_ids[0] + 1; i < (rank.get_length() - adv_idx_count + 1); i++) { - adv_idx_permute.push_back(i); - } - // Transpose folded advanced indexed axis to its original location. - auto permute_indicies = - v0::Constant::create(element::i32, Shape{adv_idx_permute.size()}, adv_idx_permute); - gather = rg.make(gather, permute_indicies); - // unfold advanced index axes - for (size_t i = 0; i < advanced_ids[0]; i++) { - concat_dims.push_back(input_dims->output(i)); - } - concat_dims.push_back(cum_adv_index_shape_tensor); - for (auto i : non_used_dims) { - if (i < advanced_ids[0]) { - continue; - } - concat_dims.push_back(input_dims->output(i)); - } - - } else { - concat_dims.push_back(cum_adv_index_shape_tensor); - for (auto i : non_used_dims) { - concat_dims.push_back(input_dims->output(i)); - } + if (use_input_as_output) { + index_op->output(0).replace(index_op->get_input_source_output(0)); + return true; } - auto final_shape = rg.make(concat_dims, 0); - gather = rg.make(gather, final_shape, false); copy_runtime_info_and_name(index_op, rg.get()); - replace_node(index_op, gather); + replace_node(index_op, new_output.get_node_shared_ptr()); return true; - } else { auto const_input = cast_fw_node(indicies, "prim::Constant"); diff --git a/src/frontends/pytorch/src/utils.cpp b/src/frontends/pytorch/src/utils.cpp index 852de6e90fa25b..752b9accb71d01 100644 --- a/src/frontends/pytorch/src/utils.cpp +++ b/src/frontends/pytorch/src/utils.cpp @@ -17,6 +17,7 @@ #include "openvino/op/gather.hpp" #include "openvino/op/gather_nd.hpp" #include "openvino/op/mod.hpp" +#include "openvino/op/multiply.hpp" #include "openvino/op/non_zero.hpp" #include "openvino/op/range.hpp" #include "openvino/op/reduce_prod.hpp" @@ -24,6 +25,7 @@ #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" #include "openvino/op/slice.hpp" +#include "openvino/op/split.hpp" #include "openvino/op/squeeze.hpp" #include "openvino/op/subtract.hpp" #include "openvino/op/transpose.hpp" @@ -664,6 +666,197 @@ Output masked_select(const NodeContext& context, const Output& data, return context.mark_node(std::make_shared(data, masked_id)); } +Output flatten(ov::pass::NodeRegistry& rg, const Output& value, size_t axis) { + // First dimension of output tensor is the product of [d_0, ... d_{axis-1}] dimensions of + // input tensor. 
The last dimension is the product of the rest of input tensor dimensions: + // [d_{axis}, ..., d_n] + Output output_shape; + if (axis == 0) { + output_shape = v0::Constant::create(element::i32, Shape{2}, {1, -1}); + } else if (axis == 1) { + output_shape = v0::Constant::create(element::i32, Shape{2}, {0, -1}); + } else { + const auto value_shape = rg.make(value, element::i32); + const auto value_rank = rg.make(value_shape, element::i32); + const auto axis_node = v0::Constant::create(element::i32, Shape{1}, {axis}); + auto start = v0::Constant::create(element::i32, Shape{1}, {0}); + auto step = v0::Constant::create(element::i32, Shape{1}, {1}); + const auto first_part_dims = rg.make(value_shape, start, axis_node, step); + auto zero = v0::Constant::create(element::i32, {}, {0}); + auto first_part_dims_length = rg.make(first_part_dims, zero, true); + + auto remaining_part_length = v0::Constant::create(element::i32, {1}, {-1}); + + output_shape = rg.make(OutputVector{first_part_dims_length, remaining_part_length}, 0); + } + return rg.make(value, output_shape, true); +} + +bool index_tensor_on_list(ov::pass::NodeRegistry& rg, + const Output& data, + const ov::OutputVector& indices, + const ov::Rank& rank, + Output& new_output, + bool& use_input_as_output) { + // Multiple tensors as indices. Each tensor could either be + // 1. prim::Constant() + // representing ":" in python indexing. E.g. tensor[:, :] + // 2. prim::Constant[value=...] or tensor output + // representing advanced indexing. E.g. tensor[[0, 1], [2, 0]]. + // For more info on advanced indexing, + // check https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing + + // Consider a general case of + // t: [x_1, y_1, y_2, ..., x_m, ..., y_n] + // where t is a tensor of rank m+n, {x_i} are axes where tensor index is provided, and {y_i} are axes for + // ":". Same results can be achieved through transposing t into + // t: [x_1, x_2, ..., x_m, y_1, y_2, ..., y_n] + // and use gather + // t: [x_1 * x_2 * ... * x_m, y_1 * y_2 * ... * y_n] + // tensor index = \sum_{i=1}^m (ind_i * \prod_{j=i+1}^m (x_j)) + // After gather, reshape and transpose back. + std::vector advanced_ids; + std::vector is_masked_bool; + OutputVector masked_indicies; + // for case when index is bool e.g. 
x[x>0], replace index with non_zero + for (size_t i = 0; i < indices.size(); ++i) { + // skip dimensions where index is None + bool is_none = false; + if (!indices[i].get_node_shared_ptr()) { + is_none = true; + } + if (auto const_input = cast_fw_node(indices[i].get_node_shared_ptr(), "prim::Constant")) { + const auto& attrs = const_input->get_attrs(); + if (attrs.find("none_value") != attrs.end()) { + is_none = true; + } + } + if (is_none) { + masked_indicies.push_back(indices[i]); + is_masked_bool.push_back(false); + continue; + } + auto id_dtype = indices[i].get_element_type(); + if (id_dtype == element::boolean || id_dtype == element::u8) { + auto idx = rg.make(indices[i], element::u8); + auto nonzero = rg.make(idx, element::i32); + auto input_order = rg.make(element::i32, Shape{2}, std::vector{1, 0}); + auto masked_id = rg.make(nonzero, input_order); + masked_indicies.push_back(masked_id); + is_masked_bool.push_back(true); + } else { + masked_indicies.push_back(indices[i]); + is_masked_bool.push_back(false); + } + advanced_ids.push_back(i); + } + + // all indicies prim::Constant(None), return input as is + if (advanced_ids.size() == 0) { + new_output = data; + use_input_as_output = true; + return true; + } + // perform gather for single element case + if (advanced_ids.size() == 1) { + auto index = masked_indicies[advanced_ids[0]]; + if (is_masked_bool[advanced_ids[0]]) { + auto gather = rg.make(data, index); + new_output = gather->output(0); + use_input_as_output = false; + return true; + } + index = rg.make(index, element::i32); + auto dim = rg.make(element::i32, Shape{}, static_cast(advanced_ids[0])); + auto gather = rg.make(data, index, dim); + new_output = gather->output(0); + use_input_as_output = false; + return true; + } + // index transformation supports only tensors with static rank + if (rank.is_dynamic()) { + return false; + } + auto adv_idx_count = advanced_ids.size(); + auto input_shape = rg.make(data, element::i32); + auto zero = rg.make(element::i32, Shape{}, 0); + auto input_dims = rg.make(input_shape, zero, rank.get_length()); + std::vector non_used_dims; + for (auto i = 0; i < rank.get_length(); i++) { + if (std::find(advanced_ids.begin(), advanced_ids.end(), i) == advanced_ids.end()) { + non_used_dims.push_back(i); + } + } + std::vector permutation_dims; + permutation_dims.insert(permutation_dims.end(), advanced_ids.begin(), advanced_ids.end()); + permutation_dims.insert(permutation_dims.end(), non_used_dims.begin(), non_used_dims.end()); + auto transpose_dims = rg.make(element::i32, Shape{permutation_dims.size()}, permutation_dims); + auto transposed_input = rg.make(data, transpose_dims); + auto flatten_input = flatten(rg, transposed_input, adv_idx_count); + auto cum_adv_index = masked_indicies[advanced_ids[adv_idx_count - 1]]; + cum_adv_index = rg.make(cum_adv_index, element::i32); + auto multiplier = input_dims->output(advanced_ids[adv_idx_count - 1]); + for (int i = static_cast(adv_idx_count) - 2; i > -1; i--) { + auto input_id = advanced_ids[i]; + auto m_idx = rg.make(masked_indicies[input_id], element::i32); + auto adv_index = rg.make(m_idx, multiplier); + cum_adv_index = rg.make(cum_adv_index, adv_index); + multiplier = rg.make(multiplier, input_dims->output(input_id)); + } + std::shared_ptr gather = rg.make(flatten_input, cum_adv_index, zero); + OutputVector concat_dims; + // check if all advanced indices are consecutive. 
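+    // Worked example (illustrative only): for data of shape [3, 7, 6] indexed as
+    // data[[0, 2, 1], :, [5, 0, 3]] the advanced ids are {0, 2} - not consecutive,
+    // because axis 1 is None. The transposed/flattened view is [3 * 6, 7] and
+    // cum_adv_index = [5, 0, 3] + [0, 2, 1] * 6 = [5, 12, 9]; gathering those rows
+    // on axis 0 gives shape [3, 7], and the else-branch below keeps the advanced
+    // dimension in front, matching numpy advanced-indexing semantics.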
+ std::vector consequence_dims; + auto cum_adv_index_shape_tensor = rg.make(cum_adv_index, element::i32); + for (size_t i = advanced_ids[0]; i <= advanced_ids[advanced_ids.size() - 1]; i++) { + consequence_dims.push_back(i); + } + // unfold regular index axes + if (advanced_ids == consequence_dims) { + OutputVector folded_adv_idx_shape_vector; + auto minus_one = rg.make(element::i32, Shape{1}, -1); + folded_adv_idx_shape_vector.push_back(minus_one); + for (auto i : non_used_dims) { + folded_adv_idx_shape_vector.push_back(input_dims->output(i)); + } + auto folded_adv_idx_shape = rg.make(folded_adv_idx_shape_vector, 0); + gather = rg.make(gather, folded_adv_idx_shape, false); + std::vector adv_idx_permute; + for (size_t i = 1; i < advanced_ids[0] + 1; i++) { + adv_idx_permute.push_back(i); + } + adv_idx_permute.push_back(0); + for (size_t i = advanced_ids[0] + 1; i < (rank.get_length() - adv_idx_count + 1); i++) { + adv_idx_permute.push_back(i); + } + // Transpose folded advanced indexed axis to its original location. + auto permute_indicies = rg.make(element::i32, Shape{adv_idx_permute.size()}, adv_idx_permute); + gather = rg.make(gather, permute_indicies); + // unfold advanced index axes + for (size_t i = 0; i < advanced_ids[0]; i++) { + concat_dims.push_back(input_dims->output(i)); + } + concat_dims.push_back(cum_adv_index_shape_tensor); + for (auto i : non_used_dims) { + if (i < advanced_ids[0]) { + continue; + } + concat_dims.push_back(input_dims->output(i)); + } + + } else { + concat_dims.push_back(cum_adv_index_shape_tensor); + for (auto i : non_used_dims) { + concat_dims.push_back(input_dims->output(i)); + } + } + auto final_shape = rg.make(concat_dims, 0); + gather = rg.make(gather, final_shape, false); + new_output = gather->output(0); + use_input_as_output = false; + return true; +} + } // namespace pytorch } // namespace frontend } // namespace ov diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp index f4104a83ae3252..9346b9e18b94a3 100644 --- a/src/frontends/pytorch/src/utils.hpp +++ b/src/frontends/pytorch/src/utils.hpp @@ -129,6 +129,15 @@ Output concat_list_from_inputs(const NodeContext& context, size_t begin, s Output masked_select(const NodeContext& context, const Output& data, const Output& mask); +Output flatten(ov::pass::NodeRegistry& rg, const Output& value, size_t axis); + +bool index_tensor_on_list(ov::pass::NodeRegistry& rg, + const Output& data, + const ov::OutputVector& indices, + const ov::Rank& rank, + Output& new_output, + bool& use_input_as_output); + namespace op { template OutputVector inplace_op(const NodeContext& context) { diff --git a/tests/layer_tests/pytorch_tests/test_index_tensor.py b/tests/layer_tests/pytorch_tests/test_index_tensor.py new file mode 100644 index 00000000000000..d2055b5f5a4ec5 --- /dev/null +++ b/tests/layer_tests/pytorch_tests/test_index_tensor.py @@ -0,0 +1,49 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from pytorch_layer_test_class import PytorchLayerTest + + +class TestIndexTensor(PytorchLayerTest): + def _prepare_input(self, input_shape): + import numpy as np + return (np.random.randn(*input_shape).astype(np.float32),) + + def create_model(self, indices_list): + import torch + + class aten_index_tensor(torch.nn.Module): + def __init__(self, indices_list): + super(aten_index_tensor, self).__init__() + self.indices_list = indices_list + + def forward(self, x): + return torch.ops.aten.index.Tensor(x, self.indices_list) + + ref_net = 
None + + adjusted_indices_list = [] + for indices in indices_list: + if indices is not None: + adjusted_indices_list.append(torch.tensor(indices, dtype=torch.int32)) + continue + adjusted_indices_list.append(None) + + return aten_index_tensor(adjusted_indices_list), ref_net, None + + @pytest.mark.nightly + @pytest.mark.precommit_torch_export + @pytest.mark.parametrize(('input_shape', 'indices_list'), [ + ([3, 7], [[0], [5, 3, 0]]), + ([3, 7, 6], [[0], None, None]), + ([3, 7, 6], [[0], None, [5, 0, 3]]), + ([3, 7, 6], [[0, 2, 1], None, [5, 0, 3]]), + ([3, 7, 6], [[0, 2, 1], [4], [5, 0, 3]]), + ]) + def test_index_tensor(self, input_shape, indices_list, ie_device, precision, ir_version): + if not PytorchLayerTest.use_torch_export(): + pytest.skip(reason='aten.index.Tensor test is supported only on torch.export()') + self._test(*self.create_model(indices_list), ie_device, precision, ir_version, + kwargs_to_prepare_input={'input_shape': input_shape}) diff --git a/tests/layer_tests/pytorch_tests/test_upsample.py b/tests/layer_tests/pytorch_tests/test_upsample.py index 34ffb9880c7f62..aa5cec1080f7d0 100644 --- a/tests/layer_tests/pytorch_tests/test_upsample.py +++ b/tests/layer_tests/pytorch_tests/test_upsample.py @@ -43,6 +43,7 @@ def forward(self, x): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.skipif(platform == 'darwin', reason="Ticket - 122182") def test_upsample1d(self, mode, size, scale, ie_device, precision, ir_version): if ie_device == "GPU" and mode == "linear": @@ -96,6 +97,7 @@ def forward(self, x): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_upsample2d(self, mode, size, scale, ie_device, precision, ir_version): self._test(*self.create_model(size, scale, mode), ie_device, precision, ir_version, trace_model=True, **{"custom_eps": 1e-3}) @@ -213,6 +215,7 @@ def forward(self, x): @pytest.mark.parametrize("mode", ['nearest', 'bilinear', 'bicubic']) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_upsample2d_list_sizes(self, mode, ie_device, precision, ir_version): self._test(*self.create_model(mode), ie_device, precision, ir_version, trace_model=True) From e3ad821bcca52b2ff86550d9354655f405f1e401 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Tue, 22 Oct 2024 10:17:52 +0400 Subject: [PATCH 100/112] [CPU] Implemented "jit_exp_emitter" (#26974) ### Details: - *Previously, we used dnnl-injector for `Exp` op which require 2 `aux_vec_regs`. The snippets kernel have some pool of aux vec registers which can be used by emitters in their implementations. However, dnnl cannot work with user-provided aux registers and always spill them on stack while plugin emitters can do it. 
To avoid extra push-pop in Snippets kernel (it leads to performance degradations), we implemented own emitter for `Exp` with the same logic to have opportunity to pass free aux vec registers* - *Updated `jit_erf_emitter`: reused new `jit_exp_emitter` to compute exponent and now we work only with `vmm_dst` to avoid `vmm_src` data corruption (input registers must not be corrupted)* ### Tickets: - *155236* --- .../plugin/x64/jit_dnnl_ext_emitters.hpp | 13 - .../plugin/x64/jit_eltwise_emitters.cpp | 252 ++++++++++-------- .../plugin/x64/jit_eltwise_emitters.hpp | 27 ++ src/plugins/intel_cpu/src/nodes/eltwise.cpp | 7 +- 4 files changed, 175 insertions(+), 124 deletions(-) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_ext_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_ext_emitters.hpp index 835605756f9014..7a4d1e31277e3b 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_ext_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_dnnl_ext_emitters.hpp @@ -64,19 +64,6 @@ class jit_elu_emitter : public jit_dnnl_emitter { } }; -class jit_exp_emitter : public jit_dnnl_emitter { -public: - jit_exp_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - ov::element::Type exec_prc = ov::element::f32) - : jit_dnnl_emitter(host, host_isa, n, exec_prc) { - kind = dnnl_eltwise_exp; - alpha = 0.f; - beta = 0.f; - - set_injector(); - } -}; - class jit_abs_emitter : public jit_dnnl_emitter { public: jit_abs_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp index fb74c196f6a289..0331a3ee4908b9 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.cpp @@ -1822,29 +1822,25 @@ void jit_negative_emitter::emit_isa(const std::vector &in_vec_idxs, cons h->uni_vsubps(vmm_dst, vmm_dst, vmm_src); } -/// ERF /// -jit_erf_emitter::jit_erf_emitter(x64::jit_generator* host, x64::cpu_isa_t host_isa, ov::element::Type exec_prc) + +/// EXP /// +jit_exp_emitter::jit_exp_emitter(x64::jit_generator* host, x64::cpu_isa_t host_isa, ov::element::Type exec_prc) : jit_emitter(host, host_isa, exec_prc) { prepare_table(); } -jit_erf_emitter::jit_erf_emitter(x64::jit_generator* host, - x64::cpu_isa_t host_isa, - const std::shared_ptr& node, - ov::element::Type exec_prc) +jit_exp_emitter::jit_exp_emitter(x64::jit_generator* host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, ov::element::Type exec_prc) : jit_emitter(host, host_isa, exec_prc) { prepare_table(); } -size_t jit_erf_emitter::get_inputs_num() const { return 1; } +size_t jit_exp_emitter::get_inputs_num() const { return 1; } -std::set> jit_erf_emitter::get_supported_precisions(const std::shared_ptr& node) { +std::set> jit_exp_emitter::get_supported_precisions(const std::shared_ptr& node) { return {{element::f32}}; } -void jit_erf_emitter::emit_impl( - const std::vector &in_vec_idxs, - const std::vector &out_vec_idxs) const { +void jit_exp_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1857,20 +1853,16 @@ void jit_erf_emitter::emit_impl( } template -void 
jit_erf_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +void jit_exp_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { using Vmm = typename conditional3::type; Vmm vmm_src = Vmm(in_vec_idxs[0]); Vmm vmm_dst = Vmm(out_vec_idxs[0]); - Vmm vmm_mask = Vmm(aux_vec_idxs[0]); - Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]); - Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]); - Vmm vmm_aux2 = Vmm(aux_vec_idxs[2]); - Vmm vmm_aux3 = Vmm(aux_vec_idxs[3]); - Vmm vmm_aux4 = Vmm(aux_vec_idxs[4]); + Vmm vmm_mask = need_vmm_mask() ? Vmm(aux_vec_idxs[0]) : Vmm(); + Vmm vmm_aux0 = Vmm(aux_vec_idxs[0 + static_cast(need_vmm_mask())]); + Vmm vmm_aux1 = Vmm(aux_vec_idxs[1 + static_cast(need_vmm_mask())]); - auto compute_cmp_mask = [&](const Vmm &vmm_src, - const Xbyak::Operand &compare_operand, int cmp_predicate) { + auto compute_cmp_mask = [&](const Vmm &vmm_src, const Xbyak::Operand &compare_operand, int cmp_predicate) { if (host_isa_ == x64::avx512_core) { h->vcmpps(k_mask, vmm_src, compare_operand, cmp_predicate); } else { @@ -1886,66 +1878,123 @@ void jit_erf_emitter::emit_isa(const std::vector &in_vec_idxs, const std } }; - auto exp_compute_vector_fwd = [&](const Vmm &vmm_src) { - // get mask of values lower than log(FLT_MIN) to zero them in the output - compute_cmp_mask(vmm_src, table_val("exp_ln_flt_min_f"), _cmp_lt_os); - - h->uni_vminps(vmm_src, vmm_src, table_val("exp_ln_flt_max_f")); - h->uni_vmaxps(vmm_src, vmm_src, table_val("exp_ln_flt_min_f")); - h->uni_vmovups(vmm_aux1, vmm_src); - - // calculate exp(x) - // fx = x * log2ef + 0.5 - h->uni_vmulps(vmm_src, vmm_src, table_val("exp_log2ef")); - h->uni_vaddps(vmm_src, vmm_src, table_val("half")); - - // tmp = floorf(fx) - const auto _op_floor = 1u; - h->uni_vroundps(vmm_aux2, vmm_src, _op_floor); - - // keep vmm_src = fx for further computations - h->uni_vmovups(vmm_src, vmm_aux2); - - // x = x - fx * ln2 - h->uni_vfnmadd231ps(vmm_aux1, vmm_aux2, table_val("ln2f")); - - // compute 2^n - h->uni_vcvtps2dq(vmm_aux2, vmm_src); - h->uni_vpaddd(vmm_aux2, vmm_aux2, table_val("exponent_bias")); - const int n_mantissa_bits = 23; - h->uni_vpslld(vmm_aux2, vmm_aux2, n_mantissa_bits); //Vmm(6) = 2^-fx - - // use vmm_src as tmp vmm_zero when applying mask - h->uni_vpxor(vmm_src, vmm_src, vmm_src); - // set zeroes at those points which were < log(FLT_MIN) - blend_with_mask(vmm_aux2, vmm_src); - - // compute polynomial - h->uni_vmovups(vmm_src, table_val("ex_pol5")); - h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("ex_pol4")); - h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("ex_pol3")); - h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("ex_pol2")); - h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("ex_pol1")); - h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("one")); - // y = y * 2^n - h->uni_vmulps(vmm_src, vmm_src, vmm_aux2); - }; + h->uni_vmovups(vmm_aux1, table_val("ln_flt_min_f")); + // get mask of values lower than log(FLT_MIN) to zero them in the output + compute_cmp_mask(vmm_src, vmm_aux1, _cmp_lt_os); - auto abs_compute_vector_fwd = [&](const Vmm &vmm_src) { - // compute abs(x) = _mm_and_ps(x, 01111..111)); - h->uni_vandps(vmm_src, vmm_src, table_val("positive_mask")); - }; + h->uni_vminps(vmm_dst, vmm_src, table_val("ln_flt_max_f")); + h->uni_vmaxps(vmm_dst, vmm_dst, vmm_aux1); + h->uni_vmovups(vmm_aux0, vmm_dst); + + // calculate exp(x) + // fx = x * log2ef + 0.5 + h->uni_vmulps(vmm_dst, vmm_dst, table_val("log2ef")); + h->uni_vaddps(vmm_dst, vmm_dst, table_val("half")); + + // tmp = 
floorf(fx) + const auto _op_floor = 1u; + h->uni_vroundps(vmm_aux1, vmm_dst, _op_floor); + + // keep vmm_dst = fx for further computations + h->uni_vmovups(vmm_dst, vmm_aux1); + + // x = x - fx * ln2 + h->uni_vfnmadd231ps(vmm_aux0, vmm_aux1, table_val("ln2f")); + + // compute 2^n + h->uni_vcvtps2dq(vmm_aux1, vmm_dst); + h->uni_vpaddd(vmm_aux1, vmm_aux1, table_val("exponent_bias")); + const int n_mantissa_bits = 23; + h->uni_vpslld(vmm_aux1, vmm_aux1, n_mantissa_bits); + + // use vmm_dst as tmp vmm_zero when applying mask + h->uni_vpxor(vmm_dst, vmm_dst, vmm_dst); + // set zeroes at those points which were < log(FLT_MIN) + blend_with_mask(vmm_aux1, vmm_dst); + + // compute polynomial + h->uni_vmovups(vmm_dst, table_val("pol5")); + h->uni_vfmadd213ps(vmm_dst, vmm_aux0, table_val("pol4")); + h->uni_vfmadd213ps(vmm_dst, vmm_aux0, table_val("pol3")); + h->uni_vfmadd213ps(vmm_dst, vmm_aux0, table_val("pol2")); + h->uni_vfmadd213ps(vmm_dst, vmm_aux0, table_val("pol1")); + h->uni_vfmadd213ps(vmm_dst, vmm_aux0, table_val("one")); + // y = y * 2^n + h->uni_vmulps(vmm_dst, vmm_dst, vmm_aux1); +} + +void jit_exp_emitter::register_table_entries() { + push_arg_entry_of("pol1", 0x3f7ffffb, true); // p1 = 0.999999701f + push_arg_entry_of("pol2", 0x3efffee3, true); // p2 = 0.499991506f + push_arg_entry_of("pol3", 0x3e2aad40, true); // p3 = 0.166676521f + push_arg_entry_of("pol4", 0x3d2b9d0d, true); // p4 = 0.0418978221f + push_arg_entry_of("pol5", 0x3c07cfce, true); // p5 = 0.00828929059f + + push_arg_entry_of("one", CONST_1_F, true); + push_arg_entry_of("half", 0x3f000000, true); + push_arg_entry_of("ln2f", 0x3f317218, true); + push_arg_entry_of("log2ef", 0x3fb8aa3b, true); + push_arg_entry_of("ln_flt_max_f", 0x42b17218, true); + push_arg_entry_of("ln_flt_min_f", 0xc2aeac50, true); + push_arg_entry_of("exponent_bias", 0x0000007f, true); +} + +size_t jit_exp_emitter::aux_vecs_count() const { + return need_vmm_mask() ? 3 : 2; +} + +/// ERF /// +jit_erf_emitter::jit_erf_emitter(x64::jit_generator* host, x64::cpu_isa_t host_isa, ov::element::Type exec_prc) + : jit_emitter(host, host_isa, exec_prc) { + m_exp_emitter.reset(new jit_exp_emitter(host, host_isa, exec_prc)); + prepare_table(); +} + +jit_erf_emitter::jit_erf_emitter(x64::jit_generator* host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, ov::element::Type exec_prc) + : jit_erf_emitter(host, host_isa, exec_prc) {} + +size_t jit_erf_emitter::get_inputs_num() const { return 1; } + +std::set> jit_erf_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_erf_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == x64::sse41) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else if (host_isa_ == x64::avx2) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else if (host_isa_ == x64::avx512_core) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Unsupported ISA ", host_isa_); + } +} + +template +void jit_erf_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + using Vmm = typename conditional3::type; + Vmm vmm_src = Vmm(in_vec_idxs[0]); + Vmm vmm_dst = Vmm(out_vec_idxs[0]); + + Vmm vmm_aux0 = Vmm(aux_vec_idxs[0]); + Vmm vmm_aux1 = Vmm(aux_vec_idxs[1]); + Vmm vmm_aux2 = Vmm(aux_vec_idxs[2]); + Vmm vmm_aux3 = Vmm(aux_vec_idxs[3]); // IMPORTANT: we use vmm_aux3 to save `x` as exp_compute does not use it. 
h->uni_vmovups(vmm_aux3, vmm_src); // -exp(-x*x) - h->uni_vmulps(vmm_src, vmm_src, vmm_src); - h->uni_vxorps(vmm_src, vmm_src, table_val("sign_mask")); + h->uni_vmulps(vmm_dst, vmm_src, vmm_src); + h->uni_vxorps(vmm_dst, vmm_dst, table_val("sign_mask")); - exp_compute_vector_fwd(vmm_src); + // pass the current `aux_vec_idxs` to `exp_emitter` excepting `vmm_aux3` + auto exp_aux_vec_idxs = aux_vec_idxs; + exp_aux_vec_idxs.erase(std::find(exp_aux_vec_idxs.begin(), exp_aux_vec_idxs.end(), static_cast(vmm_aux3.getIdx()))); + m_exp_emitter->emit_code({static_cast(vmm_dst.getIdx())}, {static_cast(vmm_dst.getIdx())}, exp_aux_vec_idxs); - h->uni_vxorps(vmm_src, vmm_src, table_val("sign_mask")); + h->uni_vxorps(vmm_dst, vmm_dst, table_val("sign_mask")); // get sign h->uni_vmovups(vmm_aux0, vmm_aux3); @@ -1954,60 +2003,49 @@ void jit_erf_emitter::emit_isa(const std::vector &in_vec_idxs, const std // abs(x) h->uni_vmovups(vmm_aux1, vmm_aux3); // compute abs(x) = _mm_and_ps(x, 01111..111)); - abs_compute_vector_fwd(vmm_aux1); + h->uni_vandps(vmm_aux1, vmm_aux1, table_val("positive_mask")); // t = 1 / (p*x + 1) h->uni_vmovups(vmm_aux2, table_val("approx_const")); h->uni_vfmadd213ps(vmm_aux2, vmm_aux1, table_val("one")); - h->uni_vmovups(vmm_aux4, table_val("one")); - h->uni_vdivps(vmm_aux4, vmm_aux4, vmm_aux2); + h->uni_vmovups(vmm_aux3, table_val("one")); + h->uni_vdivps(vmm_aux3, vmm_aux3, vmm_aux2); // -exp(-x*x)*t - h->uni_vmulps(vmm_src, vmm_src, vmm_aux4); + h->uni_vmulps(vmm_dst, vmm_dst, vmm_aux3); // compute polynomialial r - h->uni_vmovups(vmm_aux1, table_val("erf_pol5")); - h->uni_vfmadd213ps(vmm_aux1, vmm_aux4, table_val("erf_pol4")); - h->uni_vfmadd213ps(vmm_aux1, vmm_aux4, table_val("erf_pol3")); - h->uni_vfmadd213ps(vmm_aux1, vmm_aux4, table_val("erf_pol2")); - h->uni_vfmadd213ps(vmm_aux1, vmm_aux4, table_val("erf_pol1")); + h->uni_vmovups(vmm_aux1, table_val("pol5")); + h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val("pol4")); + h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val("pol3")); + h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val("pol2")); + h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val("pol1")); // erf = sign * (1 - r * t * exp(-x*x)) - h->uni_vfmadd213ps(vmm_src, vmm_aux1, table_val("one")); - h->uni_vxorps(vmm_dst, vmm_src, vmm_aux0); + h->uni_vfmadd213ps(vmm_dst, vmm_aux1, table_val("one")); + h->uni_vxorps(vmm_dst, vmm_dst, vmm_aux0); } void jit_erf_emitter::register_table_entries() { push_arg_entry_of("approx_const", 0x3ea7ba05, true); // 0.3275911 - push_arg_entry_of("one_over_sqrt_two", 0x3f3504f3, true); - push_arg_entry_of("sign_mask", 0x80000000, true); - - push_arg_entry_of("ex_pol1", 0x3f7ffffb, true); // p1 = 0.999999701f - push_arg_entry_of("ex_pol2", 0x3efffee3, true); // p2 = 0.499991506f - push_arg_entry_of("ex_pol3", 0x3e2aad40, true); // p3 = 0.166676521f - push_arg_entry_of("ex_pol4", 0x3d2b9d0d, true); // p4 = 0.0418978221f - push_arg_entry_of("ex_pol5", 0x3c07cfce, true); // p5 = 0.00828929059f - - push_arg_entry_of("erf_pol1", 0x3e827906, true); // p1 = 0.254829592f - push_arg_entry_of("erf_pol2", 0xbe91a98e, true); // p2 = -0.284496736f - push_arg_entry_of("erf_pol3", 0x3fb5f0e3, true); // p3 = 1.421413741f - push_arg_entry_of("erf_pol4", 0xbfba00e3, true); // p4 = -1.453152027f - push_arg_entry_of("erf_pol5", 0x3f87dc22, true); // p5 = 1.061405429f - push_arg_entry_of("one", CONST_1_F, true); - push_arg_entry_of("half", 0x3f000000, true); - - push_arg_entry_of("exp_log2ef", 0x3fb8aa3b, true); - push_arg_entry_of("exp_ln_flt_max_f", 0x42b17218, true); 
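As a scalar cross-check of what these tables encode (a sketch under the stated constants, not part of the patch): the new `jit_exp_emitter` implements the usual range reduction `exp(x) = 2^n * P(x - n*ln2)` with `n = floor(x*log2e + 0.5)`, and `jit_erf_emitter` applies the Abramowitz-Stegun approximation `erf(x) = sign(x) * (1 - P(t) * exp(-x*x))` with `t = 1 / (1 + 0.3275911*|x|)`. Approximate decimal values of the hex table entries are used below, and boundary handling is simplified to mid-range inputs:

```cpp
#include <cmath>
#include <cstdint>
#include <cstring>

// Reference for jit_exp_emitter: 2^n * poly(r), r = x - n*ln2.
static float exp_ref(float x) {
    const float ln_flt_max = 88.7228394f;   // 0x42b17218
    const float ln_flt_min = -87.3365479f;  // 0xc2aeac50
    const bool zero_out = x < ln_flt_min;   // emitter masks 2^n to zero here
    x = std::fmin(std::fmax(x, ln_flt_min), ln_flt_max);
    const float n = std::floor(x * 1.44269502f + 0.5f);  // log2ef, "half"
    const float r = x - n * 0.693147182f;                // ln2f
    float p = 0.00828929059f;                            // pol5
    p = p * r + 0.0418978221f;                           // pol4
    p = p * r + 0.166676521f;                            // pol3
    p = p * r + 0.499991506f;                            // pol2
    p = p * r + 0.999999701f;                            // pol1
    p = p * r + 1.0f;                                    // "one"
    // 2^n through the exponent bits: (n + exponent_bias) << 23
    uint32_t bits = zero_out ? 0u
                             : static_cast<uint32_t>(static_cast<int32_t>(n) + 127) << 23;
    float two_n;
    std::memcpy(&two_n, &bits, sizeof(two_n));
    return p * two_n;
}

// Reference for jit_erf_emitter (Abramowitz & Stegun 7.1.26).
static float erf_ref(float x) {
    const float t = 1.0f / (1.0f + 0.3275911f * std::fabs(x));  // approx_const
    const float poly = ((((1.061405429f * t - 1.453152027f) * t + 1.421413741f) * t
                         - 0.284496736f) * t + 0.254829592f) * t;  // pol5..pol1
    return std::copysign(1.0f - poly * exp_ref(-x * x), x);
}
```

This also makes the register-pressure point concrete: `exp_ref` is essentially the computation the erf path now reuses, which is why `jit_erf_emitter` delegates to an embedded `jit_exp_emitter` instead of duplicating the polynomial.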
- push_arg_entry_of("exp_ln_flt_min_f", 0xc2aeac50, true); - - push_arg_entry_of("ln2f", 0x3f317218, true); - push_arg_entry_of("exponent_bias", 0x0000007f, true); + push_arg_entry_of("sign_mask", 0x80000000, true); push_arg_entry_of("positive_mask", 0x7fffffff, true); + + push_arg_entry_of("pol1", 0x3e827906, true); // p1 = 0.254829592f + push_arg_entry_of("pol2", 0xbe91a98e, true); // p2 = -0.284496736f + push_arg_entry_of("pol3", 0x3fb5f0e3, true); // p3 = 1.421413741f + push_arg_entry_of("pol4", 0xbfba00e3, true); // p4 = -1.453152027f + push_arg_entry_of("pol5", 0x3f87dc22, true); // p5 = 1.061405429f } size_t jit_erf_emitter::aux_vecs_count() const { - return 5ul; + return 4ul; +} + +void jit_erf_emitter::emit_data() const { + jit_emitter::emit_data(); + m_exp_emitter->emit_data(); } /// SOFT SIGN /// diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.hpp index 606b0ef1ef90c8..c8c4b06d6f3347 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_eltwise_emitters.hpp @@ -525,6 +525,29 @@ class jit_negative_emitter : public jit_emitter { void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_exp_emitter : public jit_emitter { +public: + jit_exp_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, + ov::element::Type exec_prc = ov::element::f32); + + jit_exp_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, + ov::element::Type exec_prc = ov::element::f32); + + size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + + bool need_vmm_mask() const { return host_isa_ != dnnl::impl::cpu::x64::avx512_core; } + + void register_table_entries() override; + size_t aux_vecs_count() const override; +}; + class jit_erf_emitter : public jit_emitter { public: jit_erf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, @@ -533,6 +556,8 @@ class jit_erf_emitter : public jit_emitter { jit_erf_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, ov::element::Type exec_prc = ov::element::f32); + void emit_data() const override; + size_t get_inputs_num() const override; static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); @@ -546,6 +571,8 @@ class jit_erf_emitter : public jit_emitter { void register_table_entries() override; size_t aux_vecs_count() const override; + + std::unique_ptr m_exp_emitter {nullptr}; }; class jit_soft_sign_emitter : public jit_emitter { diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index f2f6ce503bd5e4..ed4d936fa49ae6 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -244,7 +244,6 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseAbs, jit_dnnl_aux_emitter), OV_CASE(Algorithm::EltwiseSqrt, jit_dnnl_aux_emitter), OV_CASE(Algorithm::EltwiseSoftRelu, jit_dnnl_aux_emitter), - 
OV_CASE(Algorithm::EltwiseExp, jit_dnnl_aux_emitter), OV_CASE(Algorithm::EltwiseClamp, jit_dnnl_aux_emitter), OV_CASE(Algorithm::EltwiseSwish, jit_dnnl_aux_emitter), OV_CASE(Algorithm::EltwiseHswish, jit_dnnl_aux_emitter), @@ -262,6 +261,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter), OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter), OV_CASE(Algorithm::EltwiseMinimum, jit_minimum_emitter), + OV_CASE(Algorithm::EltwiseExp, jit_exp_emitter), OV_CASE(Algorithm::EltwiseSquaredDifference, jit_squared_difference_emitter), OV_CASE(Algorithm::EltwisePowerDynamic, jit_power_dynamic_emitter), OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), @@ -623,7 +623,6 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener OV_CASE(Algorithm::EltwiseAbs, jit_dnnl_aux_emitter), OV_CASE(Algorithm::EltwiseSqrt, jit_dnnl_aux_emitter), OV_CASE(Algorithm::EltwiseSoftRelu, jit_dnnl_aux_emitter), - OV_CASE(Algorithm::EltwiseExp, jit_dnnl_aux_emitter), OV_CASE(Algorithm::EltwiseClamp, jit_dnnl_aux_emitter), OV_CASE(Algorithm::EltwiseSwish, jit_dnnl_aux_emitter), OV_CASE(Algorithm::EltwiseHswish, jit_dnnl_aux_emitter), @@ -641,6 +640,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter), OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter), OV_CASE(Algorithm::EltwiseMinimum, jit_minimum_emitter), + OV_CASE(Algorithm::EltwiseExp, jit_exp_emitter), OV_CASE(Algorithm::EltwiseSquaredDifference, jit_squared_difference_emitter), OV_CASE(Algorithm::EltwisePowerDynamic, jit_power_dynamic_emitter), OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), @@ -1213,7 +1213,6 @@ const std::map& Eltwise::getIn }}, {ov::op::v0::Exp::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { node.algorithm = Algorithm::EltwiseExp; - node.onednnAlgorithm = dnnl::algorithm::eltwise_exp; }}, {SwishNode::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { auto swishOp = getNgraphOpAs(op); @@ -1873,7 +1872,6 @@ class EltwiseRefExecutor : public EltwiseRefBaseExecutor { case Algorithm::EltwiseAbs: case Algorithm::EltwiseSqrt: case Algorithm::EltwiseSoftRelu: - case Algorithm::EltwiseExp: case Algorithm::EltwiseClamp: case Algorithm::EltwiseSwish: case Algorithm::EltwiseHswish: @@ -1893,6 +1891,7 @@ class EltwiseRefExecutor : public EltwiseRefBaseExecutor { case Algorithm::EltwiseMod: *dst_ptr_f = src_f[0] - truncf(src_f[0] / src_f[1]) * src_f[1]; break; case Algorithm::EltwiseMaximum: *dst_ptr_f = std::max(src_f[0], src_f[1]); break; case Algorithm::EltwiseMinimum: *dst_ptr_f = std::min(src_f[0], src_f[1]); break; + case Algorithm::EltwiseExp: *dst_ptr_f = expf(src_f[0]); break; case Algorithm::EltwiseSquaredDifference: *dst_ptr_f = powf((src_f[0] - src_f[1]), 2.f); break; case Algorithm::EltwisePowerDynamic: *dst_ptr_f = powf(src_f[0], src_f[1]); break; case Algorithm::EltwiseEqual: *dst_ptr_f = src_f[0] == src_f[1]; break; From adeb3d2e0296db45a745ad6c02d4566570b18750 Mon Sep 17 00:00:00 2001 From: Alexey Moskalev Date: Tue, 22 Oct 2024 11:43:25 +0400 Subject: [PATCH 101/112] Adding CODE_OF_CONDUCT.md (#27100) Adding CODE_OF_CONDUCT.md to meet LF requirements. 
---
 CODE_OF_CONDUCT.md | 119 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 CODE_OF_CONDUCT.md

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 00000000000000..5044453266940d
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,119 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, religion, or sexual identity
+and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the
+  overall community
+
+Examples of unacceptable behavior include:
+
+* The use of sexualized language or imagery, and sexual attention or
+  advances of any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email
+  address, without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official email address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+openvino_codeofconduct At intel DOT com.
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series
+of actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or
+permanent ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within
+the community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.0, available at
+[https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].

From 28e1c821fbd04fe1c3949eed049c946575d7fe09 Mon Sep 17 00:00:00 2001
From: Aleksandr Voron
Date: Tue, 22 Oct 2024 13:37:14 +0200
Subject: [PATCH 102/112] [CPU][ARM] ACL upgrade to v24.09 (#26852)

### Details:
 - *item1*
 - *...*

### Tickets:
 - *ticket-id*

---
 src/plugins/intel_cpu/thirdparty/ComputeLibrary | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/thirdparty/ComputeLibrary b/src/plugins/intel_cpu/thirdparty/ComputeLibrary
index f1929dc994d8e5..c61bd3387403b7 160000
--- a/src/plugins/intel_cpu/thirdparty/ComputeLibrary
+++ b/src/plugins/intel_cpu/thirdparty/ComputeLibrary
@@ -1 +1 @@
-Subproject commit f1929dc994d8e5afae5c77ca66446344119a8592
+Subproject commit c61bd3387403b76d618915ccebf5e9585f52a071

From a02da923ac333a810f2adf2109e8f5aaf0c3191f Mon Sep 17 00:00:00 2001
From: Andrzej Kopytko
Date: Tue, 22 Oct 2024 19:36:15 +0200
Subject: [PATCH 103/112] Docs rename ovcategory in sitemap (#27177)

### Details:
 - *item1*
 - *...*

### Tickets:
 - *ticket-id*

---
 .../openvino_custom_sphinx_sitemap/__init__.py | 2 +-
 docs/sphinx_setup/conf.py                      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py
index bb26683cd9e579..c82e0a8d5995f7 100644
--- a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py
+++ b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py
@@ -122,7 +122,7 @@ def process_coveo_meta(meta, url, link):
         namespace_element = ET.SubElement(url, namespace)

         for tag_name, tag_value in values.items():
-            if tag_name == 'ovcategory':
+            if tag_name == 'ovdoctype':
                 processed_link = process_link(link)
                ET.SubElement(namespace_element, tag_name).text = processed_link
            else:

diff --git a/docs/sphinx_setup/conf.py b/docs/sphinx_setup/conf.py
index 148309ccbafe96..def41af5943b3c 100644
--- a/docs/sphinx_setup/conf.py
+++ b/docs/sphinx_setup/conf.py
@@ -84,7 +84,7 @@
 ov_sitemap_meta = [
     ('coveo:metadata', {
         'ovversion': version_name,
-        'ovcategory': 'null'
+        'ovdoctype': 'null'
     })
 ]

From 1cde01041f0254821a0c843289cb9dfc89e64f25 Mon Sep 17 00:00:00 2001
From: Andrzej Kopytko
Date: Tue, 22 Oct 2024 19:40:29 +0200
Subject: [PATCH 104/112] [DOCS] port (#27192)

### Details:
 - *item1*
 - *...*

### Tickets:
 - *ticket-id*

---
 docs/sphinx_setup/_static/js/custom.js | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/sphinx_setup/_static/js/custom.js b/docs/sphinx_setup/_static/js/custom.js
index 52962cf0f7c7e6..3cfe907382c314 100644
--- a/docs/sphinx_setup/_static/js/custom.js
+++ b/docs/sphinx_setup/_static/js/custom.js
@@ -417,6 +417,7 @@ document.addEventListener('DOMContentLoaded', function () {
     await searchInterfaceSa.initialize({
       accessToken: "xx1f2aebd3-4307-4632-aeea-17c13378b237",
       organizationId: "intelcorporationnonproduction2ybdyblf7",
+      organizationEndpoints: await searchInterface.getOrganizationEndpoints('intelcorporationnonproduction2ybdyblf7')
     });
     searchInterfaceSa.executeFirstSearch();
   }
@@ -424,6 +425,7 @@ document.addEventListener('DOMContentLoaded', function () {
     await searchInterface.initialize({
       accessToken: "xx1f2aebd3-4307-4632-aeea-17c13378b237",
       organizationId: "intelcorporationnonproduction2ybdyblf7",
+      organizationEndpoints: await searchInterface.getOrganizationEndpoints('intelcorporationnonproduction2ybdyblf7')
     });
     searchInterface.executeFirstSearch();
   }

From 5ac9eda0424c107acd5a066530d6f95d1589bd17 Mon Sep 17 00:00:00 2001
From: Pawel Raasz
Date: Tue, 22 Oct 2024 20:47:24 +0200
Subject: [PATCH 105/112] [tests] MSVC debug asserts on vector iterators
 incompatible in test (#27174)

### Details:
- Fix `Assertion failed: vector iterators incompatible` by erasing elements correctly, continuing iteration from the iterator returned by `erase` (see the sketch below).
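
  A minimal sketch of the failure mode this change addresses (illustrative only, not the actual subgraphs_dumper code; the container and predicate here are made up). `std::vector::erase` invalidates the iterator passed to it, so MSVC debug iterators assert with `vector iterators incompatible` on the next comparison unless iteration continues from the iterator that `erase` returns:

  ```cpp
  #include <iostream>
  #include <vector>

  int main() {
      std::vector<int> nodes{1, 2, 3, 4, 5};

      // Remove even elements while iterating. Reusing the iterator returned
      // by erase() keeps the loop valid; advancing the erased (invalidated)
      // iterator instead is undefined behavior and trips the MSVC debug assert.
      for (auto it = nodes.begin(); it != nodes.end();) {
          if (*it % 2 == 0) {
              it = nodes.erase(it);  // valid iterator to the next element
          } else {
              ++it;
          }
      }

      for (int v : nodes) {
          std::cout << v << ' ';  // prints: 1 3 5
      }
      return 0;
  }
  ```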
### Tickets: - CVS-155346 --- .../subgraphs_dumper/include/utils/model.hpp | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp index a76c07622f40a5..c8183d7dbf2798 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/utils/model.hpp @@ -21,7 +21,7 @@ align_input_info(const std::shared_ptr& model, const std::map &in_info_ref, const std::unordered_map &matched_op); -// get set nodes of subgraph after start_node +// get set nodes of subgraph after start_node void get_subgraph_set_node(std::unordered_set>& nodes_to_check, const std::shared_ptr& node); @@ -51,14 +51,12 @@ generate_model(ov::NodeVector& nodes, auto orig_node_name = node->get_friendly_name(); cloned_node_map.insert({ orig_node_name, clone_node(node, is_copy_constants, false, orig_node_name) }); - + // create temporary vector to fill node output indexes std::vector out_ports(node->outputs().size()); std::iota(out_ports.begin(), out_ports.end(), 0); // fill by all nodes with output ports - model_output_nodes.insert({ - orig_node_name, - std::unordered_set(out_ports.begin(), out_ports.end()) }); + model_output_nodes.insert({orig_node_name, std::unordered_set(out_ports.begin(), out_ports.end())}); if (!ov::op::util::is_output(node) && !ov::op::util::is_constant(node) && !ov::op::util::is_parameter(node)) { @@ -83,7 +81,7 @@ generate_model(ov::NodeVector& nodes, if (orig_node_to_check.get_node()->shared_from_this() == node) { auto orig_in_node_name = orig_in_node->get_friendly_name(); auto cloned_in_node = cloned_node->get_input_node_shared_ptr(in_idx); - // if op input node is in subgraph replace parameters + // if op input node is in subgraph replace parameters // in cloned node by other nodes from the map if (cloned_node_map.count(orig_in_node_name)) { auto orig_in_node = cloned_node_map[orig_in_node_name]; @@ -192,17 +190,14 @@ generate_model(ov::NodeVector& nodes, } auto h1 = std::hash{}(string_to_hash); model->set_friendly_name(std::to_string(h1)); - { - auto it = nodes.begin(); - while (it != nodes.end()) { - if (cloned_node_map.count((*it)->get_friendly_name())) { - nodes.erase(it); - } else { - ++it; - } + for (auto it = nodes.begin(); it != nodes.end();) { + if (cloned_node_map.count((*it)->get_friendly_name())) { + it = nodes.erase(it); + } else { + ++it; } } - + return { model, model_input_info }; } From 5a396e6c06934e9a9a2daffe4c78f66844644247 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Wed, 23 Oct 2024 01:45:02 +0400 Subject: [PATCH 106/112] [TF FE] Skip one test case for keras Embedding on CPU due to timeout issue (#27188) **Details:** Skip one test case for keras Embedding on CPU due to timeout issue **Ticket:** 155622 --------- Signed-off-by: Kazantsev, Roman --- .../tensorflow2_keras_tests/test_tf2_keras_embedding.py | 4 ++++ tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_embedding.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_embedding.py index c0c8fd9dada071..04695d5ba7c723 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_embedding.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_embedding.py @@ -42,6 +42,8 @@ def 
create_keras_emb_net(self, input_names, input_shapes, input_type, input_dim, @pytest.mark.precommit def test_keras_emb_float32(self, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): + if ie_device == 'CPU': + pytest.skip('155622: OpenVINO runtime timeout on CPU') self._test(*self.create_keras_emb_net(**params, ir_version=ir_version), ie_device, precision, temp_dir=temp_dir, ir_version=ir_version, use_legacy_frontend=use_legacy_frontend, **params) @@ -62,6 +64,8 @@ def test_keras_emb_float32(self, params, ie_device, precision, ir_version, temp_ @pytest.mark.precommit def test_keras_emb_without_zero_mask_float32(self, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): + if ie_device == 'CPU': + pytest.skip('155622: OpenVINO runtime timeout on CPU') self._test(*self.create_keras_emb_net(**params, ir_version=ir_version), ie_device, precision, temp_dir=temp_dir, ir_version=ir_version, use_legacy_frontend=use_legacy_frontend, **params) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py b/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py index 11c1d2a2ffed17..103f99e3f4043f 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py @@ -126,6 +126,8 @@ def test_bias_add_placeholder_const_4D(self, params, ie_device, precision, ir_ve @pytest.mark.nightly def test_bias_add_2_consts_4D(self, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): + if ie_device == 'CPU': + pytest.skip('155622: OpenVINO runtime timeout on CPU') self._test(*self.create_bias_add_2_consts_net(**params, ir_version=ir_version, use_legacy_frontend=use_legacy_frontend), ie_device, precision, ir_version, temp_dir=temp_dir, From 2fc59f50239afcbda5ec6b0394b09b14e25a0628 Mon Sep 17 00:00:00 2001 From: "Min, Byungil" Date: Wed, 23 Oct 2024 11:07:29 +0900 Subject: [PATCH 107/112] Use wzp u8 type and bugfix for FC dyn-quan (#26816) ### Details: - Modify wzp data type for clDNN FC - Bugfix for clDNN FC dynamic quantize ### Tickets: - CVS-150930 --------- Signed-off-by: Min, Byung-il Signed-off-by: Min, Byungil --- .../fully_connected_gpu_bf_tiled.cl | 13 ++- .../fully_connected_kernel_bf_tiled.cpp | 5 +- .../convert_fc_to_compressed.cpp | 23 +++-- .../convert_fc_to_compressed.hpp | 2 +- .../src/plugin/transformations_pipeline.cpp | 9 +- .../convert_fc_to_compressed_test.cpp | 83 +++++++++++++------ 6 files changed, 91 insertions(+), 44 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl index 57545b0df37cff..70c55bfb73b8f5 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl @@ -952,6 +952,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( // Calculate zero-point and scale only for DECOMPRESSION_SCALE_POST_OP enabled // Calculate weight : w = (w - dzp) * ds + // if DECOMPRESSION_ZP_TERM is not enabled, then dzp is ACCUMULATOR_VAL_ZERO. 
#if DECOMPRESSION_ZP_TERM #if DECOMPRESSION_ZP_SCALAR DQ_SLM_FILTER_UNPACKED_VEC dzp = (DQ_SLM_FILTER_UNPACKED_VEC)(DECOMPRESSION_ZP_VALUE); @@ -976,8 +977,6 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( } } #endif - #else - DQ_SLM_FILTER_UNPACKED_VEC dzp = (DQ_SLM_FILTER_UNPACKED_VEC)(ACCUMULATOR_VAL_ZERO); #endif #if FILTER_LOAD_BLOCK_SIZE == 2 @@ -1026,7 +1025,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( weights_offset += TILE_K_OFM_PACKED * TILE_OFM_PER_OSV_SIZE * SIMD; - #if DECOMPRESSION_SCALE_POST_OP && (TILE_IFM_ELEMENTS_SIZE > DECOMPRESSION_SCALE_GROUP_SIZE) + #if DQ_DECOMPRESSION_SCALE_POST_OP && (TILE_IFM_ELEMENTS_SIZE > DECOMPRESSION_SCALE_GROUP_SIZE) unroll_for (uint bi = 0; bi < TILE_B; ++bi) { unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { const uint offset_ofm = out_f + fi*SIMD + sglid; @@ -1046,7 +1045,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( #endif } // Whole tile_k elements of each iteration : ki - #if DECOMPRESSION_SCALE_POST_OP && (TILE_IFM_ELEMENTS_SIZE <= DECOMPRESSION_SCALE_GROUP_SIZE) + #if DQ_DECOMPRESSION_SCALE_POST_OP && (TILE_IFM_ELEMENTS_SIZE <= DECOMPRESSION_SCALE_GROUP_SIZE) // Dynamic-quantizing group size set to same or smaller than scale group size if ((ni % NUM_LOOP_IN_DYN_QUAN_GROUP) == (NUM_LOOP_IN_DYN_QUAN_GROUP - 1)) { const uint ni_offset = ((ni*TILE_IFM*SIMD) / DECOMPRESSION_SCALE_GROUP_SIZE)*DECOMPRESSION_SCALE_FEATURE_PITCH; @@ -1175,7 +1174,7 @@ KERNEL(fc)( #endif ) { #if USE_SLM - #if DYNAMIC_QUANTIZE && (TILE_OFM == 2) + #if DYNAMIC_QUANTIZE __local int dq_wei_local_mem[SIMD * TILE_OFM * SIMD]; #else __local ACCUMULATOR_TYPE wei_local_mem[TILE_IFM * SIMD * TILE_OFM * SIMD]; @@ -1317,7 +1316,7 @@ KERNEL(fc)( #endif ); } else { - #if USE_SLM && DYNAMIC_QUANTIZE && (TILE_OFM == 2) + #if USE_SLM && DYNAMIC_QUANTIZE FUNC_CALL(fc_bf_tiled_kernel_dyn_quan)( OPTIONAL_SHAPE_INFO_TENSOR input, @@ -1364,7 +1363,7 @@ KERNEL(fc)( #endif } #else - #if USE_SLM && DYNAMIC_QUANTIZE && (TILE_OFM == 2) + #if USE_SLM && DYNAMIC_QUANTIZE FUNC_CALL(fc_bf_tiled_kernel_dyn_quan)( OPTIONAL_SHAPE_INFO_TENSOR input, diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index b26b11ce97df6a..9c95345e0900b5 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -375,6 +375,9 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, if (params.weights.GetDType() == WeightsType::UINT4 || params.weights.GetDType() == WeightsType::INT4) { if (!params.is_shape_agnostic && batch == 1) { + if (should_dynamic_quantize(params)) + return selector.Default(tune_params(1, 2, 4, 2, 1, 1, 1, EXE_MODE_DEFAULT)); + // Tuning for Meteor Lake if (is_weight_vertical(params, output_f)) { if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) { @@ -616,7 +619,7 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para // Validated perf gain, Dynamic quantize force enable SCALE_POST_OP for char type multiplication if (should_dynamic_quantize(params)) { jit.AddConstant(MakeJitConstant("DYNAMIC_QUANTIZE", 1)); - jit.AddConstant(MakeJitConstant("DECOMPRESSION_SCALE_POST_OP", 1)); + jit.AddConstant(MakeJitConstant("DQ_DECOMPRESSION_SCALE_POST_OP", 1)); 
jit.AddConstant(MakeJitConstant("DQ_TYPE", "char")); jit.AddConstant(MakeJitConstant("QUANTIZE_GROUP_SIZE", quantize_grp_size)); } else { diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp index 885da895b91166..315a93190fdc90 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp @@ -24,7 +24,7 @@ namespace ov { namespace intel_gpu { -ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed(bool convert_u4zp_to_u8) { +ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed() { using namespace ov::pass::pattern; auto compressed_constant = [](const ov::Output& output) { @@ -81,6 +81,12 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon bool has_transpose = pattern_map.count(transpose_m); auto scale_shape = pattern_map.at(mul_const_m).get_shape(); bool grouped = std::count_if(scale_shape.begin(), scale_shape.end(), [](size_t d) { return d > 1; }) > 1; + bool sub_with_convert = (pattern_map.count(sub_with_convert_m) > 0) ? true : false; + + auto weight_ptr = std::dynamic_pointer_cast(pattern_map.at(weights_m).get_node_shared_ptr()); + bool weight_u8 = false; + if (weight_ptr->get_element_type() == ov::element::u8 || weight_ptr->get_element_type() == ov::element::i8) + weight_u8 = true; auto reshape_const_to_2d = [has_transpose, grouped](std::shared_ptr node) { auto constant = std::dynamic_pointer_cast(node); @@ -97,11 +103,17 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon return std::make_shared(*constant, new_shape); }; - auto convert_u4const_to_u8 = [convert_u4zp_to_u8](std::shared_ptr node) { + auto convert_const_to_u8 = [&](std::shared_ptr node) { auto constant = std::dynamic_pointer_cast(node); - if (constant->get_element_type() != ov::element::u4 || !convert_u4zp_to_u8) + // Convert ZP to u8 + if (constant->get_element_type() == ov::element::u8) return std::dynamic_pointer_cast(constant); - return std::dynamic_pointer_cast(std::make_shared(node, ov::element::u8)); + if (constant->get_element_type() == ov::element::u4) + return std::dynamic_pointer_cast(std::make_shared(node, ov::element::u8)); + if (weight_u8 && sub_with_convert) + return std::dynamic_pointer_cast(std::make_shared(node, ov::element::u8)); + + return std::dynamic_pointer_cast(constant); }; @@ -111,8 +123,7 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon const bool with_zero_point = pattern_map.count(sub_no_convert_m) > 0 || pattern_map.count(sub_with_convert_m) > 0; if (with_zero_point) { - // WA: Convert ZP to u8 for OneDNN case to avoid u4 reorder - optional_zero_point = convert_u4const_to_u8(reshape_const_to_2d(pattern_map.at(sub_const_m).get_node_shared_ptr())); + optional_zero_point = convert_const_to_u8(reshape_const_to_2d(pattern_map.at(sub_const_m).get_node_shared_ptr())); } std::shared_ptr fc_input_b = reshape_const_to_2d(pattern_map.at(weights_m).get_node_shared_ptr()); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp index 641f55ead5fdaf..d2bc71a91f1285 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp +++ 
b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp @@ -12,7 +12,7 @@ namespace intel_gpu { class ConvertFullyConnectedToFullyConnectedCompressed: public ov::pass::MatcherPass { public: OPENVINO_RTTI("ConvertFullyConnectedToFullyConnectedCompressed", "0"); - ConvertFullyConnectedToFullyConnectedCompressed(bool convert_u4zp_to_u8 = false); + ConvertFullyConnectedToFullyConnectedCompressed(); }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index b75519ac40e678..f97b7fae126b47 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -810,7 +810,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); manager.register_pass(); - manager.register_pass(device_info.supports_immad); + manager.register_pass(); bool disable_horizontal_fc_fusion = false; GPU_DEBUG_GET_INSTANCE(debug_config); @@ -819,10 +819,11 @@ void TransformationsPipeline::apply(std::shared_ptr func) { if (!disable_horizontal_fc_fusion) manager.register_pass(); + + // ZP should not be folded for FC. But still, ZP should be folded for Gather. + // Therefore, run MarkDequantizationSubgraph again to fold ZP constant. + manager.register_pass(supported_woq_types, true); if (device_info.supports_immad) { - // For OneDNN, ZP should not be folded for FC. But still, ZP should be folded for Gather. - // Therefore, run MarkDequantizationSubgraph again to fold ZP constant. - manager.register_pass(supported_woq_types, true); if (disable_horizontal_fc_fusion) manager.register_pass(); } diff --git a/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp index 12398c8221f4b7..ada1c1314da040 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/convert_fc_to_compressed_test.cpp @@ -37,7 +37,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed1) { auto convert = std::make_shared(weights_const, ov::element::f32); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); auto scale = std::make_shared(convert, scale_const); - auto no_bias = std::make_shared(); + auto no_bias = std::make_shared(); auto fc = std::make_shared(input1, scale, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); @@ -46,7 +46,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed1) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); - auto no_bias = std::make_shared(); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const); @@ -63,7 +63,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed2) { auto sub = std::make_shared(convert, zp_const); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); auto scale = std::make_shared(sub, scale_const); - auto no_bias = std::make_shared(); + auto no_bias = std::make_shared(); auto fc = std::make_shared(input1, scale, no_bias); model = 
std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); @@ -72,7 +72,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed2) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); - auto no_bias = std::make_shared(); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 1 }, { 1 }); auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const); @@ -92,7 +92,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed3) { auto scale = std::make_shared(sub, scale_const); auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { -1, 16 }); auto reshape = std::make_shared(scale, reshape_const, false); - auto no_bias = std::make_shared(); + auto no_bias = std::make_shared(); auto fc = std::make_shared(input1, reshape, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); @@ -101,7 +101,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed3) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); - auto no_bias = std::make_shared(); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 }); auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 }); auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const); @@ -115,13 +115,14 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed4) { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 32, 4, 4 }, { 1 }); auto convert = std::make_shared(weights_const, ov::element::f32); - auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1, 1 }, { 1 }); - auto sub = std::make_shared(convert, zp_const); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 1, 1 }, { 1 }); + auto zp_convert = std::make_shared(zp_const, ov::element::f32); + auto sub = std::make_shared(convert, zp_convert); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4, 1 }, { 1 }); auto scale = std::make_shared(sub, scale_const); auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { -1, 16 }); auto reshape = std::make_shared(scale, reshape_const, false); - auto no_bias = std::make_shared(); + auto no_bias = std::make_shared(); auto fc = std::make_shared(input1, reshape, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); @@ -130,9 +131,9 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed4) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 32, 16 }, { 1 }); - auto no_bias = std::make_shared(); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 32, 4 }, { 1 }); - auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 1 }, { 1 }); 
auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const); model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); @@ -144,15 +145,16 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed5) { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 }); auto convert = std::make_shared(weights_const, ov::element::f32); - auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1, 1 }, { 1 }); - auto sub = std::make_shared(convert, zp_const); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 1, 1 }, { 1 }); + auto zp_convert = std::make_shared(zp_const, ov::element::f32); + auto sub = std::make_shared(convert, zp_convert); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 1, 32 }, { 1 }); auto scale = std::make_shared(sub, scale_const); auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 }); auto reshape = std::make_shared(scale, reshape_const, false); auto transpose_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose = std::make_shared(reshape, transpose_const); - auto no_bias = std::make_shared(); + auto no_bias = std::make_shared(); auto fc = std::make_shared(input1, transpose, no_bias); model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); @@ -163,11 +165,11 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed5) { auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 16, 32 }, { 1 }); auto transpose_weights_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_weights = std::make_shared(weights_const, transpose_weights_const); - auto no_bias = std::make_shared(); + auto no_bias = std::make_shared(); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 }); auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); - auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 1, 1 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 1, 1 }, { 1 }); auto fc_compressed = std::make_shared(input1, transpose_weights, no_bias, transpose_scale, zp_const); model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); @@ -179,8 +181,9 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed6) { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 }); auto convert = std::make_shared(weights_const, ov::element::f32); - auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 1, 32 }, { 1 }); - auto sub = std::make_shared(convert, zp_const); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 1, 32 }, { 1 }); + auto zp_convert = std::make_shared(zp_const, ov::element::f32); + auto sub = std::make_shared(convert, zp_convert); auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 1, 32 }, { 1 }); auto scale = std::make_shared(sub, scale_const); auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 }); @@ -202,7 
+205,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed6) { auto scale_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 }); auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); - auto zp_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 32 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 32 }, { 1 }); auto transpose_zp_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_zp = std::make_shared(zp_const, transpose_zp_const); auto fc_compressed = std::make_shared(input1, transpose_weights, no_bias, transpose_scale, transpose_zp); @@ -216,8 +219,9 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed7) { auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 }); auto convert = std::make_shared(weights_const, ov::element::f16); - auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 1, 32 }, { 1 }); - auto sub = std::make_shared(convert, zp_const); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 1, 32 }, { 1 }); + auto zp_convert = std::make_shared(zp_const, ov::element::f16); + auto sub = std::make_shared(convert, zp_convert); auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 1, 32 }, { 1 }); auto scale = std::make_shared(sub, scale_const); auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 }); @@ -239,7 +243,7 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed7) { auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 }); auto transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); - auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 32 }, { 1 }); auto transpose_zp_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_zp = std::make_shared(zp_const, transpose_zp_const); auto fc_compressed = std::make_shared(input1, transpose_weights, no_bias, transpose_scale, transpose_zp); @@ -332,8 +336,9 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) { { auto weights_const = ov::op::v0::Constant::create(ov::element::u4, ov::Shape{ 4, 4, 32 }, { 1 }); auto convert = std::make_shared(weights_const, ov::element::f16); - auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 1, 32 }, { 1 }); - auto sub = std::make_shared(convert, zp_const); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 1, 32 }, { 1 }); + auto zp_convert = std::make_shared(zp_const, ov::element::f16); + auto sub = std::make_shared(convert, zp_convert); auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 1, 32 }, { 1 }); auto scale = std::make_shared(sub, scale_const); auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 16, -1 }); @@ -373,7 +378,8 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) { auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 }); auto 
transpose_scale_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_scale = std::make_shared(scale_const, transpose_scale_const); - auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 4, 32 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 4, 32 }, { 1 }); + auto zp_convert = std::make_shared(zp_const, ov::element::f16); auto transpose_zp_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); auto transpose_zp = std::make_shared(zp_const, transpose_zp_const); @@ -402,6 +408,33 @@ TEST_F(TransformationTestsF, ConvertFCToCompressed8) { } TEST_F(TransformationTestsF, ConvertFCToCompressed9) { + { + auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto convert = std::make_shared(weights_const, ov::element::f16); + auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto sub = std::make_shared(convert, zp_const); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto scale = std::make_shared(sub, scale_const); + auto no_bias = std::make_shared(); + auto fc = std::make_shared(input1, scale, no_bias); + + model = std::make_shared(ov::NodeVector{ fc }, ov::ParameterVector{ input1 }); + manager.register_pass(); + } + { + auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); + auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); + auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto zp_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{ 32, 1 }, { 1 }); + auto no_bias = std::make_shared(); + auto fc_compressed = std::make_shared(input1, weights_const, no_bias, scale_const, zp_const); + + model_ref = std::make_shared(ov::NodeVector{ fc_compressed }, ov::ParameterVector{ input1 }); + } +} + +TEST_F(TransformationTestsF, ConvertFCToCompressed10) { { auto input1 = std::make_shared(ov::element::f16, ov::PartialShape{ -1, 16 }); auto weights_const = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{ 32, 16 }, { 1 }); From f5dc4f5e5f193717c56877104c606072533fd6f7 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Wed, 23 Oct 2024 08:44:54 +0200 Subject: [PATCH 108/112] [PyOV] update numpy <2.2.0 (#27176) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- src/bindings/python/constraints.txt | 2 +- src/bindings/python/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index b3a8267e4c1f14..65ce94d71b385e 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -1,5 +1,5 @@ # used in multiple components -numpy>=1.16.6,<2.1.0 # Python bindings, frontends +numpy>=1.16.6,<2.2.0 # Python bindings, frontends # pytest pytest>=5.0,<8.4 diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index e311c6ed6438db..a2d63161fe764c 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1,3 +1,3 @@ -numpy>=1.16.6,<2.1.0 +numpy>=1.16.6,<2.2.0 openvino-telemetry>=2023.2.1 packaging From 121917a6ce6aa1aa9e0a9ae94130681a419ecb73 Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Wed, 23 Oct 2024 14:56:46 +0800 Subject: 
[PATCH 109/112] support new isa avx2_vnni_2 in threading scheduling (#27194) ### Details: - *support new isa avx2_vnni_2 in threading scheduling* ### Tickets: - *154908* --- src/plugins/intel_cpu/src/cpu_streams_calculation.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index c0e1e96547cec7..7f5f968b10c3fe 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -549,6 +549,7 @@ int get_model_prefer_threads(const int num_streams, break; case dnnl::cpu_isa::avx512_core_vnni: case dnnl::cpu_isa::avx2_vnni: + case dnnl::cpu_isa::avx2_vnni_2: isaSpecificThreshold = 2.0f; break; case dnnl::cpu_isa::avx512_core_amx: From 93379b5f187daa6e542880e9672d1a611e71861c Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Wed, 23 Oct 2024 08:01:40 +0100 Subject: [PATCH 110/112] [CI] [GHA] Update list of workflows for rerunner, add step for statistics (#27156) ### Tickets: - *144089* --- .github/workflows/workflow_rerunner.yml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/workflow_rerunner.yml b/.github/workflows/workflow_rerunner.yml index 7da00df4e46d32..89c39669e67720 100644 --- a/.github/workflows/workflow_rerunner.yml +++ b/.github/workflows/workflow_rerunner.yml @@ -3,11 +3,17 @@ name: Rerun Workflow with Known Errors on: workflow_run: workflows: - - Linux (Ubuntu 20.04, Python 3.11) + - Linux (Ubuntu 20.04, Python 3.9) + - Linux (Ubuntu 22.04, Python 3.11) + - Linux (Ubuntu 24.04, Python 3.12) + - Debian 10 ARM + - Android ARM64 with vcpkg + - Android x64 - Linux ARM64 (Ubuntu 20.04, Python 3.11) - Linux Static CC (Ubuntu 22.04, Python 3.11, Clang) - Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10) - - Windows (VS 2019, Python 3.11) + - Windows (VS 2019, Python 3.11, Release) + - Windows (VS 2019, Python 3.11, Debug) - Windows Conditional Compilation (VS 2022, Python 3.11) types: - completed @@ -56,6 +62,10 @@ jobs: if: ${{ env.PIPELINE_RETRIGGERED == 'true' }} run: echo "Rerun retriggered for ${{ github.event.workflow_run.html_url }} with ticket ${{ env.FOUND_ERROR_TICKET }}" + - name: ${{ github.event.workflow_run.html_url }} + if: ${{ env.PIPELINE_RETRIGGERED == 'true' }} + run: echo "Step for statistics gathering" + rerunner_tests: name: Rerunner Tests if: ${{ github.event_name == 'pull_request' && github.repository_owner == 'openvinotoolkit' }} From 56a0dca15c5bdbab7c07bc941265cb37fa26e1f3 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Wed, 23 Oct 2024 09:02:26 +0200 Subject: [PATCH 111/112] [TRANSFORMATIONS] Move print_model transformation to transformation's folder (#26420) [TRANSFORMATIONS] Move print_model transformation to transformation's folder Move print_model transformation to transformation's folder - Tickets: * [CVS-151257](https://jira.devtools.intel.com/browse/CVS-151257) --------- Signed-off-by: Andrii Staikov --- .../include/transformations}/utils/print_model.hpp | 0 .../intel_cpu/src/transformations/transformation_pipeline.cpp | 2 +- .../intel_cpu/tests/unit/transformations/state_concat_sdpa.cpp | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/{plugins/intel_cpu/src => common/transformations/include/transformations}/utils/print_model.hpp (100%) diff --git a/src/plugins/intel_cpu/src/utils/print_model.hpp b/src/common/transformations/include/transformations/utils/print_model.hpp similarity index 100% rename 
from src/plugins/intel_cpu/src/utils/print_model.hpp rename to src/common/transformations/include/transformations/utils/print_model.hpp diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index e45b6379d1e968..e98045bd32dbbf 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -100,7 +100,7 @@ #include "transformations/rt_info/keep_const_precision.hpp" #include "transformations/transpose_sinking/ts_shape_of.hpp" #include "utils/ngraph_transformation.hpp" -#include "utils/print_model.hpp" +#include "transformations/utils/print_model.hpp" // LPT transformations #include "low_precision/add.hpp" diff --git a/src/plugins/intel_cpu/tests/unit/transformations/state_concat_sdpa.cpp b/src/plugins/intel_cpu/tests/unit/transformations/state_concat_sdpa.cpp index ca55337fc3d6b8..d8516d9ce8cf39 100644 --- a/src/plugins/intel_cpu/tests/unit/transformations/state_concat_sdpa.cpp +++ b/src/plugins/intel_cpu/tests/unit/transformations/state_concat_sdpa.cpp @@ -17,7 +17,7 @@ #include #include "common_test_utils/ov_test_utils.hpp" -#include "utils/print_model.hpp" +#include "transformations/utils/print_model.hpp" using namespace testing; using namespace ov; From a852c4af6da0c966879285a587d5b4208a7e92c5 Mon Sep 17 00:00:00 2001 From: Vishniakov Nikolai Date: Wed, 23 Oct 2024 09:57:26 +0200 Subject: [PATCH 112/112] [OV JS] Perform inference in electron app in nodejs e2e test (#27131) ### Details: - Extend e2e test of Node.js API by performing test inference ### Tickets: - 154192 --- .../node/tests/e2e/demo-electron-app/index.js | 40 ++++++++++++++++--- .../js/node/tests/e2e/electron-app.test.js | 29 +++++++------- 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/src/bindings/js/node/tests/e2e/demo-electron-app/index.js b/src/bindings/js/node/tests/e2e/demo-electron-app/index.js index cfa5fd27b0fa4e..58cc6b3b3cf450 100644 --- a/src/bindings/js/node/tests/e2e/demo-electron-app/index.js +++ b/src/bindings/js/node/tests/e2e/demo-electron-app/index.js @@ -1,11 +1,39 @@ const { app } = require('electron'); const { addon: ov } = require('openvino-node'); -app.whenReady().then(() => { - console.log('Creating OpenVINO Runtime Core'); - // eslint-disable-next-line @typescript-eslint/no-unused-vars - const core = new ov.Core(); - console.log('Created OpenVINO Runtime Core'); +const epsilon = 0.5; // To avoid very small numbers +const pathToModel = '../tests/unit/test_models/test_model_fp32.xml'; + +main(); + +async function main() { + await app.whenReady(); + + try { + console.log('Creating OpenVINO Runtime Core'); + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const core = new ov.Core(); + console.log('Created OpenVINO Runtime Core'); + + const model = await core.readModel(pathToModel); + console.log('Model read successfully:', model); + const compiledModel = await core.compileModel(model, 'CPU'); + const inferRequest = compiledModel.createInferRequest(); + console.log('Infer request created:', inferRequest); + + const tensorData = Float32Array.from( + { length: 3072 }, + () => Math.random() + epsilon, + ); + const tensor = new ov.Tensor(ov.element.f32, [1, 3, 32, 32], tensorData); + console.log('Tensor created:', tensor); + + const result = await inferRequest.inferAsync([tensor]); + console.log('Infer request result:', result); + } catch (error) { + console.error('Error:', 
error); + app.exit(1); + } app.exit(0); -}); +} diff --git a/src/bindings/js/node/tests/e2e/electron-app.test.js b/src/bindings/js/node/tests/e2e/electron-app.test.js index 01e84dea884502..98982a5f941263 100644 --- a/src/bindings/js/node/tests/e2e/electron-app.test.js +++ b/src/bindings/js/node/tests/e2e/electron-app.test.js @@ -1,24 +1,17 @@ /* global describe, it, before, after */ const fs = require('node:fs'); +const util = require('node:util'); const assert = require('node:assert'); const { exec } = require('child_process'); +const execPromise = util.promisify(exec); +const { testModels, downloadTestModel } = require('../unit/utils.js'); describe('E2E testing for OpenVINO as an Electron dependency.', function() { this.timeout(50000); - before((done) => { - exec( - 'cp -r ./tests/e2e/demo-electron-app/ demo-electron-app-project', - (error) => { - if (error) { - console.error(`exec error: ${error}`); - - return done(error); - } - - done(); - }, - ); + before(async () => { + await downloadTestModel(testModels.testModelFP32); + await execPromise('cp -r ./tests/e2e/demo-electron-app/ demo-electron-app-project'); }); it('should install dependencies', (done) => { @@ -37,7 +30,7 @@ describe('E2E testing for OpenVINO as an Electron dependency.', function() { }); it('should run electron package and verify output', (done) => { - exec('cd demo-electron-app-project && npm start', (error, stdout) => { + exec(`cd demo-electron-app-project && npm start`, (error, stdout) => { if (error) { console.error(`exec error: ${error}`); @@ -48,6 +41,14 @@ describe('E2E testing for OpenVINO as an Electron dependency.', function() { stdout.includes('Created OpenVINO Runtime Core'), 'Check that openvino-node operates fine', ); + assert( + stdout.includes('Model read successfully: ModelWrap {}'), + 'Check that model is read successfully', + ); + assert( + stdout.includes('Infer request result: { fc_out: TensorWrap {} }'), + 'Check that infer request result is successful', + ); done(); }); });