Skip to content

Commit cc58db0

Browse files
authored
Backport gh-2101 (#2108)
* Revert gh-2082 with w/a for gemm issue in OneMKL (#2101)
* Revert "Implement a workaround to gemm issue in OneMKL (#2082)" — this reverts commit 178342c.
* Add test to explicitly cover the w/a for gemm and gemm_batch
* Update test to reproduce the exact issue
* Set release date
1 parent e4883a2 commit cc58db0

File tree

6 files changed

+19
-55
lines changed

6 files changed

+19
-55
lines changed

CHANGELOG.md

+1-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7-
## [0.16.0] - 09/DD/2024
7+
## [0.16.0] - 10/14/2024
88

99
This release reaches an important milestone by making offloading fully asynchronous. Calls to `dpnp` submit tasks for execution to DPC++ runtime and return without waiting for execution of these tasks to finish. The sequential semantics a user comes to expect from execution of Python script is preserved though.
1010
In addition, this release completes implementation of `dpnp.fft` module and adds several new array manipulation, indexing and elementwise routines. Moreover, it adds support to build `dpnp` for Nvidia GPUs.
@@ -120,7 +120,6 @@ In addition, this release completes implementation of `dpnp.fft` module and adds
120120
* Fixed a crash in `dpnp.choose` caused by missing control of releasing temporary allocated device memory [#2063](https://github.com/IntelPython/dpnp/pull/2063)
121121
* Resolved compilation warning and error while building in debug mode [#2066](https://github.com/IntelPython/dpnp/pull/2066)
122122
* Fixed an issue with asynchronous execution in `dpnp.fft` module [#2067](https://github.com/IntelPython/dpnp/pull/2067)
123-
* Added a workaround to fix the incorrect result from `dpnp.matmul` computing on Lunar Lake or Arrow Lake Battlemage graphics [#2082](https://github.com/IntelPython/dpnp/pull/2082)
124123

125124
## [0.15.0] - 05/25/2024
126125

dpnp/backend/extensions/blas/blas_py.cpp

-7
Original file line numberDiff line numberDiff line change
@@ -127,13 +127,6 @@ PYBIND11_MODULE(_blas_impl, m)
127127
py::arg("resultC"), py::arg("depends") = py::list());
128128
}
129129

130-
{
131-
m.def("_is_lnl_bm_architecture", &blas_ns::_is_lnl_bm_architecture,
132-
"Return ``True`` if SYCL device belongs to either Lunar Lake or "
133-
"Battlemage G21 Intel GPU architecture",
134-
py::arg("device"));
135-
}
136-
137130
{
138131
m.def("_gemm_batch", &blas_ns::gemm_batch,
139132
"Call `gemm_batch` from OneMKL BLAS library to compute "

dpnp/backend/extensions/blas/gemm.cpp

-16
Original file line numberDiff line numberDiff line change
@@ -323,22 +323,6 @@ std::tuple<sycl::event, sycl::event, bool>
323323
return std::make_tuple(args_ev, gemm_ev, is_row_major);
324324
}
325325

326-
bool _is_lnl_bm_architecture(const sycl::device &dev)
327-
{
328-
#if !defined(USE_ONEMKL_CUBLAS)
329-
namespace syclex = sycl::ext::oneapi::experimental;
330-
const auto arch = dev.get_info<syclex::info::device::architecture>();
331-
switch (arch) {
332-
case syclex::architecture::intel_gpu_lnl_m: /* Lunar Lake */
333-
case syclex::architecture::intel_gpu_bmg_g21: /* Battlemage G21 */
334-
return true;
335-
default:
336-
return false;
337-
}
338-
#endif // !defined(USE_ONEMKL_CUBLAS)
339-
return false;
340-
}
341-
342326
template <typename fnT, typename Tab, typename Tc>
343327
struct GemmContigFactory
344328
{

dpnp/backend/extensions/blas/gemm.hpp

-2
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ extern std::tuple<sycl::event, sycl::event, bool>
3939
const dpctl::tensor::usm_ndarray &resultC,
4040
const std::vector<sycl::event> &depends);
4141

42-
extern bool _is_lnl_bm_architecture(const sycl::device &dev);
43-
4442
extern std::tuple<sycl::event, sycl::event, bool>
4543
gemm_batch(sycl::queue &exec_q,
4644
const dpctl::tensor::usm_ndarray &matrixA,

dpnp/dpnp_utils/dpnp_utils_linearalgebra.py

-28
Original file line numberDiff line numberDiff line change
@@ -894,34 +894,6 @@ def dpnp_matmul(
894894
)
895895
_manager.add_event_pair(ht_ev, gemv_ev)
896896
elif call_flag == "gemm":
897-
# MKLD-17976: due to known issue in OneMKL on Lunar Lake and
898-
# Battlemage G21 Intel GPU architectures, it forces
899-
# to implement a temporary workaround with extra copying of
900-
# an input array in case when it has a small size and
901-
# non-zero offset
902-
# The issue was detected by failing tests for eig/eigh
903-
# TODO: remove the workaround once OneMKL issue is resolved
904-
if bi._is_lnl_bm_architecture(exec_q.get_sycl_device()):
905-
906-
def _need_to_copy(a):
907-
a_usm = dpnp.get_usm_ndarray(a)
908-
if a_usm._element_offset > 0 and a_usm.size < 16:
909-
return True
910-
return False
911-
912-
x1 = _copy_array(
913-
x1,
914-
copy_flag=_need_to_copy(x1),
915-
dtype=compute_dtype,
916-
order=res_order,
917-
)
918-
x2 = _copy_array(
919-
x2,
920-
copy_flag=_need_to_copy(x2),
921-
dtype=compute_dtype,
922-
order=res_order,
923-
)
924-
925897
result = _gemm_matmul(
926898
exec_q,
927899
x1,

tests/test_mathematical.py

+18
Original file line numberDiff line numberDiff line change
@@ -3824,6 +3824,24 @@ def test_matmul_alias(self):
38243824
result2 = dpnp.linalg.matmul(a, b)
38253825
assert_array_equal(result1, result2)
38263826

3827+
@pytest.mark.parametrize(
3828+
"sh1, sh2",
3829+
[
3830+
((2, 3, 3), (2, 3, 3)),
3831+
((3, 3, 3, 3), (3, 3, 3, 3)),
3832+
],
3833+
ids=["gemm", "gemm_batch"],
3834+
)
3835+
def test_matmul_with_offsets(self, sh1, sh2):
3836+
size1, size2 = numpy.prod(sh1, dtype=int), numpy.prod(sh2, dtype=int)
3837+
a = numpy.random.randint(-5, 5, size1).reshape(sh1).astype("f8")
3838+
b = numpy.random.randint(-5, 5, size2).reshape(sh2).astype("f8")
3839+
ia, ib = dpnp.array(a), dpnp.array(b)
3840+
3841+
result = ia[1] @ ib[1]
3842+
expected = a[1] @ b[1]
3843+
assert_array_equal(result, expected)
3844+
38273845

38283846
class TestMatmulInvalidCases:
38293847
@pytest.mark.parametrize(

0 commit comments

Comments (0)