Add optional out argument for transpose and copy in `StencilMat…

…rix` and `BlockLinearOperator` (#357) Implement the possibility to pass an `out` argument to the methods `copy` and `transpose` of classes `StencilMatrix` and `BlockLinearOperator`. This is specially useful to avoid creating temporaries when a linear operator is copied or transposed frequently, for example in the case of iterative algorithms which update a matrix at every step. Serial and parallel tests were added to check the implementation.
pyccel · Dec 4, 2023 · 8ac3eb5 · 8ac3eb5
1 parent 78567a7
commit 8ac3eb5
Show file tree

Hide file tree

Showing 4 changed files with 247 additions and 47 deletions.
diff --git a/psydac/linalg/block.py b/psydac/linalg/block.py
@@ -672,13 +672,35 @@ def _dot(blocks, v, out, n_rows, n_cols, inc):
                 out[i] += Lij.dot(v[j], out=inc[i])
 
     # ...
-    def transpose(self, conjugate=False):
-        blocks, blocks_T = self.compute_interface_matrices_transpose()
-        blocks = {(j, i): b.transpose(conjugate=conjugate) for (i, j), b in blocks.items()}
-        blocks.update(blocks_T)
-        mat = BlockLinearOperator(self.codomain, self.domain, blocks=blocks)
-        mat.set_backend(self._backend)
-        return mat
+    def transpose(self, conjugate=False, out=None):
+        """"
+        Return the transposed BlockLinearOperator, or the Hermitian Transpose if conjugate==True
+
+        Parameters
+        ----------
+        conjugate : Bool(optional)
+            True to get the Hermitian adjoint.
+
+        out : BlockLinearOperator(optional)
+            Optional out for the transpose to avoid temporaries
+        """
+        if out is not None:
+            assert isinstance(out, BlockLinearOperator)
+            assert out.codomain is self.domain
+            assert out.domain is self.codomain
+            for (i, j), Lij in self._blocks.items():
+                if out[j,i]==None:
+                    out[j, i] = Lij.transpose(conjugate=conjugate)
+                else:
+                    Lij.transpose(conjugate=conjugate, out=out[j,i])
+        else:
+            blocks, blocks_T = self.compute_interface_matrices_transpose()
+            blocks = {(j, i): b.transpose(conjugate=conjugate) for (i, j), b in blocks.items()}
+            blocks.update(blocks_T)
+            out = BlockLinearOperator(self.codomain, self.domain, blocks=blocks)
+
+        out.set_backend(self._backend)
+        return out
 
     #--------------------------------------
     # Overridden properties/methods
@@ -863,15 +885,30 @@ def backend(self):
         return self._backend
 
     # ...
-    def copy(self):
-        blocks = {ij: Bij.copy() for ij, Bij in self._blocks.items()}
-        mat = BlockLinearOperator(self.domain, self.codomain, blocks=blocks)
-        if self._backend is not None:
-            mat._func = self._func
-            mat._args = self._args
-            mat._blocks_as_args = [mat._blocks[key]._data for key in self._blocks]
-            mat._backend = self._backend
-        return mat
+    def copy(self, out=None):
+        """
+        Create a copy of self, that can potentially be stored in a given BlockLinearOperator.
+
+        Parameters
+        ----------
+        out : BlockLinearOperator(optional)
+            The existing BlockLinearOperator in which we want to copy self.
+        """
+        if out is not None:
+            assert isinstance(out, BlockLinearOperator)
+            assert out.domain is self.domain
+            assert out.codomain is self.codomain
+        else:
+            out = BlockLinearOperator(self.domain, self.codomain)
+
+        for (i, j), Lij in self._blocks.items():
+            if out[i,j]==None:
+                out[i, j] = Lij.copy()
+            else:
+                Lij.copy(out=out[i,j])
+
+        out.set_backend(self._backend)
+
 
     # ...
     def __imul__(self, a):
@@ -999,7 +1036,7 @@ def compute_interface_matrices_transpose(self):
                             data_exchanger.update_ghost_regions(array_minus=block_ij_k1k2._data)
 
                             if cart_i.is_comm_null:
-                                blocks_T[j,i][k2,k1] = block_ij_k1k2.transpose(Mt=block_ji_k2k1)
+                                blocks_T[j,i][k2,k1] = block_ij_k1k2.transpose(out=block_ji_k2k1)
                     else:
                         continue
 
@@ -1061,7 +1098,7 @@ def compute_interface_matrices_transpose(self):
                             data_exchanger.update_ghost_regions(array_plus=block_ji_k2k1._data)
 
                             if cart_j.is_comm_null:
-                                blocks_T[i,j][k1,k2] = block_ji_k2k1.transpose(Mt=block_ij_k1k2)
+                                blocks_T[i,j][k1,k2] = block_ji_k2k1.transpose(out=block_ij_k1k2)
 
                     else:
                         continue
@@ -1111,7 +1148,7 @@ def compute_interface_matrices_transpose(self):
                     data_exchanger.update_ghost_regions(array_minus=block_ij._data)
 
                     if cart_i.is_comm_null:
-                        blocks_T[j,i] = block_ij.transpose(Mt=block_ji)
+                        blocks_T[j,i] = block_ij.transpose(out=block_ji)
 
                 if not cart_j.is_comm_null:
                     if cart_ij.intercomm.rank == 0:
@@ -1138,7 +1175,7 @@ def compute_interface_matrices_transpose(self):
                     data_exchanger.update_ghost_regions(array_plus=block_ji._data)
 
                     if cart_j.is_comm_null:
-                        blocks_T[i,j] = block_ji.transpose(Mt=block_ij)
+                        blocks_T[i,j] = block_ji.transpose(out=block_ij)
 
         return blocks, blocks_T
 

diff --git a/psydac/linalg/stencil.py b/psydac/linalg/stencil.py
@@ -1037,9 +1037,17 @@ def vdot( self, v, out=None):
         return out
 
     # ...
-    def transpose(self, conjugate=False):
-        """ Create new StencilMatrix Mt, where domain and codomain are swapped
-            with respect to original matrix M, and Mt_{ij} = M_{ji}.
+    def transpose(self, conjugate=False, out=None):
+        """"
+        Return the transposed StencilMatrix, or the Hermitian Transpose if conjugate==True
+
+        Parameters
+        ----------
+        conjugate : Bool(optional)
+            True to get the Hermitian adjoint.
+
+        out : StencilMatrix(optional)
+            Optional out for the transpose to avoid temporaries
         """
         # For clarity rename self
         M = self
@@ -1049,14 +1057,20 @@ def transpose(self, conjugate=False):
             M.update_ghost_regions()
 
         # Create new matrix where domain and codomain are swapped
-        Mt = StencilMatrix(M.codomain, M.domain, pads=self._pads, backend=self._backend)
+        if out is not None :
+            assert isinstance(out, StencilMatrix)
+            assert out.codomain == M.domain
+            assert out.domain == M.codomain
+
+        else :
+            out = StencilMatrix(M.codomain, M.domain, pads=self._pads, backend=self._backend)
 
         # Call low-level '_transpose' function (works on Numpy arrays directly)
         if conjugate:
-            self._transpose_func(np.conjugate(M._data), Mt._data, **self._transpose_args)
+            self._transpose_func(np.conjugate(M._data), out._data, **self._transpose_args)
         else:
-            self._transpose_func(M._data, Mt._data, **self._transpose_args)
-        return Mt
+            self._transpose_func(M._data, out._data, **self._transpose_args)
+        return out
 
     # ...
     def toarray(self, **kwargs):
@@ -1190,12 +1204,25 @@ def max(self):
         return self._data.max()
 
     #...
-    def copy(self):
-        M = StencilMatrix( self.domain, self.codomain, self._pads, self._backend )
-        M._data[:] = self._data[:]
-        M._func    = self._func
-        M._args    = self._args
-        return M
+    def copy(self, out = None):
+        """
+        Create a copy of self, that can potentially be stored in a given StencilMatrix.
+
+        Parameters
+        ----------
+        out : StencilMatrix(optional)
+            The existing StencilMatrix in which we want to copy self.
+        """
+        if out is not None :
+            assert isinstance(out, StencilMatrix)
+            assert out.domain == self.domain
+            assert out.codomain == self.codomain
+        else :
+            out = StencilMatrix( self.domain, self.codomain, self._pads, self._backend )
+        out._data[:] = self._data[:]
+        out._func    = self._func
+        out._args    = self._args
+        return out
 
     #...
     def __imul__(self, a):
@@ -2020,26 +2047,26 @@ def _dot(mat, v, out, starts, nrows, nrows_extra, gpads, pads, dm, cm, c_axis, d
             new_nrows[d] += er
 
     # ...
-    def transpose( self, conjugate=False, Mt=None):
+    def transpose( self, conjugate=False, out=None):
         """ Create new StencilInterfaceMatrix Mt, where domain and codomain are swapped
             with respect to original matrix M, and Mt_{ij} = M_{ji}.
         """
 
         # For clarity rename self
         M = self
 
-        if Mt is None:
+        if out is None:
             # Create new matrix where domain and codomain are swapped
 
-            Mt = StencilInterfaceMatrix(M.codomain, M.domain, M.codomain_start, M.domain_start, M.codomain_axis, M.domain_axis, M.codomain_ext, M.domain_ext,
+            out = StencilInterfaceMatrix(M.codomain, M.domain, M.codomain_start, M.domain_start, M.codomain_axis, M.domain_axis, M.codomain_ext, M.domain_ext,
                                         flip=M.flip, pads=M.pads, backend=M.backend)
 
         # Call low-level '_transpose' function (works on Numpy arrays directly)
         if conjugate:
-            M._transpose_func(np.conjugate(M._data), Mt._data, **M._transpose_args)
+            M._transpose_func(np.conjugate(M._data), out._data, **M._transpose_args)
         else:
-            M._transpose_func(M._data, Mt._data, **M._transpose_args)
-        return Mt
+            M._transpose_func(M._data, out._data, **M._transpose_args)
+        return out
 
     def _prepare_transpose_args(self):
 

diff --git a/psydac/linalg/tests/test_block.py b/psydac/linalg/tests/test_block.py
@@ -170,17 +170,29 @@ def test_2D_block_linear_operator_serial_init( dtype, n1, n2, p1, p2, P1, P2  ):
     assert L1.nonzero_block_indices == ((0,0),(0,1),(1,0))
     assert L1.backend()==None
 
+    # Test copy method with an out
+    L4 = BlockLinearOperator( W, W )
+    L1.copy(out=L4)
+    assert L1.domain == W
+    assert L1.codomain == W
+    assert L1.dtype == dtype
+    assert L1.n_block_rows == 2
+    assert L1.n_block_cols == 2
+    assert L1.nonzero_block_indices == ((0,0),(0,1),(1,0))
+    assert L1.backend()==None
+
     L2 = BlockLinearOperator( W, W, blocks=list_blocks )
     L3 = BlockLinearOperator( W, W )
 
     L3[0,0] = M1
     L3[0,1] = M2
     L3[1,0] = M3
 
-    # Convert L1, L2 and L3 to COO form
+    # Convert L1, L2, L3 and L4 to COO form
     coo1 = L1.tosparse().tocoo()
     coo2 = L2.tosparse().tocoo()
     coo3 = L3.tosparse().tocoo()
+    coo4 = L4.tosparse().tocoo()
 
     # Check if the data are in the same place
     assert np.array_equal( coo1.col , coo2.col  )
@@ -190,6 +202,46 @@ def test_2D_block_linear_operator_serial_init( dtype, n1, n2, p1, p2, P1, P2  ):
     assert np.array_equal( coo1.col , coo3.col  )
     assert np.array_equal( coo1.row , coo3.row  )
     assert np.array_equal( coo1.data, coo3.data )
+
+    assert np.array_equal( coo1.col , coo4.col  )
+    assert np.array_equal( coo1.row , coo4.row  )
+    assert np.array_equal( coo1.data, coo4.data )
+
+    dict_blocks = {(0,0):M1, (0,1):M2}
+
+    L1 = BlockLinearOperator( W, W, blocks=dict_blocks )
+
+    # Test transpose
+    LT1 = L1.transpose()
+    LT2 = BlockLinearOperator( W, W )
+    L1.transpose(out=LT2)
+
+    assert LT1.domain == W
+    assert LT1.codomain == W
+    assert LT1.dtype == dtype
+    assert LT1.n_block_rows == 2
+    assert LT1.n_block_cols == 2
+    assert LT1.nonzero_block_indices == ((0,0),(1,0))
+    assert LT1.backend()==None
+
+    assert LT2.domain == W
+    assert LT2.codomain == W
+    assert LT2.dtype == dtype
+    assert LT2.n_block_rows == 2
+    assert LT2.n_block_cols == 2
+    assert LT2.nonzero_block_indices == ((0,0),(1,0))
+    assert LT2.backend()==None
+
+    #convert to scipy for tests
+    LT1_sp = LT1.tosparse()
+    LT2_sp = LT2.tosparse()
+    L1_spT  = L1.tosparse().T
+
+    # Check if the data are in the same place
+    assert abs(LT1_sp - L1_spT).max()< 1e-14
+    assert abs(LT2_sp - L1_spT).max()< 1e-14
+
+
 #===============================================================================
 @pytest.mark.parametrize( 'dtype', [float, complex] )
 @pytest.mark.parametrize( 'ndim', [1, 2, 3] )
@@ -937,6 +989,56 @@ def test_block_linear_operator_parallel_dot( dtype, n1, n2, p1, p2, P1, P2 ):
     # Check data in 1D array
     assert np.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 )
     assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 )
+
+    # Test copy with an out 
+    # Create random matrix 
+    N1 = StencilMatrix( V, V )
+    N2 = StencilMatrix( V, V )
+    N3 = StencilMatrix( V, V )
+    N4 = StencilMatrix( V, V )
+
+    for k1 in range(-p1,p1+1):
+        for k2 in range(-p2,p2+1):
+            N1[:,:,k1,k2] = factor*random()
+            N2[:,:,k1,k2] = factor*random()
+            N3[:,:,k1,k2] = factor*random()
+            N4[:,:,k1,k2] = factor*random()
+
+    K = BlockLinearOperator( W, W )
+    N = BlockLinearOperator( W, W )
+
+
+    K[0,0] = N1
+    K[0,1] = N2
+    K[1,0] = N3
+    K[1,1] = N4
+
+    #replace the random entries to check they are really overwritten
+    K.copy(out=N)
+    L.copy(out=K)
+
+    # Compute Block-vector product
+    K.dot(X, out= Y)
+
+    # Check data in 1D array
+    assert np.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 )
+    assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 )
+
+    # Test transpose with an out, check that we overwrite the random entries
+    L.transpose(out = N)
+
+    # Compute Block-vector product
+    Z = N.dot(X)
+
+    # Compute matrix-vector products for each block
+    y1 = M1.T.dot(x1) + M3.T.dot(x2)
+    y2 = M2.T.dot(x1) + M4.T.dot(x2)
+
+    # Check data in 1D array
+    assert np.allclose( Z.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 )
+    assert np.allclose( Z.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 )
+
+
 #===============================================================================
 @pytest.mark.parametrize( 'dtype', [float, complex] )
 @pytest.mark.parametrize( 'n1', [8, 16] )