Skip to content

Commit

Permalink
Add optional out argument for transpose and copy in `StencilMat…
Browse files Browse the repository at this point in the history
…rix` and `BlockLinearOperator` (#357)

Implement the possibility to pass an `out` argument to the methods
`copy` and `transpose` of classes `StencilMatrix` and `BlockLinearOperator`.

This is specially useful to avoid creating temporaries when a linear operator
is copied or transposed frequently, for example in the case of iterative
algorithms which update a matrix at every step.

Serial and parallel tests were added to check the implementation.
  • Loading branch information
vcarlier authored Dec 4, 2023
1 parent 78567a7 commit 8ac3eb5
Show file tree
Hide file tree
Showing 4 changed files with 247 additions and 47 deletions.
77 changes: 57 additions & 20 deletions psydac/linalg/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,13 +672,35 @@ def _dot(blocks, v, out, n_rows, n_cols, inc):
out[i] += Lij.dot(v[j], out=inc[i])

# ...
def transpose(self, conjugate=False):
blocks, blocks_T = self.compute_interface_matrices_transpose()
blocks = {(j, i): b.transpose(conjugate=conjugate) for (i, j), b in blocks.items()}
blocks.update(blocks_T)
mat = BlockLinearOperator(self.codomain, self.domain, blocks=blocks)
mat.set_backend(self._backend)
return mat
def transpose(self, conjugate=False, out=None):
""""
Return the transposed BlockLinearOperator, or the Hermitian Transpose if conjugate==True
Parameters
----------
conjugate : Bool(optional)
True to get the Hermitian adjoint.
out : BlockLinearOperator(optional)
Optional out for the transpose to avoid temporaries
"""
if out is not None:
assert isinstance(out, BlockLinearOperator)
assert out.codomain is self.domain
assert out.domain is self.codomain
for (i, j), Lij in self._blocks.items():
if out[j,i]==None:
out[j, i] = Lij.transpose(conjugate=conjugate)
else:
Lij.transpose(conjugate=conjugate, out=out[j,i])
else:
blocks, blocks_T = self.compute_interface_matrices_transpose()
blocks = {(j, i): b.transpose(conjugate=conjugate) for (i, j), b in blocks.items()}
blocks.update(blocks_T)
out = BlockLinearOperator(self.codomain, self.domain, blocks=blocks)

out.set_backend(self._backend)
return out

#--------------------------------------
# Overridden properties/methods
Expand Down Expand Up @@ -863,15 +885,30 @@ def backend(self):
return self._backend

# ...
def copy(self):
blocks = {ij: Bij.copy() for ij, Bij in self._blocks.items()}
mat = BlockLinearOperator(self.domain, self.codomain, blocks=blocks)
if self._backend is not None:
mat._func = self._func
mat._args = self._args
mat._blocks_as_args = [mat._blocks[key]._data for key in self._blocks]
mat._backend = self._backend
return mat
def copy(self, out=None):
"""
Create a copy of self, that can potentially be stored in a given BlockLinearOperator.
Parameters
----------
out : BlockLinearOperator(optional)
The existing BlockLinearOperator in which we want to copy self.
"""
if out is not None:
assert isinstance(out, BlockLinearOperator)
assert out.domain is self.domain
assert out.codomain is self.codomain
else:
out = BlockLinearOperator(self.domain, self.codomain)

for (i, j), Lij in self._blocks.items():
if out[i,j]==None:
out[i, j] = Lij.copy()
else:
Lij.copy(out=out[i,j])

out.set_backend(self._backend)


# ...
def __imul__(self, a):
Expand Down Expand Up @@ -999,7 +1036,7 @@ def compute_interface_matrices_transpose(self):
data_exchanger.update_ghost_regions(array_minus=block_ij_k1k2._data)

if cart_i.is_comm_null:
blocks_T[j,i][k2,k1] = block_ij_k1k2.transpose(Mt=block_ji_k2k1)
blocks_T[j,i][k2,k1] = block_ij_k1k2.transpose(out=block_ji_k2k1)
else:
continue

Expand Down Expand Up @@ -1061,7 +1098,7 @@ def compute_interface_matrices_transpose(self):
data_exchanger.update_ghost_regions(array_plus=block_ji_k2k1._data)

if cart_j.is_comm_null:
blocks_T[i,j][k1,k2] = block_ji_k2k1.transpose(Mt=block_ij_k1k2)
blocks_T[i,j][k1,k2] = block_ji_k2k1.transpose(out=block_ij_k1k2)

else:
continue
Expand Down Expand Up @@ -1111,7 +1148,7 @@ def compute_interface_matrices_transpose(self):
data_exchanger.update_ghost_regions(array_minus=block_ij._data)

if cart_i.is_comm_null:
blocks_T[j,i] = block_ij.transpose(Mt=block_ji)
blocks_T[j,i] = block_ij.transpose(out=block_ji)

if not cart_j.is_comm_null:
if cart_ij.intercomm.rank == 0:
Expand All @@ -1138,7 +1175,7 @@ def compute_interface_matrices_transpose(self):
data_exchanger.update_ghost_regions(array_plus=block_ji._data)

if cart_j.is_comm_null:
blocks_T[i,j] = block_ji.transpose(Mt=block_ij)
blocks_T[i,j] = block_ji.transpose(out=block_ij)

return blocks, blocks_T

Expand Down
65 changes: 46 additions & 19 deletions psydac/linalg/stencil.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,9 +1037,17 @@ def vdot( self, v, out=None):
return out

# ...
def transpose(self, conjugate=False):
""" Create new StencilMatrix Mt, where domain and codomain are swapped
with respect to original matrix M, and Mt_{ij} = M_{ji}.
def transpose(self, conjugate=False, out=None):
""""
Return the transposed StencilMatrix, or the Hermitian Transpose if conjugate==True
Parameters
----------
conjugate : Bool(optional)
True to get the Hermitian adjoint.
out : StencilMatrix(optional)
Optional out for the transpose to avoid temporaries
"""
# For clarity rename self
M = self
Expand All @@ -1049,14 +1057,20 @@ def transpose(self, conjugate=False):
M.update_ghost_regions()

# Create new matrix where domain and codomain are swapped
Mt = StencilMatrix(M.codomain, M.domain, pads=self._pads, backend=self._backend)
if out is not None :
assert isinstance(out, StencilMatrix)
assert out.codomain == M.domain
assert out.domain == M.codomain

else :
out = StencilMatrix(M.codomain, M.domain, pads=self._pads, backend=self._backend)

# Call low-level '_transpose' function (works on Numpy arrays directly)
if conjugate:
self._transpose_func(np.conjugate(M._data), Mt._data, **self._transpose_args)
self._transpose_func(np.conjugate(M._data), out._data, **self._transpose_args)
else:
self._transpose_func(M._data, Mt._data, **self._transpose_args)
return Mt
self._transpose_func(M._data, out._data, **self._transpose_args)
return out

# ...
def toarray(self, **kwargs):
Expand Down Expand Up @@ -1190,12 +1204,25 @@ def max(self):
return self._data.max()

#...
def copy(self):
M = StencilMatrix( self.domain, self.codomain, self._pads, self._backend )
M._data[:] = self._data[:]
M._func = self._func
M._args = self._args
return M
def copy(self, out = None):
"""
Create a copy of self, that can potentially be stored in a given StencilMatrix.
Parameters
----------
out : StencilMatrix(optional)
The existing StencilMatrix in which we want to copy self.
"""
if out is not None :
assert isinstance(out, StencilMatrix)
assert out.domain == self.domain
assert out.codomain == self.codomain
else :
out = StencilMatrix( self.domain, self.codomain, self._pads, self._backend )
out._data[:] = self._data[:]
out._func = self._func
out._args = self._args
return out

#...
def __imul__(self, a):
Expand Down Expand Up @@ -2020,26 +2047,26 @@ def _dot(mat, v, out, starts, nrows, nrows_extra, gpads, pads, dm, cm, c_axis, d
new_nrows[d] += er

# ...
def transpose( self, conjugate=False, Mt=None):
def transpose( self, conjugate=False, out=None):
""" Create new StencilInterfaceMatrix Mt, where domain and codomain are swapped
with respect to original matrix M, and Mt_{ij} = M_{ji}.
"""

# For clarity rename self
M = self

if Mt is None:
if out is None:
# Create new matrix where domain and codomain are swapped

Mt = StencilInterfaceMatrix(M.codomain, M.domain, M.codomain_start, M.domain_start, M.codomain_axis, M.domain_axis, M.codomain_ext, M.domain_ext,
out = StencilInterfaceMatrix(M.codomain, M.domain, M.codomain_start, M.domain_start, M.codomain_axis, M.domain_axis, M.codomain_ext, M.domain_ext,
flip=M.flip, pads=M.pads, backend=M.backend)

# Call low-level '_transpose' function (works on Numpy arrays directly)
if conjugate:
M._transpose_func(np.conjugate(M._data), Mt._data, **M._transpose_args)
M._transpose_func(np.conjugate(M._data), out._data, **M._transpose_args)
else:
M._transpose_func(M._data, Mt._data, **M._transpose_args)
return Mt
M._transpose_func(M._data, out._data, **M._transpose_args)
return out

def _prepare_transpose_args(self):

Expand Down
104 changes: 103 additions & 1 deletion psydac/linalg/tests/test_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,17 +170,29 @@ def test_2D_block_linear_operator_serial_init( dtype, n1, n2, p1, p2, P1, P2 ):
assert L1.nonzero_block_indices == ((0,0),(0,1),(1,0))
assert L1.backend()==None

# Test copy method with an out
L4 = BlockLinearOperator( W, W )
L1.copy(out=L4)
assert L1.domain == W
assert L1.codomain == W
assert L1.dtype == dtype
assert L1.n_block_rows == 2
assert L1.n_block_cols == 2
assert L1.nonzero_block_indices == ((0,0),(0,1),(1,0))
assert L1.backend()==None

L2 = BlockLinearOperator( W, W, blocks=list_blocks )
L3 = BlockLinearOperator( W, W )

L3[0,0] = M1
L3[0,1] = M2
L3[1,0] = M3

# Convert L1, L2 and L3 to COO form
# Convert L1, L2, L3 and L4 to COO form
coo1 = L1.tosparse().tocoo()
coo2 = L2.tosparse().tocoo()
coo3 = L3.tosparse().tocoo()
coo4 = L4.tosparse().tocoo()

# Check if the data are in the same place
assert np.array_equal( coo1.col , coo2.col )
Expand All @@ -190,6 +202,46 @@ def test_2D_block_linear_operator_serial_init( dtype, n1, n2, p1, p2, P1, P2 ):
assert np.array_equal( coo1.col , coo3.col )
assert np.array_equal( coo1.row , coo3.row )
assert np.array_equal( coo1.data, coo3.data )

assert np.array_equal( coo1.col , coo4.col )
assert np.array_equal( coo1.row , coo4.row )
assert np.array_equal( coo1.data, coo4.data )

dict_blocks = {(0,0):M1, (0,1):M2}

L1 = BlockLinearOperator( W, W, blocks=dict_blocks )

# Test transpose
LT1 = L1.transpose()
LT2 = BlockLinearOperator( W, W )
L1.transpose(out=LT2)

assert LT1.domain == W
assert LT1.codomain == W
assert LT1.dtype == dtype
assert LT1.n_block_rows == 2
assert LT1.n_block_cols == 2
assert LT1.nonzero_block_indices == ((0,0),(1,0))
assert LT1.backend()==None

assert LT2.domain == W
assert LT2.codomain == W
assert LT2.dtype == dtype
assert LT2.n_block_rows == 2
assert LT2.n_block_cols == 2
assert LT2.nonzero_block_indices == ((0,0),(1,0))
assert LT2.backend()==None

#convert to scipy for tests
LT1_sp = LT1.tosparse()
LT2_sp = LT2.tosparse()
L1_spT = L1.tosparse().T

# Check if the data are in the same place
assert abs(LT1_sp - L1_spT).max()< 1e-14
assert abs(LT2_sp - L1_spT).max()< 1e-14


#===============================================================================
@pytest.mark.parametrize( 'dtype', [float, complex] )
@pytest.mark.parametrize( 'ndim', [1, 2, 3] )
Expand Down Expand Up @@ -937,6 +989,56 @@ def test_block_linear_operator_parallel_dot( dtype, n1, n2, p1, p2, P1, P2 ):
# Check data in 1D array
assert np.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 )
assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 )

# Test copy with an out
# Create random matrix
N1 = StencilMatrix( V, V )
N2 = StencilMatrix( V, V )
N3 = StencilMatrix( V, V )
N4 = StencilMatrix( V, V )

for k1 in range(-p1,p1+1):
for k2 in range(-p2,p2+1):
N1[:,:,k1,k2] = factor*random()
N2[:,:,k1,k2] = factor*random()
N3[:,:,k1,k2] = factor*random()
N4[:,:,k1,k2] = factor*random()

K = BlockLinearOperator( W, W )
N = BlockLinearOperator( W, W )


K[0,0] = N1
K[0,1] = N2
K[1,0] = N3
K[1,1] = N4

#replace the random entries to check they are really overwritten
K.copy(out=N)
L.copy(out=K)

# Compute Block-vector product
K.dot(X, out= Y)

# Check data in 1D array
assert np.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 )
assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 )

# Test transpose with an out, check that we overwrite the random entries
L.transpose(out = N)

# Compute Block-vector product
Z = N.dot(X)

# Compute matrix-vector products for each block
y1 = M1.T.dot(x1) + M3.T.dot(x2)
y2 = M2.T.dot(x1) + M4.T.dot(x2)

# Check data in 1D array
assert np.allclose( Z.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 )
assert np.allclose( Z.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 )


#===============================================================================
@pytest.mark.parametrize( 'dtype', [float, complex] )
@pytest.mark.parametrize( 'n1', [8, 16] )
Expand Down
Loading

0 comments on commit 8ac3eb5

Please sign in to comment.