diff --git a/docs/source/modules/linalg/block.rst b/docs/source/modules/linalg/block.rst index 2874201c5..b6feaefa2 100644 --- a/docs/source/modules/linalg/block.rst +++ b/docs/source/modules/linalg/block.rst @@ -4,7 +4,6 @@ linalg.block * :ref:`BlockVectorSpace ` * :ref:`BlockVector ` * :ref:`BlockLinearOperator ` - * :ref:`BlockDiagonalSolver ` .. inheritance-diagram:: psydac.linalg.block @@ -31,11 +30,3 @@ BlockLinearOperator .. autoclass:: psydac.linalg.block.BlockLinearOperator :members: - -.. _blockdiagonalsolver: - -BlockDiagonalSolver -------------------- - -.. autoclass:: psydac.linalg.block.BlockDiagonalSolver - :members: diff --git a/docs/source/modules/linalg/direct_solvers.rst b/docs/source/modules/linalg/direct_solvers.rst index 52451d3e2..452362851 100644 --- a/docs/source/modules/linalg/direct_solvers.rst +++ b/docs/source/modules/linalg/direct_solvers.rst @@ -1,20 +1,11 @@ linalg.direct_solvers ===================== - * :ref:`DirectSolver ` * :ref:`BandedSolver ` * :ref:`SparseSolver ` .. inheritance-diagram:: psydac.linalg.direct_solvers -.. _directsolver: - -DirectSolver ------------- - -.. autoclass:: psydac.linalg.direct_solvers.DirectSolver - :members: - .. _bandedsolver: BandedSolver diff --git a/psydac/feec/global_projectors.py b/psydac/feec/global_projectors.py index 7ecb67a7d..9106a9cbb 100644 --- a/psydac/feec/global_projectors.py +++ b/psydac/feec/global_projectors.py @@ -4,7 +4,7 @@ from psydac.linalg.kron import KroneckerLinearSolver from psydac.linalg.stencil import StencilVector -from psydac.linalg.block import BlockDiagonalSolver, BlockVector +from psydac.linalg.block import BlockLinearOperator, BlockVector from psydac.core.bsplines import quadrature_grid from psydac.utilities.quadratures import gauss_legendre from psydac.fem.basic import FemField @@ -168,7 +168,7 @@ def __init__(self, space, nquads = None): self._grid_x += [block_x] self._grid_w += [block_w] - solverblocks += [KroneckerLinearSolver(tensorspaces[i].vector_space, solvercells)] + solverblocks += [KroneckerLinearSolver(tensorspaces[i].vector_space, tensorspaces[i].vector_space, solvercells)] dataslice = tuple(slice(p, -p) for p in tensorspaces[i].vector_space.pads) dofs[i] = rhsblocks[i]._data[dataslice] @@ -177,11 +177,13 @@ def __init__(self, space, nquads = None): args = (*intp_x, *quad_x, *quad_w, *dofs) self._func = lambda *fun: func(*args, *fun) - # build a BlockDiagonalSolver, if necessary + # build a BlockLinearOperator, if necessary if len(solverblocks) == 1: self._solver = solverblocks[0] else: - self._solver = BlockDiagonalSolver(self._space.vector_space, blocks=solverblocks) + domain = codomain = self._space.vector_space + blocks = {(i, i): B_i for i, B_i in enumerate(solverblocks)} + self._solver = BlockLinearOperator(domain, codomain, blocks) @property def space(self): @@ -307,7 +309,7 @@ def __call__(self, fun): else: self._func(fun) - coeffs = self._solver.solve(self._rhs) + coeffs = self._solver.dot(self._rhs) return FemField(self._space, coeffs=coeffs) diff --git a/psydac/linalg/basic.py b/psydac/linalg/basic.py index 6a545c601..dc86957b4 100644 --- a/psydac/linalg/basic.py +++ b/psydac/linalg/basic.py @@ -1005,5 +1005,14 @@ def space(self): pass @abstractmethod - def solve(self, rhs, out=None, transposed=False): + def transpose(self): + """Return the transpose of the LinearSolver.""" pass + + @abstractmethod + def solve(self, rhs, out=None): + pass + + @property + def T(self): + return self.transpose() diff --git a/psydac/linalg/block.py b/psydac/linalg/block.py index fb2737d56..8075c07eb 100644 --- a/psydac/linalg/block.py +++ b/psydac/linalg/block.py @@ -7,12 +7,12 @@ from types import MappingProxyType from scipy.sparse import bmat, lil_matrix -from psydac.linalg.basic import VectorSpace, Vector, LinearOperator, LinearSolver, ZeroOperator -from psydac.ddm.cart import InterfaceCartDecomposition -from psydac.ddm.utilities import get_data_exchanger -from psydac.linalg.stencil import StencilVector, StencilMatrix +from psydac.linalg.basic import VectorSpace, Vector, LinearOperator +from psydac.linalg.stencil import StencilMatrix +from psydac.ddm.cart import InterfaceCartDecomposition +from psydac.ddm.utilities import get_data_exchanger -__all__ = ('BlockVectorSpace', 'BlockVector', 'BlockLinearOperator', 'BlockDiagonalSolver') +__all__ = ('BlockVectorSpace', 'BlockVector', 'BlockLinearOperator') #=============================================================================== class BlockVectorSpace(VectorSpace): @@ -1321,138 +1321,3 @@ def func(blocks, v, out, **args): self._func = func self._backend = backend - -#=============================================================================== -class BlockDiagonalSolver( LinearSolver ): - """ - A LinearSolver that can be written as blocks of other LinearSolvers, - i.e. it can be seen as a solver for linear equations with block-diagonal matrices. - - The space of this solver has to be of the type BlockVectorSpace. - - Parameters - ---------- - V : psydac.linalg.block.BlockVectorSpace - Space of the new blocked linear solver. - - blocks : dict | list | tuple - LinearSolver objects (optional). - - a) 'blocks' can be dictionary with - . key = integer i >= 0 - . value = corresponding LinearSolver Lii - - b) 'blocks' can be list of LinearSolvers (or tuple of these) where blocks[i] - is the LinearSolver Lii (if None, we assume null operator) - - """ - def __init__( self, V, blocks=None ): - - assert isinstance( V, BlockVectorSpace ) - - self._space = V - self._nblocks = V.n_blocks - - # Store blocks in list (hence, they can be manually changed later) - self._blocks = [None] * self._nblocks - - if blocks: - - if isinstance( blocks, dict ): - for i, L in blocks.items(): - self[i] = L - - elif isinstance( blocks, (list, tuple) ): - for i, L in enumerate( blocks ): - self[i] = L - - else: - raise ValueError( "Blocks can only be given as dict or 1D list/tuple." ) - - #-------------------------------------- - # Abstract interface - #-------------------------------------- - @property - def space( self ): - """ - The space this BlockDiagonalSolver works on. - """ - return self._space - - # ... - def solve( self, rhs, out=None, transposed=False ): - """ - Solves the linear system for the given right-hand side rhs. - An out vector can be supplied, otherwise a new vector will be allocated. - - This operation supports in-place operations, given that the underlying solvers - do as well. - - Parameters - ---------- - rhs : BlockVector - The input right-hand side. - - out : BlockVector | NoneType - The output vector, or None. - - transposed : Bool - If true, and supported by the underlying solvers, - rhs is solved against the transposed right-hand sides. - - Returns - ------- - out : BlockVector - Either `out`, if given as input parameter, or a newly-allocated vector. - In all cases, it holds the result of the computation. - """ - assert isinstance(rhs, BlockVector) - assert rhs.space is self.space - if out is None: - out = self.space.zeros() - - assert isinstance(out, BlockVector) - assert out.space is self.space - - rhs.update_ghost_regions() - - for i, L in enumerate(self._blocks): - if L is None: - raise NotImplementedError('All solvers have to be defined.') - L.solve(rhs[i], out=out[i], transposed=transposed) - - return out - - #-------------------------------------- - # Other properties/methods - #-------------------------------------- - @property - def blocks( self ): - """ Immutable 1D view (tuple) of the linear solvers, - including the empty blocks as 'None' objects. - """ - return tuple(self._blocks) - # ... - @property - def n_blocks( self ): - """ - The number of blocks in the matrix. - """ - return self._nblocks - - # ... - def __getitem__( self, key ): - assert 0 <= key < self._nblocks - - return self._blocks.get( key, None ) - - # ... - def __setitem__( self, key, value ): - assert 0 <= key < self._nblocks - - assert isinstance( value, LinearSolver ) - - # Check domain of rhs - assert value.space is self.space[key] - - self._blocks[key] = value diff --git a/psydac/linalg/direct_solvers.py b/psydac/linalg/direct_solvers.py index db6a49c2a..2d57c0f06 100644 --- a/psydac/linalg/direct_solvers.py +++ b/psydac/linalg/direct_solvers.py @@ -9,29 +9,10 @@ from psydac.linalg.basic import LinearSolver -__all__ = ('DirectSolver', 'BandedSolver', 'SparseSolver') +__all__ = ('BandedSolver', 'SparseSolver') #=============================================================================== -class DirectSolver( LinearSolver ): - """ - Abstract class for direct linear solvers. - - """ - - #------------------------------------- - # Deferred methods - #------------------------------------- - @property - @abstractmethod - def space( self ): - pass - - @abstractmethod - def solve( self, rhs, out=None, transposed=False ): - pass - -#=============================================================================== -class BandedSolver ( DirectSolver ): +class BandedSolver(LinearSolver): """ Solve the equation Ax = b for x, assuming A is banded matrix. @@ -47,10 +28,11 @@ class BandedSolver ( DirectSolver ): Banded matrix. """ - def __init__( self, u, l, bmat ): + def __init__(self, u, l, bmat, transposed=False): self._u = u self._l = l + self._transposed = transposed # ... LU factorization if bmat.dtype == np.float32: @@ -66,7 +48,7 @@ def __init__( self, u, l, bmat ): self._factor_function = zgbtrf self._solver_function = zgbtrs else: - msg = f'Cannot create a DirectSolver for bmat.dtype = {bmat.dtype}' + msg = f'Cannot create a BandedSolver for bmat.dtype = {bmat.dtype}' raise NotImplementedError(msg) self._bmat, self._ipiv, self._finfo = self._factor_function(bmat, l, u) @@ -77,22 +59,40 @@ def __init__( self, u, l, bmat ): self._dtype = bmat.dtype @property - def finfo( self ): + def finfo(self): return self._finfo @property - def sinfo( self ): + def sinfo(self): return self._sinfo #-------------------------------------- # Abstract interface #-------------------------------------- @property - def space( self ): + def space(self): return self._space + def transpose(self): + cls = type(self) + obj = super().__new__(cls) + + obj._u = self._l + obj._l = self._u + obj._bmat = self._bmat + obj._ipiv = self._ipiv + obj._finfo = self._finfo + obj._factor_function = self._factor_function + obj._solver_function = self._solver_function + obj._sinfo = None + obj._space = self._space + obj._dtype = self._dtype + obj._transposed = not self._transposed + + return obj + #... - def solve( self, rhs, out=None, transposed=False ): + def solve(self, rhs, out=None): """ Solves for the given right-hand side. @@ -106,12 +106,11 @@ def solve( self, rhs, out=None, transposed=False ): out : ndarray | NoneType Output vector. If given, it has to have the same shape and datatype as rhs. - - transposed : bool - If and only if set to true, we solve against the transposed matrix. (supported by the underlying solver) """ assert rhs.T.shape[0] == self._bmat.shape[1] + transposed = self._transposed + if out is None: preout, self._sinfo = self._solver_function(self._bmat, self._l, self._u, rhs.T, self._ipiv, trans=transposed) @@ -134,7 +133,7 @@ def solve( self, rhs, out=None, transposed=False ): return out #=============================================================================== -class SparseSolver ( DirectSolver ): +class SparseSolver (LinearSolver): """ Solve the equation Ax = b for x, assuming A is scipy sparse matrix. @@ -144,22 +143,33 @@ class SparseSolver ( DirectSolver ): Generic sparse matrix. """ - def __init__( self, spmat ): + def __init__(self, spmat, transposed=False): - assert isinstance( spmat, spmatrix ) + assert isinstance(spmat, spmatrix) self._space = np.ndarray - self._splu = splu( spmat.tocsc() ) + self._splu = splu(spmat.tocsc()) + self._transposed = transposed #-------------------------------------- # Abstract interface #-------------------------------------- @property - def space( self ): + def space(self): return self._space + def transpose(self): + cls = type(self) + obj = super().__new__(cls) + + obj._space = self._space + obj._splu = self._splu + obj._transposed = not self._transposed + + return obj + #... - def solve( self, rhs, out=None, transposed=False ): + def solve(self, rhs, out=None): """ Solves for the given right-hand side. @@ -173,21 +183,19 @@ def solve( self, rhs, out=None, transposed=False ): out : ndarray | NoneType Output vector. If given, it has to have the same shape and datatype as rhs. - - transposed : bool - If and only if set to true, we solve against the transposed matrix. (supported by the underlying solver) """ assert rhs.T.shape[0] == self._splu.shape[1] + transposed = self._transposed if out is None: - out = self._splu.solve( rhs.T, trans='T' if transposed else 'N' ).T + out = self._splu.solve(rhs.T, trans='T' if transposed else 'N').T else: assert out.shape == rhs.shape assert out.dtype == rhs.dtype # currently no in-place solve exposed - out[:] = self._splu.solve( rhs.T, trans='T' if transposed else 'N' ).T + out[:] = self._splu.solve(rhs.T, trans='T' if transposed else 'N').T return out diff --git a/psydac/linalg/fft.py b/psydac/linalg/fft.py index 6cd566353..5d67d178b 100644 --- a/psydac/linalg/fft.py +++ b/psydac/linalg/fft.py @@ -48,8 +48,11 @@ def __init__(self, function): @property def space(self): return np.ndarray + + def transpose(self): + raise NotImplementedError('transpose() is not implemented for OneDimSolvers') - def solve(self, rhs, out=None, transposed=False): + def solve(self, rhs, out=None): if out is None: out = np.empty_like(rhs) @@ -67,7 +70,7 @@ def __init__(self, space, functions): else: onedimsolver = DistributedFFTBase.OneDimSolver(functions) solvers = [onedimsolver] * space.ndim - self._isolver = KroneckerLinearSolver(space, solvers) + self._isolver = KroneckerLinearSolver(space, space, solvers) # ... @property diff --git a/psydac/linalg/kron.py b/psydac/linalg/kron.py index 0186f119f..2e2f8bb10 100644 --- a/psydac/linalg/kron.py +++ b/psydac/linalg/kron.py @@ -371,7 +371,7 @@ def exchange_assembly_data( self ): def set_backend(self, backend): pass #============================================================================== -class KroneckerLinearSolver(LinearSolver): +class KroneckerLinearSolver(LinearOperator): """ A solver for Ax=b, where A is a Kronecker matrix from arbirary dimension d, defined by d solvers. We also need information about the space of b. @@ -380,34 +380,45 @@ class KroneckerLinearSolver(LinearSolver): ---------- V : StencilVectorSpace The space b will live in; i.e. which gives us information about - the distribution of the right-hand sides. + the distribution of the right-hand side b. + + W : StencilVectorSpace + The space x will live in; i.e. which gives us information about + the distribution of the unknown vector x. solvers : list of LinearSolver The components of A in each dimension. Attributes ---------- - space : StencilVectorSpace - The space our vectors to solve live in. + domain : StencilVectorSpace + The space of the rhs vector b. + + codomain : StencilVectorSpace + The space of the unknown vector x. """ - def __init__(self, V, solvers): + def __init__(self, V, W, solvers): assert isinstance(V, StencilVectorSpace) + assert isinstance(W, StencilVectorSpace) assert hasattr( solvers, '__iter__' ) for solver in solvers: assert isinstance(solver, LinearSolver) assert V.ndim == len(solvers) + assert W.ndim == len(solvers) + assert V.npts == W.npts # general arguments - self._space = V + self._domain = V + self._codomain = W self._solvers = solvers - self._parallel = self._space.parallel - self._dtype = self._space._dtype + self._parallel = self._domain.parallel + self._dtype = self._codomain._dtype if self._parallel: - self._mpi_type = V._mpi_type + self._mpi_type = self._domain._mpi_type else: self._mpi_type = None - self._ndim = self._space.ndim + self._ndim = self._codomain.ndim # compute and setup solver arguments self._setup_solvers() @@ -424,12 +435,12 @@ def _setup_solvers(self): (which potentially utilize MPI). """ # slice sizes - starts = np.array(self._space.starts) - ends = np.array(self._space.ends) + 1 + starts = np.array(self._domain.starts) + ends = np.array(self._domain.ends) + 1 self._slice = tuple([slice(s, e) for s,e in zip(starts, ends)]) # local and global sizes - nglobals = self._space.npts + nglobals = self._domain.npts nlocals = ends - starts self._localsize = np.product(nlocals) mglobals = self._localsize // nlocals @@ -446,15 +457,15 @@ def _setup_solvers(self): # useful e.g. if we have little data in some directions # (and thus no data distributed there) - if not self._parallel or self._space.cart.subcomm[i].size <= 1: + if not self._parallel or self._domain.cart.subcomm[i].size <= 1: # serial solve solver_passes[i] = KroneckerLinearSolver.KroneckerSolverSerialPass( self._solvers[i], nglobals[i], mglobals[i]) else: # for the parallel case, use Alltoallv solver_passes[i] = KroneckerLinearSolver.KroneckerSolverParallelPass( - self._solvers[i], self._space._mpi_type, i, - self._space.cart, mglobals[i], nglobals[i], nlocals[i], self._localsize) + self._solvers[i], self._domain._mpi_type, i, + self._domain.cart, mglobals[i], nglobals[i], nlocals[i], self._localsize) # we have a parallel solve pass now, so we are not completely local any more self._allserial = False @@ -499,14 +510,33 @@ def _allocate_temps(self): else: temp2 = np.empty((self._tempsize,), dtype=self._dtype) return temp1, temp2 + + @property + def domain(self): + return self._domain @property - def space(self): - """ - Returns the space associated to this solver (i.e. where the information - about the cartesian distribution is taken from). - """ - return self._space + def codomain(self): + return self._codomain + + @property + def dtype(self): + return None + + def toarray(self): + raise NotImplementedError('toarray() is not defined for KroneckerLinearSolvers.') + + def tosparse(self): + raise NotImplementedError('tosparse() is not defined for KroneckerLinearSolvers.') + + def transpose(self, conjugate=False): + new_domain = self._codomain + new_codomain = self._domain + new_solvers = [solver.transpose() for solver in self._solvers] + return KroneckerLinearSolver(new_domain, new_codomain, new_solvers) + + def dot(self, v, out=None): + return self.solve(v, out=out) @property def solvers(self): @@ -515,18 +545,18 @@ def solvers(self): """ return tuple(self._solvers) - def solve(self, rhs, out=None, transposed=False): + def solve(self, rhs, out=None): """ Solves Ax=b where A is a Kronecker product matrix (and represented as such), and b is a suitable vector. """ # type checks - assert rhs.space is self._space + assert rhs.space is self._domain if out is not None: assert isinstance( out, StencilVector ) - assert out.space is self._space + assert out.space is self._codomain else: out = StencilVector( rhs.space ) @@ -534,12 +564,12 @@ def solve(self, rhs, out=None, transposed=False): outslice = out[self._slice] # call the actual kernel - self._solve_nd(inslice, outslice, transposed) + self._solve_nd(inslice, outslice) out.update_ghost_regions() return out - def _solve_nd(self, inslice, outslice, transposed): + def _solve_nd(self, inslice, outslice): """ The internal solve loop. Can handle arbitrary dimensions. """ @@ -552,14 +582,14 @@ def _solve_nd(self, inslice, outslice, transposed): # internal passes for i in range(self._ndim - 1): # solve direction - self._solver_passes[i].solve_pass(temp1, temp2, transposed) + self._solver_passes[i].solve_pass(temp1, temp2) # reorder and swap self._reorder_temp_to_temp(temp1, temp2, i) temp1, temp2 = temp2, temp1 # last pass - self._solver_passes[-1].solve_pass(temp1, temp2, transposed) + self._solver_passes[-1].solve_pass(temp1, temp2) # copy to output self._reorder_temp_to_outslice(temp1, outslice) @@ -604,7 +634,7 @@ class KroneckerSolverSerialPass: Parameters ---------- - solver : DirectSolver + solver : BandedSolver or SparseSolver The internally used solver class. nglobal : int @@ -629,7 +659,7 @@ def required_memory(self): """ return self._datasize - def solve_pass(self, workmem, tempmem, transposed): + def solve_pass(self, workmem, tempmem): """ Solves the data available in workmem, assuming that all data is available locally. @@ -642,16 +672,13 @@ def solve_pass(self, workmem, tempmem, transposed): tempmem : ndarray Ignored, it exists for compatibility with the parallel solver. - - transposed : bool - True, if and only if we want to solve against the transposed matrix instead. """ # reshape necessary memory in column-major view = workmem[:self._datasize] view.shape = (self._numrhs,self._dimrhs) # call solver in in-place mode - self._solver.solve(view, out=view, transposed=transposed) + self._solver.solve(view, out=view) class KroneckerSolverParallelPass: """ @@ -671,7 +698,7 @@ class KroneckerSolverParallelPass: Parameters ---------- - solver : DirectSolver + solver : BandedSolver or SparseSolver The internally used solver class. mpi_type : MPI type @@ -816,7 +843,7 @@ def _contiguous_to_blocked(self, blocked, contiguous): contiguouspart.shape = (self._mlocal,end-start) contiguouspart[:] = blocked_view[:,start:end] - def solve_pass(self, workmem, tempmem, transposed): + def solve_pass(self, workmem, tempmem): """ Solves the data available in workmem in a distributed manner, using MPI_Alltoallv. @@ -829,9 +856,6 @@ def solve_pass(self, workmem, tempmem, transposed): tempmem : ndarray Temporary array of the same minimum size as workmem. - - transposed : bool - True, if and only if we want to solve against the transposed matrix instead. """ # preparation sourceargs = [workmem[:self._localsize], self._source_transfer, self._mpi_type] @@ -844,7 +868,7 @@ def solve_pass(self, workmem, tempmem, transposed): self._blocked_to_contiguous(workmem, tempmem) # actual solve (source contains the data) - self._serialsolver.solve_pass(workmem, tempmem, transposed) + self._serialsolver.solve_pass(workmem, tempmem) # ordered stripes -> blocked stripes self._contiguous_to_blocked(workmem, tempmem) @@ -853,7 +877,7 @@ def solve_pass(self, workmem, tempmem, transposed): self._comm.Alltoallv(targetargs, sourceargs) #============================================================================== -def kronecker_solve(solvers, rhs, out=None, transposed=False): +def kronecker_solve(solvers, rhs, out=None): """ Solve linear system Ax=b with A=kron( A_n, A_{n-1}, ..., A_2, A_1 ), given $n$ separate linear solvers $L_n$ for the 1D problems $A_n x_n = b_n$: @@ -883,5 +907,5 @@ def kronecker_solve(solvers, rhs, out=None, transposed=False): else: out = StencilVector(rhs.space) - kronsolver = KroneckerLinearSolver(rhs.space, solvers) - return kronsolver.solve(rhs, out=out, transposed=transposed) + kronsolver = KroneckerLinearSolver(rhs.space, rhs.space, solvers) + return kronsolver.solve(rhs, out=out) diff --git a/psydac/linalg/tests/test_block.py b/psydac/linalg/tests/test_block.py index c67f49262..00aa98b0d 100644 --- a/psydac/linalg/tests/test_block.py +++ b/psydac/linalg/tests/test_block.py @@ -8,7 +8,7 @@ from psydac.linalg.direct_solvers import SparseSolver from psydac.linalg.stencil import StencilVectorSpace, StencilVector, StencilMatrix from psydac.linalg.block import BlockVectorSpace, BlockVector -from psydac.linalg.block import BlockLinearOperator, BlockDiagonalSolver +from psydac.linalg.block import BlockLinearOperator from psydac.linalg.utilities import array_to_psydac from psydac.linalg.kron import KroneckerLinearSolver from psydac.api.settings import PSYDAC_BACKEND_GPYCCEL @@ -192,114 +192,6 @@ def test_2D_block_linear_operator_serial_init( dtype, n1, n2, p1, p2, P1, P2 ): assert np.array_equal( coo1.data, coo3.data ) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float, complex] ) -@pytest.mark.parametrize( 'n1', [8, 16] ) -@pytest.mark.parametrize( 'n2', [8, 12] ) -@pytest.mark.parametrize( 'p1', [1, 2] ) -@pytest.mark.parametrize( 'p2', [1, 3] ) -@pytest.mark.parametrize( 'P1', [True, False] ) -@pytest.mark.parametrize( 'P2', [True] ) -def test_2D_block_diagonal_solver_serial_init( dtype, n1, n2, p1, p2, P1, P2 ): - # set seed for reproducibility - seed(n1*n2*p1*p2) - - D = DomainDecomposition([n1,n2], periods=[P1,P2]) - - # Partition the points - npts = [n1,n2] - global_starts, global_ends = compute_global_starts_ends(D, npts) - - cart = CartDecomposition(D, npts, global_starts, global_ends, pads=[p1,p2], shifts=[1,1]) - - # Create vector spaces, stencil matrices, and stencil vectors - V = StencilVectorSpace( cart, dtype=dtype) - - # Fill in stencil matrices based on diagonal index - if dtype==complex: - f1=lambda k1,k2: 10j*k1+k2 - else: - f1=lambda k1,k2: 10*k1+k2 - - m11 = np.zeros((n1, n1), dtype=dtype) - m12 = np.zeros((n2, n2), dtype=dtype) - for j in range(n1): - for i in range(-p1,p1+1): - m11[j, max(0, min(n1-1, j+i))] = f1(j,i) - for j in range(n2): - for i in range(-p2,p2+1): - m12[j, max(0, min(n2-1, j+i))] = f1(j,5*i)+2. - - - if dtype==complex: - f2=lambda k1,k2: 10j*k1**2+k2**3 - else: - f2=lambda k1,k2: 10*k1**2+k2**3 - - m21 = np.zeros((n1, n1), dtype=dtype) - m22 = np.zeros((n2, n2), dtype=dtype) - - for j in range(n1): - for i in range(-p1,p1+1): - m21[j, max(0, min(n1-1, j+i))] = f2(j,i) - for j in range(n2): - for i in range(-p2,p2+1): - m22[j, max(0, min(n2-1, j+i))] = f2(j,2*i)+2. - - M11 = SparseSolver( spa.csc_matrix(m11) ) - M12 = SparseSolver( spa.csc_matrix(m12) ) - M21 = SparseSolver( spa.csc_matrix(m21) ) - M22 = SparseSolver( spa.csc_matrix(m22) ) - M1 = KroneckerLinearSolver(V, [M11,M12]) - M2 = KroneckerLinearSolver(V, [M21,M22]) - x1 = StencilVector( V ) - x2 = StencilVector( V ) - - W = BlockVectorSpace(V, V) - - # Fill in vector with random values, then update ghost regions - for i1 in range(n1): - for i2 in range(n2): - x1[i1,i2] = 2.0*random() - 1.0 - x2[i1,i2] = 5.0*random() - 1.0 - x1.update_ghost_regions() - x2.update_ghost_regions() - - # Construct a BlockVector object containing x1 and x2 - # |x1| - # X = | | - # |x2| - - X = BlockVector(W) - X[0] = x1 - X[1] = x2 - - # Construct a BlockDiagonalSolver object containing M1, M2 using 3 ways - # |M1 0 | - # L = | | - # |0 M2| - - dict_blocks = {0:M1, 1:M2} - list_blocks = [M1, M2] - - L1 = BlockDiagonalSolver( W, blocks=dict_blocks ) - L2 = BlockDiagonalSolver( W, blocks=list_blocks ) - - L3 = BlockDiagonalSolver( W ) - - # Test for not allowing undefinedness - errresult = False - try: - L3.solve(X) - except NotImplementedError: - errresult = True - assert errresult - - L3[0] = M1 - L3[1] = M2 - assert L3.space == W - assert L3.blocks == (M1, M2) - assert L3.n_blocks == 2 -#=============================================================================== -@pytest.mark.parametrize( 'dtype', [float, complex] ) @pytest.mark.parametrize( 'ndim', [1, 2, 3] ) @pytest.mark.parametrize( 'p', [1, 2] ) @pytest.mark.parametrize( 'P1', [True, False] ) @@ -787,155 +679,6 @@ def test_block_linear_operator_serial_dot( dtype, n1, n2, p1, p2, P1, P2 ): @pytest.mark.parametrize( 'p2', [1, 3] ) @pytest.mark.parametrize( 'P1', [True, False] ) @pytest.mark.parametrize( 'P2', [True] ) -def test_block_diagonal_solver_serial_dot( dtype, n1, n2, p1, p2, P1, P2 ): - # set seed for reproducibility - seed(n1*n2*p1*p2) - - D = DomainDecomposition([n1,n2], periods=[P1,P2]) - - # Partition the points - npts = [n1,n2] - global_starts, global_ends = compute_global_starts_ends(D, npts) - - cart = CartDecomposition(D, npts, global_starts, global_ends, pads=[p1,p2], shifts=[1,1]) - - # Create vector spaces, stencil matrices, and stencil vectors - V = StencilVectorSpace( cart, dtype=dtype) - - # Fill in stencil matrices based on diagonal index - if dtype==complex: - f1=lambda k1,k2: 10j*k1+k2 - else: - f1=lambda k1,k2: 10*k1+k2 - - m11 = np.zeros((n1, n1), dtype=dtype) - m12 = np.zeros((n2, n2), dtype=dtype) - for j in range(n1): - for i in range(-p1,p1+1): - m11[j, max(0, min(n1-1, j+i))] = f1(j,i) - for j in range(n2): - for i in range(-p2,p2+1): - m12[j, max(0, min(n2-1, j+i))] = f1(j,5*i)+2. - - - if dtype==complex: - f2=lambda k1,k2: 10j*k1**2+k2**3 - else: - f2=lambda k1,k2: 10*k1**2+k2**3 - - m21 = np.zeros((n1, n1), dtype=dtype) - m22 = np.zeros((n2, n2), dtype=dtype) - for j in range(n1): - for i in range(-p1,p1+1): - m21[j, max(0, min(n1-1, j+i))] = f2(j,i) - for j in range(n2): - for i in range(-p2,p2+1): - m22[j, max(0, min(n2-1, j+i))] = f2(j,2*i)+2. - - M11 = SparseSolver( spa.csc_matrix(m11) ) - M12 = SparseSolver( spa.csc_matrix(m12) ) - M21 = SparseSolver( spa.csc_matrix(m21) ) - M22 = SparseSolver( spa.csc_matrix(m22) ) - M1 = KroneckerLinearSolver(V, [M11,M12]) - M2 = KroneckerLinearSolver(V, [M21,M22]) - x1 = StencilVector( V ) - x2 = StencilVector( V ) - - W = BlockVectorSpace(V, V) - - # Fill in vector with random values, then update ghost regions - for i1 in range(n1): - for i2 in range(n2): - x1[i1,i2] = 2.0*random() - 1.0 - x2[i1,i2] = 5.0*random() - 1.0 - x1.update_ghost_regions() - x2.update_ghost_regions() - - # Construct a BlockVector object containing x1 and x2 - # |x1| - # X = | | - # |x2| - - X = BlockVector(W) - X[0] = x1 - X[1] = x2 - - # Construct a BlockDiagonalSolver object containing M1, M2 using 3 ways - # |M1 0 | - # L = | | - # |0 M2| - - dict_blocks = {0:M1, 1:M2} - list_blocks = [M1, M2] - - L1 = BlockDiagonalSolver( W, blocks=dict_blocks ) - L2 = BlockDiagonalSolver( W, blocks=list_blocks ) - - L3 = BlockDiagonalSolver( W ) - - # Test for not allowing undefinedness - errresult = False - try: - L3.solve(X) - except NotImplementedError: - errresult = True - assert errresult - - L3[0] = M1 - L3[1] = M2 - - # Compute BlockDiagonalSolver product - Y1 = L1.solve(X) - Y2 = L2.solve(X) - Y3 = L3.solve(X) - - # Transposed - Yt = L1.solve(X, transposed=True) - - # Test other in/out methods - Y4a = W.zeros() - Y4b = L1.solve(X, out=Y4a) - assert Y4b is Y4a - - Y5a = W.zeros() - Y5a[0] = x1.copy() - Y5a[1] = x2.copy() - Y5b = L1.solve(Y5a, out=Y5a) - assert Y5b is Y5a - - # Solve linear equations for each block - y1 = M1.solve(x1) - y2 = M2.solve(x2) - - y1t = M1.solve(x1, transposed=True) - y2t = M2.solve(x2, transposed=True) - - # Check data in 1D array - assert np.allclose( Y1.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y1.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) - - assert np.allclose( Y2.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y2.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) - - assert np.allclose( Y3.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y3.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) - - assert np.allclose( Y4a.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y4a.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) - - assert np.allclose( Y5a.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y5a.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) - - assert np.allclose( Yt.blocks[0].toarray(), y1t.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Yt.blocks[1].toarray(), y2t.toarray(), rtol=1e-14, atol=1e-14 ) -#=============================================================================== -@pytest.mark.parametrize( 'dtype', [float, complex] ) -@pytest.mark.parametrize( 'n1', [8, 16] ) -@pytest.mark.parametrize( 'n2', [8, 12] ) -@pytest.mark.parametrize( 'p1', [1, 2] ) -@pytest.mark.parametrize( 'p2', [1, 3] ) -@pytest.mark.parametrize( 'P1', [True, False] ) -@pytest.mark.parametrize( 'P2', [True] ) def test_block_2d_array_to_psydac_1( dtype, n1, n2, p1, p2, P1, P2 ): #Define a factor for the data @@ -1196,155 +939,6 @@ def test_block_linear_operator_parallel_dot( dtype, n1, n2, p1, p2, P1, P2 ): assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float, complex] ) -@pytest.mark.parametrize( 'n1', [8,16] ) -@pytest.mark.parametrize( 'n2', [8,32] ) -@pytest.mark.parametrize( 'p1', [1,3] ) -@pytest.mark.parametrize( 'p2', [2] ) -@pytest.mark.parametrize( 'P1', [True, False] ) -@pytest.mark.parametrize( 'P2', [True] ) -@pytest.mark.parallel -def test_block_diagonal_solver_parallel_dot( dtype, n1, n2, p1, p2, P1, P2 ): - # Define a factor for the data - if dtype == complex: - factor = 1j - else: - factor = 1 - # set seed for reproducibility - seed(n1*n2*p1*p2) - - from mpi4py import MPI - comm = MPI.COMM_WORLD - D = DomainDecomposition([n1,n2], periods=[P1,P2], comm=comm) - - # Partition the points - npts = [n1,n2] - global_starts, global_ends = compute_global_starts_ends(D, npts) - - cart = CartDecomposition(D, npts, global_starts, global_ends, pads=[p1,p2], shifts=[1,1]) - - # Create vector spaces, stencil matrices, and stencil vectors - V = StencilVectorSpace( cart, dtype=dtype ) - - s1,s2 = V.starts - e1,e2 = V.ends - - # Fill in stencil matrices based on diagonal index - m11 = np.zeros((n1, n1),dtype=dtype) - m12 = np.zeros((n2, n2),dtype=dtype) - for j in range(n1): - for i in range(-p1,p1+1): - m11[j, max(0, min(n1-1, j+i))] = 10*factor*j+i - for j in range(n2): - for i in range(-p2,p2+1): - m12[j, max(0, min(n2-1, j+i))] = 20*factor*j+5*i+2. - - m21 = np.zeros((n1, n1),dtype=dtype) - m22 = np.zeros((n2, n2),dtype=dtype) - for j in range(n1): - for i in range(-p1,p1+1): - m21[j, max(0, min(n1-1, j+i))] = 10*factor*j**2+i**3 - for j in range(n2): - for i in range(-p2,p2+1): - m22[j, max(0, min(n2-1, j+i))] = 20*factor*j**2+i**3+2. - - M11 = SparseSolver( spa.csc_matrix(m11) ) - M12 = SparseSolver( spa.csc_matrix(m12) ) - M21 = SparseSolver( spa.csc_matrix(m21) ) - M22 = SparseSolver( spa.csc_matrix(m22) ) - M1 = KroneckerLinearSolver(V, [M11,M12]) - M2 = KroneckerLinearSolver(V, [M21,M22]) - x1 = StencilVector( V ) - x2 = StencilVector( V ) - - W = BlockVectorSpace(V, V) - - # Fill in vector with random values, then update ghost regions - for i1 in range(s1,e1+1): - for i2 in range(s2,e2+1): - x1[i1,i2] = 2.0*factor*random() - 1.0 - x2[i1,i2] = 5.0*factor*random() - 1.0 - x1.update_ghost_regions() - x2.update_ghost_regions() - - # Construct a BlockVector object containing x1 and x2 - # |x1| - # X = | | - # |x2| - - X = BlockVector(W) - X[0] = x1 - X[1] = x2 - - # Construct a BlockDiagonalSolver object containing M1, M2 using 3 ways - # |M1 0 | - # L = | | - # |0 M2| - - dict_blocks = {0:M1, 1:M2} - list_blocks = [M1, M2] - - L1 = BlockDiagonalSolver( W, blocks=dict_blocks ) - L2 = BlockDiagonalSolver( W, blocks=list_blocks ) - - L3 = BlockDiagonalSolver( W ) - - # Test for not allowing undefinedness - errresult = False - try: - L3.solve(X) - except NotImplementedError: - errresult = True - assert errresult - - L3[0] = M1 - L3[1] = M2 - - # Compute BlockDiagonalSolver product - Y1 = L1.solve(X) - Y2 = L2.solve(X) - Y3 = L3.solve(X) - - # Transposed - Yt = L1.solve(X, transposed=True) - - # Test other in/out methods - Y4a = W.zeros() - Y4b = L1.solve(X, out=Y4a) - assert Y4b is Y4a - - Y5a = W.zeros() - Y5a[0] = x1.copy() - Y5a[1] = x2.copy() - Y5b = L1.solve(Y5a, out=Y5a) - assert Y5b is Y5a - - # Solve linear equations for each block - y1 = M1.solve(x1) - y2 = M2.solve(x2) - - y1t = M1.solve(x1, transposed=True) - y2t = M2.solve(x2, transposed=True) - - # Check data in 1D array - assert np.allclose( Y1.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y1.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) - - assert np.allclose( Y2.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y2.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) - - assert np.allclose( Y3.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y3.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) - - assert np.allclose( Y4a.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y4a.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) - - assert np.allclose( Y5a.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y5a.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) - - assert np.allclose( Yt.blocks[0].toarray(), y1t.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Yt.blocks[1].toarray(), y2t.toarray(), rtol=1e-14, atol=1e-14 ) -#=============================================================================== -@pytest.mark.parametrize( 'dtype', [float, complex] ) @pytest.mark.parametrize( 'n1', [8, 16] ) @pytest.mark.parametrize( 'n2', [8, 32] ) @pytest.mark.parametrize( 'p1', [1, 3] ) diff --git a/psydac/linalg/tests/test_kron_direct_solver.py b/psydac/linalg/tests/test_kron_direct_solver.py index 836b0b1b3..873500b5a 100644 --- a/psydac/linalg/tests/test_kron_direct_solver.py +++ b/psydac/linalg/tests/test_kron_direct_solver.py @@ -3,13 +3,26 @@ import pytest import time import numpy as np -from mpi4py import MPI -from psydac.ddm.cart import DomainDecomposition, CartDecomposition +from mpi4py import MPI + + from scipy.sparse import csc_matrix, dia_matrix, kron from scipy.sparse.linalg import splu -from psydac.linalg.stencil import StencilVectorSpace, StencilVector, StencilMatrix -from psydac.linalg.kron import KroneckerLinearSolver + +from sympde.calculus import dot +from sympde.expr import BilinearForm, integral +from sympde.topology import Line +from sympde.topology import Cube, Square +from sympde.topology import Derham +from sympde.topology import elements_of + +from psydac.api.discretization import discretize +from psydac.ddm.cart import DomainDecomposition, CartDecomposition +from psydac.linalg.block import BlockLinearOperator from psydac.linalg.direct_solvers import SparseSolver, BandedSolver +from psydac.linalg.kron import KroneckerLinearSolver +from psydac.linalg.solvers import inverse +from psydac.linalg.stencil import StencilVectorSpace, StencilVector, StencilMatrix #=============================================================================== def compute_global_starts_ends(domain_decomposition, npts): @@ -70,10 +83,10 @@ def matrix_to_sparse(A): A.remove_spurious_entries() return SparseSolver(A.tosparse()) -def random_matrix(seed, space): - A = StencilMatrix(space, space) - p = space.pads[0] - dtype = space.dtype +def random_matrix(seed, domain, codomain): + A = StencilMatrix(domain, codomain) + p = domain.pads[0] # by definition of the StencilMatrix, domain.pads == codomain.pads + dtype = domain.dtype # by definition of the StencilMatrix, domain.dtype == codomain.dtype # for now, take matrices like this (as in the other tests) if dtype==complex: @@ -107,31 +120,41 @@ def compare_solve(seed, comm, npts, pads, periods, direct_solver, dtype=float, t # vector spaces comm = MPI.COMM_WORLD D = DomainDecomposition(npts, periods=periods, comm=comm) + D2 = DomainDecomposition(npts, periods=periods, comm=comm) # Partition the points global_starts, global_ends = compute_global_starts_ends(D, npts) + global_starts2, global_ends2 = compute_global_starts_ends(D2, npts) cart = CartDecomposition(D, npts, global_starts, global_ends, pads=pads, shifts=[1]*len(pads)) + cart2 = CartDecomposition(D2, npts, global_starts2, global_ends2, pads=pads, shifts=[1]*len(pads)) V = StencilVectorSpace(cart, dtype=dtype) + W = StencilVectorSpace(cart2, dtype=dtype) Ds = [DomainDecomposition([n], periods=[P]) for n,P in zip(npts, periods)] carts = [CartDecomposition(Di, [n], *compute_global_starts_ends(Di, [n]), pads=[p], shifts=[1]) for Di,n,p in zip(Ds, npts, pads)] Vs = [StencilVectorSpace(carti, dtype=dtype) for carti in carts] + Ds2 = [DomainDecomposition([n], periods=[P]) for n,P in zip(npts, periods)] + carts2 = [CartDecomposition(Di, [n], *compute_global_starts_ends(Di, [n]), pads=[p], shifts=[1]) for Di,n,p in zip(Ds2, npts, pads)] + Ws = [StencilVectorSpace(carti, dtype=dtype) for carti in carts2] localslice = tuple([slice(s, e+1) for s, e in zip(V.starts, V.ends)]) if verbose: print(f'[{rank}] Vector spaces built', flush=True) # bulid matrices (A) - A = [random_matrix(seed+i+1, Vi) for i,Vi in enumerate(Vs)] + A = [random_matrix(seed+i+1, Vi, Wi) for i,(Vi, Wi) in enumerate(zip(Vs, Ws))] solvers = [direct_solver(Ai) for Ai in A] if verbose: print(f'[{rank}] Matrices built', flush=True) # vector to solve for (Y) - Y = StencilVector(V) + if transposed: + Y = StencilVector(W) + else: + Y = StencilVector(V) Y_glob = random_vectordata(seed, npts, dtype=dtype) Y[localslice] = Y_glob[localslice] Y.update_ghost_regions() @@ -141,9 +164,16 @@ def compare_solve(seed, comm, npts, pads, periods, direct_solver, dtype=float, t # solve in two different ways X_glob = kron_solve_seq_ref(Y_glob, A, transposed) - Xout = StencilVector(V) + if transposed: + Xout = StencilVector(V) + else: + Xout = StencilVector(W) - X = KroneckerLinearSolver(V, solvers).solve(Y, out=Xout, transposed=transposed) + solver = KroneckerLinearSolver(V, W, solvers) + if transposed: + solver = solver.T + + X = solver.solve(Y, out=Xout) assert X is Xout if verbose: @@ -162,6 +192,165 @@ def compare_solve(seed, comm, npts, pads, periods, direct_solver, dtype=float, t # compare for equality assert np.allclose( X[localslice], X_glob[localslice], rtol=1e-8, atol=1e-8 ) +def get_M1_block_kron_solver(V1, ncells, degree, periodic): + """ + Given a 3D DeRham sequenece (V0 = H(grad) --grad--> V1 = H(curl) --curl--> V2 = H(div) --div--> V3 = L2) + discretized using ncells, degree and periodic, + + domain = Cube('C', bounds1=(0, 1), bounds2=(0, 1), bounds3=(0, 1)) + derham = Derham(domain) + domain_h = discretize(domain, ncells=ncells, periodic=periodic, comm=comm) + derham_h = discretize(derham, domain_h, degree=degree), + + returns the inverse of the mass matrix M1 as a BlockLinearOperator consisting of three KroneckerLinearSolvers on the diagonal. + """ + # assert 3D + assert len(ncells) == 3 + assert len(degree) == 3 + assert len(periodic) == 3 + + # 1D domain to be discreticed using the respective values of ncells, degree, periodic + domain_1d = Line('L', bounds=(0,1)) + derham_1d = Derham(domain_1d) + + # storage for the 1D mass matrices + M0_matrices = [] + M1_matrices = [] + + # assembly of the 1D mass matrices + for (n, p, P) in zip(ncells, degree, periodic): + + domain_1d_h = discretize(domain_1d, ncells=[n], periodic=[P]) + derham_1d_h = discretize(derham_1d, domain_1d_h, degree=[p]) + + u_1d_0, v_1d_0 = elements_of(derham_1d.V0, names='u_1d_0, v_1d_0') + u_1d_1, v_1d_1 = elements_of(derham_1d.V1, names='u_1d_1, v_1d_1') + + a_1d_0 = BilinearForm((u_1d_0, v_1d_0), integral(domain_1d, u_1d_0 * v_1d_0)) + a_1d_1 = BilinearForm((u_1d_1, v_1d_1), integral(domain_1d, u_1d_1 * v_1d_1)) + + a_1d_0_h = discretize(a_1d_0, domain_1d_h, (derham_1d_h.V0, derham_1d_h.V0)) + a_1d_1_h = discretize(a_1d_1, domain_1d_h, (derham_1d_h.V1, derham_1d_h.V1)) + + M_1d_0 = a_1d_0_h.assemble() + M_1d_1 = a_1d_1_h.assemble() + + M0_matrices.append(M_1d_0) + M1_matrices.append(M_1d_1) + + V1_1 = V1[0] + V1_2 = V1[1] + V1_3 = V1[2] + + B1_mat = [M1_matrices[0], M0_matrices[1], M0_matrices[2]] + B2_mat = [M0_matrices[0], M1_matrices[1], M0_matrices[2]] + B3_mat = [M0_matrices[0], M0_matrices[1], M1_matrices[2]] + + B1_solvers = [matrix_to_bandsolver(Ai) for Ai in B1_mat] + B2_solvers = [matrix_to_bandsolver(Ai) for Ai in B2_mat] + B3_solvers = [matrix_to_bandsolver(Ai) for Ai in B3_mat] + + B1_kron_inv = KroneckerLinearSolver(V1_1, V1_1, B1_solvers) + B2_kron_inv = KroneckerLinearSolver(V1_2, V1_2, B2_solvers) + B3_kron_inv = KroneckerLinearSolver(V1_3, V1_3, B3_solvers) + + M1_block_kron_solver = BlockLinearOperator(V1, V1, ((B1_kron_inv, None, None), + (None, B2_kron_inv, None), + (None, None, B3_kron_inv))) + + return M1_block_kron_solver + + + +def get_inverse_mass_matrices(derham_h, domain_h): + """ + Given a 2D DeRham sequence (V0 = H(grad) --curl--> V1 = H(div) --div--> V2 = L2) + and its discrete domain, which shall be rectangular, + returns the inverse of the mass matrices for all three spaces using kronecker solvers. + """ + # assert 2D + # Maybe should add more assert regarding the types and the domain form in case this get used in general context. + + V0h = derham_h.V0.vector_space + V1h = derham_h.V1.vector_space + V2h = derham_h.V2.vector_space + + bounds1 = domain_h.domain.bounds1 + bounds2 = domain_h.domain.bounds2 + + ncells = domain_h.ncells[domain_h.domain.name] + degree = derham_h.V0.degree + periodic = domain_h.periodic[domain_h.domain.name] + + assert len(ncells) == 2 + assert len(degree) == 2 + assert len(periodic) == 2 + + # 1D domain to be discreticed using the respective values of ncells, degree, periodic + list_domain_1d = [Line('L1', bounds=bounds1), Line('L2', bounds=bounds2)] + list_derham_1d = [Derham(domain_1d) for domain_1d in list_domain_1d] + + # storage for the 1D mass matrices + M0_matrices = [] + M1_matrices = [] + + # assembly of the 1D mass matrices + for (n, p, P, domain_1d, derham_1d) in zip(ncells, degree, periodic, list_domain_1d, list_derham_1d): + + domain_1d_h = discretize(domain_1d, ncells=[n], periodic=[P]) + derham_1d_h = discretize(derham_1d, domain_1d_h, degree=[p]) + + u_1d_0, v_1d_0 = elements_of(derham_1d.V0, names='u_1d_0, v_1d_0') + u_1d_1, v_1d_1 = elements_of(derham_1d.V1, names='u_1d_1, v_1d_1') + + a_1d_0 = BilinearForm((u_1d_0, v_1d_0), integral(domain_1d, u_1d_0 * v_1d_0)) + a_1d_1 = BilinearForm((u_1d_1, v_1d_1), integral(domain_1d, u_1d_1 * v_1d_1)) + + a_1d_0_h = discretize(a_1d_0, domain_1d_h, (derham_1d_h.V0, derham_1d_h.V0)) + a_1d_1_h = discretize(a_1d_1, domain_1d_h, (derham_1d_h.V1, derham_1d_h.V1)) + + M_1d_0 = a_1d_0_h.assemble() + M_1d_1 = a_1d_1_h.assemble() + + M0_matrices.append(M_1d_0) + M1_matrices.append(M_1d_1) + + #V0 H1 space + + B_mat_V0 = [M0_matrices[0], M0_matrices[1]] + + B_solvers_V0 = [matrix_to_bandsolver(Ai) for Ai in B_mat_V0] + + M0_kron_solver = KroneckerLinearSolver(V0h, V0h, B_solvers_V0) + + + #V1 Hdiv space + V1_1 = V1h[0] + V1_2 = V1h[1] + + B1_mat_V1 = [M0_matrices[0], M1_matrices[1]] + B2_mat_V1 = [M1_matrices[0], M0_matrices[1]] + + B1_solvers_V1 = [matrix_to_bandsolver(Ai) for Ai in B1_mat_V1] + B2_solvers_V1 = [matrix_to_bandsolver(Ai) for Ai in B2_mat_V1] + + B1_kron_inv_V1 = KroneckerLinearSolver(V1_1, V1_1, B1_solvers_V1) + B2_kron_inv_V1 = KroneckerLinearSolver(V1_2, V1_2, B2_solvers_V1) + + M1_block_kron_solver = BlockLinearOperator(V1h, V1h, ((B1_kron_inv_V1, None), + (None, B2_kron_inv_V1))) + + #V2 L2 space + + B_mat_V2 = [M1_matrices[0], M1_matrices[1]] + + B_solvers_V2 = [matrix_to_bandsolver(Ai) for Ai in B_mat_V2] + + M2_kron_solver = KroneckerLinearSolver(V2h, V2h, B_solvers_V2) + + + return M0_kron_solver, M1_block_kron_solver, M2_kron_solver + #=============================================================================== # tests of the direct solvers @pytest.mark.parametrize( 'dtype', [float, complex] ) @@ -177,12 +366,14 @@ def test_direct_solvers(dtype, seed, n, p, P, nrhs, direct_solver, transposed): D = DomainDecomposition([n], periods=[P]) cart = CartDecomposition(D, [n], *compute_global_starts_ends(D, [n]), pads=[p], shifts=[1]) - # space (V) + # domain V and codomain W V = StencilVectorSpace( cart, dtype=dtype ) # bulid matrices (A) - A = random_matrix(seed+1, V) + A = random_matrix(seed+1, V, V) solver = direct_solver(A) + if transposed: + solver = solver.T # vector to solve for (Y) Y_glob = np.stack([random_vectordata(seed + i, [n], dtype) for i in range(nrhs)], axis=0) @@ -198,15 +389,15 @@ def test_direct_solvers(dtype, seed, n, p, P, nrhs, direct_solver, transposed): X_glob = C_op.solve(Y_glob.T).T # new vector allocation - X_glob2 = solver.solve(Y_glob, transposed=transposed) + X_glob2 = solver.solve(Y_glob) # solve with out vector X_glob3 = Y_glob.copy() - X_glob4 = solver.solve(Y_glob, out=X_glob3, transposed=transposed) + X_glob4 = solver.solve(Y_glob, out=X_glob3) # solve in-place X_glob5 = Y_glob.copy() - X_glob6 = solver.solve(X_glob5, out=X_glob5, transposed=transposed) + X_glob6 = solver.solve(X_glob5, out=X_glob5) # compare results assert X_glob4 is X_glob3 @@ -363,6 +554,221 @@ def test_kron_solver_nd_par(seed, dim, dtype): npts_base = 4 compare_solve(seed, MPI.COMM_WORLD, [npts_base]*dim, [1]*dim, [False]*dim, matrix_to_sparse, dtype=dtype, transposed=False, verbose=False) + +#=============================================================================== + +# test Kronecker solver of the M1 mass matrix of our 3D DeRham sequence, as described in the get_M1_block_kron_solver method +@pytest.mark.parametrize( 'ncells', [[8, 8, 8], [8, 16, 8]] ) +@pytest.mark.parametrize( 'degree', [[2, 2, 2]] ) +@pytest.mark.parametrize( 'periodic', [[True, True, True]] ) +@pytest.mark.parallel +def test_3d_m1_solver(ncells, degree, periodic): + + comm = MPI.COMM_WORLD + domain = Cube('C', bounds1=(0, 1), bounds2=(0, 1), bounds3=(0, 1)) + derham = Derham(domain) + domain_h = discretize(domain, ncells=ncells, periodic=periodic, comm=comm) + derham_h = discretize(derham, domain_h, degree=degree) + V1 = derham_h.V1.vector_space + P0, P1, P2, P3 = derham_h.projectors() + + # obtain an iterative M1 solver the usual way + u1, v1 = elements_of(derham.V1, names='u1, v1') + a1 = BilinearForm((u1, v1), integral(domain, dot(u1, v1))) + a1_h = discretize(a1, domain_h, (derham_h.V1, derham_h.V1)) + M1 = a1_h.assemble() + tol = 1e-12 + maxiter = 1000 + M1_iterative_solver = inverse(M1, 'cg', tol = tol, maxiter=maxiter) + + # obtain a direct M1 solver utilizing the Block-Kronecker structure of M1 + M1_direct_solver = get_M1_block_kron_solver(V1, ncells, degree, periodic) + + # obtain x and rhs = M1 @ x, both elements of derham_h.V1 + def get_A_fun(n=1, m=1, A0=1e04): + """Get the tuple A = (A1, A2, A3), where each entry is a function taking x,y,z as input.""" + + mu_tilde = np.sqrt(m**2 + n**2) + + eta = lambda x, y, z: x**2 * (1-x)**2 * y**2 * (1-y)**2 * z**2 * (1-z)**2 + + u1 = lambda x, y, z: A0 * (n/mu_tilde) * np.sin(np.pi * m * x) * np.cos(np.pi * n * y) + u2 = lambda x, y, z: -A0 * (m/mu_tilde) * np.cos(np.pi * m * x) * np.sin(np.pi * n * y) + u3 = lambda x, y, z: A0 * np.sin(np.pi * m * x) * np.sin(np.pi * n * y) + + A1 = lambda x, y, z: eta(x, y, z) * u1(x, y, z) + A2 = lambda x, y, z: eta(x, y, z) * u2(x, y, z) + A3 = lambda x, y, z: eta(x, y, z) * u3(x, y, z) + + A = (A1, A2, A3) + return A + x = P1(get_A_fun()).coeffs + rhs = M1 @ x + + # solve M1 @ x = rhs for x two ways + # pass -s to see timings + # on my local machine, executing + # mpirun -n 4 python -m pytest test_kron_direct_solver.py::test_3d_m1_solver -s + # I can report the following data: + + ### 4 processes, test case 1 (ncells=[8, 8, 8]): + + # Solving for x using the iterative solver: 23.73982548713684 seconds + # Solving for x using the iterative solver: 23.820897102355957 seconds + # Solving for x using the iterative solver: 23.783425092697144 seconds + # Solving for x using the iterative solver: 23.71373987197876 seconds + # Solving for x using the direct solver: 0.3333120346069336 seconds + # Solving for x using the direct solver: 0.3369138240814209 seconds + # Solving for x using the direct solver: 0.33652329444885254 seconds + # Solving for x using the direct solver: 0.34088802337646484 seconds + + ###4 processes, test case 2 (ncells=[8, 16, 8]): + # Solving for x using the iterative solver: 82.10541296005249 seconds + # Solving for x using the iterative solver: 81.88263297080994 seconds + # Solving for x using the iterative solver: 82.07102465629578 seconds + # Solving for x using the iterative solver: 82.00282955169678 seconds + # Solving for x using the direct solver: 0.1675126552581787 seconds + # Solving for x using the direct solver: 0.17473626136779785 seconds + # Solving for x using the direct solver: 0.15992450714111328 seconds + # Solving for x using the direct solver: 0.17931437492370605 seconds + + # Note that on consecutive solves, with only a slightly changing rhs and recycle=True, the iterative solver won't perform as bad anymore. + + start = time.time() + x_iterative = M1_iterative_solver @ rhs + stop = time.time() + print(f"Solving for x using the iterative solver: {stop-start} seconds") + + start = time.time() + x_direct = M1_direct_solver @ rhs + stop = time.time() + print(f"Solving for x using the direct solver: {stop-start} seconds") + + # assert rhs_iterative is within the tolerance close to rhs, and so is rhs_direct + rhs_iterative = M1 @ x_iterative + rhs_direct = M1 @ x_direct + assert np.linalg.norm((rhs-rhs_iterative).toarray()) < tol + assert np.linalg.norm((rhs-rhs_direct).toarray()) < tol + + +#=============================================================================== + +# test Kronecker solver of the mass matrices of our 2D (H1,Hdiv,L2) DeRham sequence, using get_inverse_mass_matrices function + +@pytest.mark.parametrize( 'ncells', [[8, 8], [8, 16]] ) +@pytest.mark.parametrize( 'degree', [[2, 2], [2,3]] ) +@pytest.mark.parametrize( 'bounds', [[(0,1), (0,1)], [(0,0.5), (0,2.)]] ) +@pytest.mark.parametrize( 'periodic', [[True, True], [False,False]] ) +@pytest.mark.parallel +def test_2d_mass_solver(ncells, degree, bounds, periodic): + + comm = MPI.COMM_WORLD + domain = Square('Omega', bounds1=bounds[0], bounds2=bounds[1]) + derham = Derham(domain, ["H1", "Hdiv", "L2"]) + domain_h = discretize(domain, ncells=ncells, periodic=periodic, comm=comm) + derham_h = discretize(derham, domain_h, degree=degree) + + P0, P1, P2 = derham_h.projectors() + + # obtain an iterative M0 solver the usual way + u0, v0 = elements_of(derham.V0, names='u0, v0') + a0 = BilinearForm((u0, v0), integral(domain, u0*v0)) + a0_h = discretize(a0, domain_h, (derham_h.V0, derham_h.V0)) + M0 = a0_h.assemble() + tol = 1e-10 + maxiter = 1000 + M0_iterative_solver = inverse(M0, 'cg', tol = tol, maxiter=maxiter) + + # obtain an iterative M1 solver the usual way + u1, v1 = elements_of(derham.V1, names='u1, v1') + a1 = BilinearForm((u1, v1), integral(domain, dot(u1, v1))) + a1_h = discretize(a1, domain_h, (derham_h.V1, derham_h.V1)) + M1 = a1_h.assemble() + tol = 1e-10 + maxiter = 1000 + M1_iterative_solver = inverse(M1, 'cg', tol = tol, maxiter=maxiter) + + # obtain an iterative M2 solver the usual way + u2, v2 = elements_of(derham.V2, names='u0, v0') + a2 = BilinearForm((u2, v2), integral(domain, u2*v2)) + a2_h = discretize(a2, domain_h, (derham_h.V2, derham_h.V2)) + M2 = a2_h.assemble() + tol = 1e-10 + maxiter = 1000 + M2_iterative_solver = inverse(M2, 'cg', tol = tol, maxiter=maxiter) + + # obtain a direct M1 solver utilizing the Block-Kronecker structure of M1 + M0_direct_solver, M1_direct_solver, M2_direct_solver = get_inverse_mass_matrices(derham_h, domain_h) + + # obtain x and rhs = M1 @ x, both elements of derham_h.V1 + def get_A_fun_vec(n=1, m=1, A0=1e04): + """Get the tuple A = (A1, A2), where each entry is a function taking x,y,z as input.""" + + mu_tilde = np.sqrt(m**2 + n**2) + + eta = lambda x, y: x**2 * (1-x)**2 * y**2 * (1-y)**2 + + u1 = lambda x, y: A0 * (n/mu_tilde) * np.sin(np.pi * m * x) * np.cos(np.pi * n * y) + u2 = lambda x, y: -A0 * (m/mu_tilde) * np.cos(np.pi * m * x) * np.sin(np.pi * n * y) + + A1 = lambda x, y: eta(x, y) * u1(x, y) + A2 = lambda x, y: eta(x, y) * u2(x, y) + + A = (A1, A2) + return A + + def get_A_fun_scalar(n=1, m=1, A0=1e04): + """Get the tuple A = (A1, A2), where each entry is a function taking x,y,z as input.""" + + mu_tilde = np.sqrt(m**2 + n**2) + + eta = lambda x, y: x**2 * (1-x)**2 * y**2 * (1-y)**2 + + u = lambda x, y: A0 * (n/mu_tilde) * np.sin(np.pi * m * x) * np.cos(np.pi * n * y) + + A = lambda x, y: eta(x, y) * u(x, y) + return A + + + x0 = P0(get_A_fun_scalar()).coeffs + rhs = M0 @ x0 + + # solve M0 @ x9 = rhs for x two ways + x_iterative = M0_iterative_solver @ rhs + x_direct = M0_direct_solver @ rhs + + # assert rhs_iterative is within the tolerance close to rhs, and so is rhs_direct + rhs_iterative = M0 @ x_iterative + rhs_direct = M0 @ x_direct + assert np.linalg.norm((rhs-rhs_iterative).toarray()) < tol + assert np.linalg.norm((rhs-rhs_direct).toarray()) < tol + + x1 = P1(get_A_fun_vec()).coeffs + rhs = M1 @ x1 + + # solve M1 @ x = rhs for x two ways + x_iterative = M1_iterative_solver @ rhs + x_direct = M1_direct_solver @ rhs + + # assert rhs_iterative is within the tolerance close to rhs, and so is rhs_direct + rhs_iterative = M1 @ x_iterative + rhs_direct = M1 @ x_direct + assert np.linalg.norm((rhs-rhs_iterative).toarray()) < tol + assert np.linalg.norm((rhs-rhs_direct).toarray()) < tol + + x2 = P2(get_A_fun_scalar()).coeffs + rhs = M2 @ x2 + + # solve M2 @ x = rhs for x two ways + x_iterative = M2_iterative_solver @ rhs + x_direct = M2_direct_solver @ rhs + + # assert rhs_iterative is within the tolerance close to rhs, and so is rhs_direct + rhs_iterative = M2 @ x_iterative + rhs_direct = M2 @ x_direct + assert np.linalg.norm((rhs-rhs_iterative).toarray()) < tol + assert np.linalg.norm((rhs-rhs_direct).toarray()) < tol + #=============================================================================== if __name__ == '__main__':