From f3f6733ca3b98fd0b73fd1fa27e661a20087bc73 Mon Sep 17 00:00:00 2001 From: Julian Owezarek Date: Tue, 26 Sep 2023 13:57:31 +0200 Subject: [PATCH 01/13] add kernel --- psydac/core/field_evaluation_kernels.py | 26 +++++++++++++++++++++++++ psydac/fem/tensor.py | 7 +++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/psydac/core/field_evaluation_kernels.py b/psydac/core/field_evaluation_kernels.py index b7972ba0a..56d7f9633 100644 --- a/psydac/core/field_evaluation_kernels.py +++ b/psydac/core/field_evaluation_kernels.py @@ -4,6 +4,32 @@ # ============================================================================= # Field evaluation functions # ============================================================================= +# ----------------------------------------------------------------------------- +# 0: Evaluation of single 3d field at single point +# ----------------------------------------------------------------------------- +@template(name='T1', types=['float[:,:,:]', 'complex[:,:,:]']) +@template(name='T2', types=['float[:]', 'complex[:]']) +def eval_field_3d_once(local_coeffs: 'T1', + local_bases_0: 'T2', local_bases_1: 'T2', local_bases_2: 'T2'): + """ + Parameters + ---------- + local_coeffs: ndarray of floats + Active (local) coefficients of the fields in all directions + + local_bases: list of ndarrays + Active (local) 1D-basis functions values at the point of evaluation. + """ + res = local_bases_0[0] - local_bases_0[0] + + for i, c1 in enumerate(local_coeffs): + for j, c2 in enumerate(c1): + for k, c3 in enumerate(c2): + res += c3 * local_bases_0[i] * local_bases_1[j] * local_bases_2[k] + + return res + + # ----------------------------------------------------------------------------- # 1: Regular tensor grid without weight # ----------------------------------------------------------------------------- diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index e2e363e2d..1a74263ad 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -38,7 +38,8 @@ eval_fields_3d_no_weights, eval_fields_3d_irregular_no_weights, eval_fields_3d_weighted, - eval_fields_3d_irregular_weighted) + eval_fields_3d_irregular_weighted, + eval_field_3d_once) __all__ = ('TensorFemSpace',) @@ -230,9 +231,7 @@ def eval_field( self, field, *eta, weights=None): # - Pros: small number of Python iterations = ldim # - Cons: we create ldim-1 temporary objects of decreasing size # - res = coeffs - for basis in bases[::-1]: - res = np.dot( res, basis ) + res = eval_field_3d_once(coeffs, bases[0], bases[1], bases[2]) # # Option 2: cycle over each element of 'coeffs' (touched only once) # # - Pros: no temporary objects are created From e6a8f5d58f2ef34174fa0869f62d9380e5a22273 Mon Sep 17 00:00:00 2001 From: Julian Owezarek Date: Tue, 26 Sep 2023 17:59:32 +0200 Subject: [PATCH 02/13] fix dimension error --- psydac/fem/tensor.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index 1a74263ad..6fa53f2b0 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -231,7 +231,12 @@ def eval_field( self, field, *eta, weights=None): # - Pros: small number of Python iterations = ldim # - Cons: we create ldim-1 temporary objects of decreasing size # - res = eval_field_3d_once(coeffs, bases[0], bases[1], bases[2]) + if len(bases) == 3: + res = eval_field_3d_once(coeffs, bases[0], bases[1], bases[2]) + else: + res = coeffs + for basis in bases[::-1]: + res = np.dot( res, basis ) # # Option 2: cycle over each element of 
'coeffs' (touched only once) # # - Pros: no temporary objects are created From 6f956d96a9ec113f6c269075f7ea7e79c1b8a44f Mon Sep 17 00:00:00 2001 From: vcarlier Date: Wed, 6 Mar 2024 09:20:29 +0100 Subject: [PATCH 03/13] update evaluation method --- psydac/fem/tensor.py | 247 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 222 insertions(+), 25 deletions(-) diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index 6fa53f2b0..c9fdfc43c 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -31,6 +31,10 @@ cell_index, basis_ders_on_irregular_grid) +from psydac.core.bsplines_kernels import (find_span_p, + basis_funs_p, + basis_funs_1st_der_p) + from psydac.core.field_evaluation_kernels import (eval_fields_2d_no_weights, eval_fields_2d_irregular_no_weights, eval_fields_2d_weighted, @@ -129,6 +133,9 @@ def __init__(self, domain_decomposition, *spaces, vector_space=None, cart=None, self._global_element_starts = domain_decomposition.global_element_starts self._global_element_ends = domain_decomposition.global_element_ends + # computes once and for all ldim + self._ldim = sum([V.ldim for V in self.spaces]) + self.set_refined_space(self.ncells, self) #-------------------------------------------------------------------------- # Abstract interface: read-only attributes @@ -137,7 +144,7 @@ def __init__(self, domain_decomposition, *spaces, vector_space=None, cart=None, def ldim( self ): """ Parametric dimension. """ - return sum([V.ldim for V in self.spaces]) + return self._ldim @property def periodic(self): @@ -184,19 +191,27 @@ def eval_field( self, field, *eta, weights=None): if weights: assert weights.space == field.coeffs.space - bases = [] index = [] + if weights: + w_index = [] + + if not hasattr(self,'_tmp_coeffs'): + self._tmp_coeffs = np.zeros([s.degree +1 for s in self.spaces]) + if not hasattr(self,'_tmp_bf'): + self._tmp_bf = [np.zeros(s.degree + 1, dtype=float) for s in self.spaces] + + coeffs = self._tmp_coeffs # Necessary if vector coeffs is distributed across processes if not field.coeffs.ghost_regions_in_sync: field.coeffs.update_ghost_regions() - for (x, xlim, space) in zip( eta, self.eta_lims, self.spaces ): + for i, (x, xlim, space) in enumerate(zip( eta, self.eta_lims, self.spaces )): knots = space.knots degree = space.degree - span = find_span( knots, degree, x ) - + span = find_span_p( knots, degree, x ) + vs = space.vector_space #-------------------------------------------------# # Fix span for boundaries between subdomains # #-------------------------------------------------# @@ -206,7 +221,9 @@ def eval_field( self, field, *eta, weights=None): if x == xlim[1] and x != knots[-1-degree]: span -= 1 #-------------------------------------------------# - basis = basis_funs( knots, degree, x, span) + + basis = self._tmp_bf[i] + basis_funs_p( knots, degree, x, span, out = basis) # If needed, rescale B-splines to get M-splines if space.basis == 'M': @@ -215,14 +232,18 @@ def eval_field( self, field, *eta, weights=None): # Determine local span wrap_x = space.periodic and x > xlim[1] loc_span = span - space.nbasis if wrap_x else span - - bases.append( basis ) - index.append( slice( loc_span-degree, loc_span+1 ) ) + start = vs.starts[0] + pad = vs.pads[0] + shift = vs.shifts[0] + index.append( slice( loc_span-degree-start+shift*pad, loc_span+1-start+shift*pad ) ) + if weights: + w_index.append(slice(loc_span-degree, loc_span+1)) # Get contiguous copy of the spline coefficients required for evaluation + bases = self._tmp_bf index = tuple( index ) - coeffs = 
field.coeffs[index].copy() + coeffs[:] = field.coeffs._data[index] if weights: - coeffs *= weights[index] + coeffs *= weights[w_index] # Evaluation of multi-dimensional spline # TODO: optimize @@ -248,6 +269,82 @@ def eval_field( self, field, *eta, weights=None): # res += c * ndbasis return res + + def eval_fields_one_point( self, fields, *eta, weights=None): + + for field in fields : + assert isinstance( field, FemField ) + assert field.space is self + if weights: + assert weights.space == field.coeffs.space + if not field.coeffs.ghost_regions_in_sync: + # Necessary if vector coeffs is distributed across processes + field.coeffs.update_ghost_regions() + + assert len( eta ) == self.ldim + + if not hasattr(self,'_tmp_coeffs'): + self._tmp_coeffs = np.zeros([s.degree +1 for s in self.spaces]) + if not hasattr(self,'_tmp_bf'): + self._tmp_bf = [np.zeros(s.degree + 1, dtype=float) for s in self.spaces] + + coeffs = self._tmp_coeffs + + index = [] + if weights: + w_index = [] + + for i, (x, xlim, space) in enumerate(zip( eta, self.eta_lims, self.spaces )): + + knots = space.knots + degree = space.degree + span = find_span_p( knots, degree, x ) + vs = space.vector_space + + #-------------------------------------------------# + # Fix span for boundaries between subdomains # + #-------------------------------------------------# + # TODO: Use local knot sequence instead of global # + # one to get correct span in all situations # + #-------------------------------------------------# + if x == xlim[1] and x != knots[-1-degree]: + span -= 1 + #-------------------------------------------------# + basis = self._tmp_bf[i] + basis_funs_p( knots, degree, x, span, out = basis) + + # If needed, rescale B-splines to get M-splines + if space.basis == 'M': + basis *= space.scaling_array[span-degree : span+1] + + # Determine local span + wrap_x = space.periodic and x > xlim[1] + loc_span = span - space.nbasis if wrap_x else span + start = vs.starts[0] + pad = vs.pads[0] + shift = vs.shifts[0] + index.append( slice( loc_span-degree-start+shift*pad, loc_span+1-start+shift*pad ) ) + if weights: + w_index.append(slice(loc_span-degree, loc_span+1)) + + bases = self._tmp_bf + res_tot = [] + for field in fields: + index = tuple( index ) + # Get contiguous copy of the spline coefficients required for evaluation + coeffs[:] = field.coeffs._data[index] + if weights: + coeffs *= weights[w_index] + + if len(bases) == 3: + res = eval_field_3d_once(coeffs, bases[0], bases[1], bases[2]) + else: + res = coeffs + for basis in bases[::-1]: + res = np.dot( res, basis ) + res_tot.append(res) + + return res_tot # ... 
def preprocess_regular_tensor_grid(self, grid, der=0, overlap=0): @@ -618,15 +715,24 @@ def eval_field_gradient( self, field, *eta , weights=None): assert field.space is self assert len( eta ) == self.ldim - bases_0 = [] - bases_1 = [] index = [] + if weights: + w_index = [] + + if not hasattr(self,'_tmp_coeffs'): + self._tmp_coeffs = np.zeros([s.degree +1 for s in self.spaces]) + if not hasattr(self,'_tmp_bf'): + self._tmp_bf = [np.zeros(s.degree + 1, dtype=float) for s in self.spaces] + if not hasattr(self,'_tmp_bf1d'): + self._tmp_bf1d = [np.zeros(s.degree + 1, dtype=float) for s in self.spaces] + coeffs = self._tmp_coeffs - for (x, xlim, space) in zip( eta, self.eta_lims, self.spaces ): + for i, (x, xlim, space) in enumerate(zip( eta, self.eta_lims, self.spaces )): knots = space.knots degree = space.degree - span = find_span( knots, degree, x ) + span = find_span_p( knots, degree, x ) + vs = space.vector_space #-------------------------------------------------# # Fix span for boundaries between subdomains # #-------------------------------------------------# @@ -636,8 +742,11 @@ def eval_field_gradient( self, field, *eta , weights=None): if x == xlim[1] and x != knots[-1-degree]: span -= 1 #-------------------------------------------------# - basis_0 = basis_funs(knots, degree, x, span) - basis_1 = basis_funs_1st_der(knots, degree, x, span) + + basis_0 = self._tmp_bf[i] + basis_1 = self._tmp_bf1d[i] + basis_funs_p( knots, degree, x, span, out = basis_0) + basis_funs_1st_der_p(knots, degree, x, span, out = basis_1) # If needed, rescale B-splines to get M-splines if space.basis == 'M': @@ -649,26 +758,114 @@ def eval_field_gradient( self, field, *eta , weights=None): wrap_x = space.periodic and x > xlim[1] loc_span = span - space.nbasis if wrap_x else span - bases_0.append( basis_0 ) - bases_1.append( basis_1 ) - index.append( slice( loc_span-degree, loc_span+1 ) ) - + start = vs.starts[0] + pad = vs.pads[0] + shift = vs.shifts[0] + index.append( slice( loc_span-degree-start+shift*pad, loc_span+1-start+shift*pad ) ) + if weights: + w_index.append(slice( loc_span-degree, loc_span+1)) # Get contiguous copy of the spline coefficients required for evaluation + bases_0 = self._tmp_bf + bases_1 = self._tmp_bf1d index = tuple( index ) - coeffs = field.coeffs[index].copy() + coeffs[:] = field.coeffs._data[index] if weights: - coeffs *= weights[index] + coeffs *= weights[w_index] # Evaluate each component of the gradient using algorithm described in "Option 1" above grad = [] for d in range( self.ldim ): bases = [(bases_1[d] if i==d else bases_0[i]) for i in range( self.ldim )] res = coeffs - for basis in bases[::-1]: - res = np.dot( res, basis ) + if len(bases) == 3: + res = eval_field_3d_once(coeffs, bases[0], bases[1], bases[2]) + else : + for basis in bases[::-1]: + res = np.dot( res, basis ) grad.append( res ) return grad + + def eval_fields_gradient_one_point( self, fields, *eta , weights=None): + + for field in fields: + assert isinstance( field, FemField ) + assert field.space is self + assert len( eta ) == self.ldim + + index = [] + if weights: + w_index = [] + + if not hasattr(self,'_tmp_coeffs'): + self._tmp_coeffs = np.zeros([s.degree +1 for s in self.spaces]) + if not hasattr(self,'_tmp_bf'): + self._tmp_bf = [np.zeros(s.degree + 1, dtype=float) for s in self.spaces] + if not hasattr(self,'_tmp_bf1d'): + self._tmp_bf1d = [np.zeros(s.degree + 1, dtype=float) for s in self.spaces] + coeffs = self._tmp_coeffs + + for i, (x, xlim, space) in enumerate(zip( eta, self.eta_lims, self.spaces 
)): + + knots = space.knots + degree = space.degree + span = find_span_p( knots, degree, x ) + vs = space.vector_space + #-------------------------------------------------# + # Fix span for boundaries between subdomains # + #-------------------------------------------------# + # TODO: Use local knot sequence instead of global # + # one to get correct span in all situations # + #-------------------------------------------------# + if x == xlim[1] and x != knots[-1-degree]: + span -= 1 + #-------------------------------------------------# + basis_0 = self._tmp_bf[i] + basis_1 = self._tmp_bf1d[i] + basis_funs_p( knots, degree, x, span, out = basis_0) + basis_funs_1st_der_p(knots, degree, x, span, out = basis_1) + + # If needed, rescale B-splines to get M-splines + if space.basis == 'M': + scaling = space.scaling_array[span-degree : span+1] + basis_0 *= scaling + basis_1 *= scaling + + # Determine local span + wrap_x = space.periodic and x > xlim[1] + loc_span = span - space.nbasis if wrap_x else span + + start = vs.starts[0] + pad = vs.pads[0] + shift = vs.shifts[0] + index.append( slice( loc_span-degree-start+shift*pad, loc_span+1-start+shift*pad ) ) + if weights: + w_index.append(slice( loc_span-degree, loc_span+1)) + + index = tuple( index ) + bases_0 = self._tmp_bf + bases_1 = self._tmp_bf1d + grad_tot = [] + + for field in fields: + # Get contiguous copy of the spline coefficients required for evaluation + coeffs[:] = field.coeffs._data[index] + if weights: + coeffs *= weights[w_index] + + # Evaluate each component of the gradient using algorithm described in "Option 1" above + + grad_tot.append([]) + for d in range( self.ldim ): + bases = [(bases_1[d] if i==d else bases_0[i]) for i in range( self.ldim )] + if len(bases) == 3: + res = eval_field_3d_once(coeffs, bases[0], bases[1], bases[2]) + else : + res = coeffs + for basis in bases[::-1]: + res = np.dot( res, basis ) + grad_tot[-1].append( res ) + return grad_tot # ... def integral( self, f ): From 33eb0bf5d6990e9261f5fc46eb03126f13f5ba1b Mon Sep 17 00:00:00 2001 From: vcarlier Date: Wed, 6 Mar 2024 09:22:03 +0100 Subject: [PATCH 04/13] use new method in geometry --- psydac/mapping/discrete.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/psydac/mapping/discrete.py b/psydac/mapping/discrete.py index 636b46b0f..39fc90c7c 100644 --- a/psydac/mapping/discrete.py +++ b/psydac/mapping/discrete.py @@ -133,11 +133,11 @@ def from_control_points(cls, tensor_space, control_points): # Abstract interface #-------------------------------------------------------------------------- def __call__(self, *eta): - return [map_Xd(*eta) for map_Xd in self._fields] + return self.space.eval_fields_one_point(self._fields, *eta) # ... def jacobian(self, *eta): - return np.array([map_Xd.gradient(*eta) for map_Xd in self._fields]) + return np.array(self.space.eval_fields_gradient_one_point(self._fields, *eta)) # ... 
def jacobian_inv(self, *eta): From 56c45d8425973b87bc52d7ca54ef067a607f5df7 Mon Sep 17 00:00:00 2001 From: vcarlier Date: Wed, 6 Mar 2024 11:35:06 +0100 Subject: [PATCH 05/13] convert index to tuple to avoid bug --- psydac/core/tests/test_field_evaluation_kernel.py | 3 +++ psydac/fem/tensor.py | 1 + 2 files changed, 4 insertions(+) diff --git a/psydac/core/tests/test_field_evaluation_kernel.py b/psydac/core/tests/test_field_evaluation_kernel.py index 92dc8e0fd..abdd71655 100644 --- a/psydac/core/tests/test_field_evaluation_kernel.py +++ b/psydac/core/tests/test_field_evaluation_kernel.py @@ -642,3 +642,6 @@ def test_pushforwards_hcurl(ldim): pushforward_3d_hcurl(field_to_push, inv_jacobians, out) assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + +if __name__ == '__main__': + test_regular_jacobians('bent_pipe.h5', 2) diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index c9fdfc43c..9e716f8be 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -242,6 +242,7 @@ def eval_field( self, field, *eta, weights=None): bases = self._tmp_bf index = tuple( index ) coeffs[:] = field.coeffs._data[index] + w_index = tuple(w_index) if weights: coeffs *= weights[w_index] From bca8e40fb219340b1192c8e352b281d1c8bd2d2c Mon Sep 17 00:00:00 2001 From: vcarlier Date: Wed, 6 Mar 2024 15:33:42 +0100 Subject: [PATCH 06/13] fix weights --- psydac/fem/tensor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index 9e716f8be..05eb88cb1 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -241,9 +241,9 @@ def eval_field( self, field, *eta, weights=None): # Get contiguous copy of the spline coefficients required for evaluation bases = self._tmp_bf index = tuple( index ) - coeffs[:] = field.coeffs._data[index] - w_index = tuple(w_index) + coeffs[:] = field.coeffs._data[index] if weights: + w_index = tuple(w_index) coeffs *= weights[w_index] # Evaluation of multi-dimensional spline From b7ccd51902db9337480afcb2b61a1c56695f5e4b Mon Sep 17 00:00:00 2001 From: vcarlier Date: Wed, 6 Mar 2024 16:36:47 +0100 Subject: [PATCH 07/13] other weight bug... 
--- psydac/fem/tensor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index 05eb88cb1..a071809f2 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -771,6 +771,7 @@ def eval_field_gradient( self, field, *eta , weights=None): index = tuple( index ) coeffs[:] = field.coeffs._data[index] if weights: + w_index = tuple(w_index) coeffs *= weights[w_index] # Evaluate each component of the gradient using algorithm described in "Option 1" above From bcd6c4d347ea4cf24fa08cd15549fbb5506cab04 Mon Sep 17 00:00:00 2001 From: vcarlier Date: Wed, 6 Mar 2024 18:28:01 +0100 Subject: [PATCH 08/13] check type before kernels to avoid wrong type --- .../tests/test_field_evaluation_kernel.py | 1 + psydac/fem/tensor.py | 45 +++++++++++-------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/psydac/core/tests/test_field_evaluation_kernel.py b/psydac/core/tests/test_field_evaluation_kernel.py index abdd71655..8304bd959 100644 --- a/psydac/core/tests/test_field_evaluation_kernel.py +++ b/psydac/core/tests/test_field_evaluation_kernel.py @@ -645,3 +645,4 @@ def test_pushforwards_hcurl(ldim): if __name__ == '__main__': test_regular_jacobians('bent_pipe.h5', 2) + test_regular_evaluations([np.sort(np.concatenate((np.zeros(3), np.random.random(9), np.ones(3)))) for i in range(2)], 2, [2] * 2,2) diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index a071809f2..303db460e 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -210,7 +210,9 @@ def eval_field( self, field, *eta, weights=None): knots = space.knots degree = space.degree - span = find_span_p( knots, degree, x ) + # be sure it's the right type for kernels + fx = float(x) + span = find_span_p( knots, degree, fx ) vs = space.vector_space #-------------------------------------------------# # Fix span for boundaries between subdomains # @@ -218,19 +220,19 @@ def eval_field( self, field, *eta, weights=None): # TODO: Use local knot sequence instead of global # # one to get correct span in all situations # #-------------------------------------------------# - if x == xlim[1] and x != knots[-1-degree]: + if fx == xlim[1] and fx != knots[-1-degree]: span -= 1 #-------------------------------------------------# basis = self._tmp_bf[i] - basis_funs_p( knots, degree, x, span, out = basis) + basis_funs_p( knots, degree, fx, span, out = basis) # If needed, rescale B-splines to get M-splines if space.basis == 'M': basis *= space.scaling_array[span-degree : span+1] # Determine local span - wrap_x = space.periodic and x > xlim[1] + wrap_x = space.periodic and fx > xlim[1] loc_span = span - space.nbasis if wrap_x else span start = vs.starts[0] pad = vs.pads[0] @@ -299,7 +301,9 @@ def eval_fields_one_point( self, fields, *eta, weights=None): knots = space.knots degree = space.degree - span = find_span_p( knots, degree, x ) + # be sure it's the right type for kernels + fx = float(x) + span = find_span_p( knots, degree, fx ) vs = space.vector_space #-------------------------------------------------# @@ -308,18 +312,18 @@ def eval_fields_one_point( self, fields, *eta, weights=None): # TODO: Use local knot sequence instead of global # # one to get correct span in all situations # #-------------------------------------------------# - if x == xlim[1] and x != knots[-1-degree]: + if fx == xlim[1] and fx != knots[-1-degree]: span -= 1 #-------------------------------------------------# basis = self._tmp_bf[i] - basis_funs_p( knots, degree, x, span, out = basis) + basis_funs_p( knots, degree, fx, 
span, out = basis) # If needed, rescale B-splines to get M-splines if space.basis == 'M': basis *= space.scaling_array[span-degree : span+1] # Determine local span - wrap_x = space.periodic and x > xlim[1] + wrap_x = space.periodic and fx > xlim[1] loc_span = span - space.nbasis if wrap_x else span start = vs.starts[0] pad = vs.pads[0] @@ -732,7 +736,9 @@ def eval_field_gradient( self, field, *eta , weights=None): knots = space.knots degree = space.degree - span = find_span_p( knots, degree, x ) + # be sure it's the right type for kernels + fx = float(x) + span = find_span_p( knots, degree, fx ) vs = space.vector_space #-------------------------------------------------# # Fix span for boundaries between subdomains # @@ -740,14 +746,15 @@ def eval_field_gradient( self, field, *eta , weights=None): # TODO: Use local knot sequence instead of global # # one to get correct span in all situations # #-------------------------------------------------# - if x == xlim[1] and x != knots[-1-degree]: + if fx == xlim[1] and fx != knots[-1-degree]: span -= 1 #-------------------------------------------------# basis_0 = self._tmp_bf[i] basis_1 = self._tmp_bf1d[i] - basis_funs_p( knots, degree, x, span, out = basis_0) - basis_funs_1st_der_p(knots, degree, x, span, out = basis_1) + + basis_funs_p( knots, degree, fx, span, out = basis_0) + basis_funs_1st_der_p(knots, degree, fx, span, out = basis_1) # If needed, rescale B-splines to get M-splines if space.basis == 'M': @@ -756,7 +763,7 @@ def eval_field_gradient( self, field, *eta , weights=None): basis_1 *= scaling # Determine local span - wrap_x = space.periodic and x > xlim[1] + wrap_x = space.periodic and fx > xlim[1] loc_span = span - space.nbasis if wrap_x else span start = vs.starts[0] @@ -811,7 +818,9 @@ def eval_fields_gradient_one_point( self, fields, *eta , weights=None): knots = space.knots degree = space.degree - span = find_span_p( knots, degree, x ) + # be sure it's the right type for kernels + fx = float(x) + span = find_span_p( knots, degree, fx ) vs = space.vector_space #-------------------------------------------------# # Fix span for boundaries between subdomains # @@ -819,13 +828,13 @@ def eval_fields_gradient_one_point( self, fields, *eta , weights=None): # TODO: Use local knot sequence instead of global # # one to get correct span in all situations # #-------------------------------------------------# - if x == xlim[1] and x != knots[-1-degree]: + if fx == xlim[1] and fx != knots[-1-degree]: span -= 1 #-------------------------------------------------# basis_0 = self._tmp_bf[i] basis_1 = self._tmp_bf1d[i] - basis_funs_p( knots, degree, x, span, out = basis_0) - basis_funs_1st_der_p(knots, degree, x, span, out = basis_1) + basis_funs_p( knots, degree, fx, span, out = basis_0) + basis_funs_1st_der_p(knots, degree, fx, span, out = basis_1) # If needed, rescale B-splines to get M-splines if space.basis == 'M': @@ -834,7 +843,7 @@ def eval_fields_gradient_one_point( self, fields, *eta , weights=None): basis_1 *= scaling # Determine local span - wrap_x = space.periodic and x > xlim[1] + wrap_x = space.periodic and fx > xlim[1] loc_span = span - space.nbasis if wrap_x else span start = vs.starts[0] From 6871dcd0126a20cae9f60fe57a987e0d39cd48dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yaman=20G=C3=BC=C3=A7l=C3=BC?= Date: Fri, 8 Mar 2024 12:25:58 +0100 Subject: [PATCH 09/13] Improve kernel `eval_field_3d_once` --- psydac/core/field_evaluation_kernels.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git 
a/psydac/core/field_evaluation_kernels.py b/psydac/core/field_evaluation_kernels.py index d3f611467..87c354ad0 100644 --- a/psydac/core/field_evaluation_kernels.py +++ b/psydac/core/field_evaluation_kernels.py @@ -7,10 +7,11 @@ # ----------------------------------------------------------------------------- # 0: Evaluation of single 3d field at single point # ----------------------------------------------------------------------------- -@template(name='T1', types=['float[:,:,:]', 'complex[:,:,:]']) -@template(name='T2', types=['float[:]', 'complex[:]']) -def eval_field_3d_once(local_coeffs: 'T1', - local_bases_0: 'T2', local_bases_1: 'T2', local_bases_2: 'T2'): +@template(name='T', types=[float, complex]) +def eval_field_3d_once(local_coeffs : 'T[:,:,:]', + local_bases_0: 'float[:]', + local_bases_1: 'float[:]', + local_bases_2: 'float[:]'): """ Parameters ---------- @@ -20,12 +21,16 @@ def eval_field_3d_once(local_coeffs: 'T1', local_bases: list of ndarrays Active (local) 1D-basis functions values at the point of evaluation. """ - res = local_bases_0[0] - local_bases_0[0] - - for i, c1 in enumerate(local_coeffs): - for j, c2 in enumerate(c1): - for k, c3 in enumerate(c2): - res += c3 * local_bases_0[i] * local_bases_1[j] * local_bases_2[k] + n1, n2, n3 = local_coeffs.shape + + res = local_coeffs[0, 0, 0] - local_coeffs[0, 0, 0] + for i1 in range(n1): + for i2 in range(n2): + for i3 in range(n3): + res += (local_coeffs[i1, i2, i3] * + local_bases_0[i1] * + local_bases_1[i2] * + local_bases_2[i3]) return res From 26c69095cfc1d2c08925849503db9a0b331948ad Mon Sep 17 00:00:00 2001 From: vcarlier Date: Tue, 26 Mar 2024 10:20:42 +0100 Subject: [PATCH 10/13] fix access to starts, shifts and pads from the global vector space --- psydac/fem/tensor.py | 24 +++++++++---------- psydac/fem/tests/test_spline_interpolation.py | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index 303db460e..22931ce3d 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -234,9 +234,9 @@ def eval_field( self, field, *eta, weights=None): # Determine local span wrap_x = space.periodic and fx > xlim[1] loc_span = span - space.nbasis if wrap_x else span - start = vs.starts[0] - pad = vs.pads[0] - shift = vs.shifts[0] + start = self.vector_space.starts[i] + pad = self.vector_space.pads[i] + shift = self.vector_space.shifts[i] index.append( slice( loc_span-degree-start+shift*pad, loc_span+1-start+shift*pad ) ) if weights: w_index.append(slice(loc_span-degree, loc_span+1)) @@ -325,9 +325,9 @@ def eval_fields_one_point( self, fields, *eta, weights=None): # Determine local span wrap_x = space.periodic and fx > xlim[1] loc_span = span - space.nbasis if wrap_x else span - start = vs.starts[0] - pad = vs.pads[0] - shift = vs.shifts[0] + start = self.vector_space.starts[i] + pad = self.vector_space.pads[i] + shift = self.vector_space.shifts[i] index.append( slice( loc_span-degree-start+shift*pad, loc_span+1-start+shift*pad ) ) if weights: w_index.append(slice(loc_span-degree, loc_span+1)) @@ -766,9 +766,9 @@ def eval_field_gradient( self, field, *eta , weights=None): wrap_x = space.periodic and fx > xlim[1] loc_span = span - space.nbasis if wrap_x else span - start = vs.starts[0] - pad = vs.pads[0] - shift = vs.shifts[0] + start = self.vector_space.starts[i] + pad = self.vector_space.pads[i] + shift = self.vector_space.shifts[i] index.append( slice( loc_span-degree-start+shift*pad, loc_span+1-start+shift*pad ) ) if weights: w_index.append(slice( 
loc_span-degree, loc_span+1)) @@ -846,9 +846,9 @@ def eval_fields_gradient_one_point( self, fields, *eta , weights=None): wrap_x = space.periodic and fx > xlim[1] loc_span = span - space.nbasis if wrap_x else span - start = vs.starts[0] - pad = vs.pads[0] - shift = vs.shifts[0] + start = self.vector_space.starts[i] + pad = self.vector_space.pads[i] + shift = self.vector_space.shifts[i] index.append( slice( loc_span-degree-start+shift*pad, loc_span+1-start+shift*pad ) ) if weights: w_index.append(slice( loc_span-degree, loc_span+1)) diff --git a/psydac/fem/tests/test_spline_interpolation.py b/psydac/fem/tests/test_spline_interpolation.py index 5912c9004..41ddcf30e 100644 --- a/psydac/fem/tests/test_spline_interpolation.py +++ b/psydac/fem/tests/test_spline_interpolation.py @@ -178,4 +178,4 @@ def test_SplineInterpolation2D_parallel_exact( nc1, nc2, deg1, deg2 ): # SCRIPT FUNCTIONALITY #=============================================================================== if __name__ == '__main__': - test_SplineInterpolation2D_parallel_exact( 10, 16, 3, 5 ) + test_SplineInterpolation2D_parallel_exact( 7, 7, 1, 1 ) From acc86a32886f507f4f8f41f49850807ad94e1fc3 Mon Sep 17 00:00:00 2001 From: vcarlier Date: Tue, 26 Mar 2024 11:15:04 +0100 Subject: [PATCH 11/13] fix allocation of ldim --- psydac/fem/tensor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index 22931ce3d..2dca8609c 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -87,6 +87,9 @@ def __init__(self, domain_decomposition, *spaces, vector_space=None, cart=None, self._spaces = tuple(spaces) self._dtype = dtype + # computes once and for all ldim + self._ldim = sum([V.ldim for V in self.spaces]) + if cart is not None: self._vector_space = StencilVectorSpace(cart, dtype=dtype) elif vector_space is not None: @@ -133,9 +136,6 @@ def __init__(self, domain_decomposition, *spaces, vector_space=None, cart=None, self._global_element_starts = domain_decomposition.global_element_starts self._global_element_ends = domain_decomposition.global_element_ends - # computes once and for all ldim - self._ldim = sum([V.ldim for V in self.spaces]) - self.set_refined_space(self.ncells, self) #-------------------------------------------------------------------------- # Abstract interface: read-only attributes From d6d4617cf04bf88c7b89390f08b38c4a934894cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yaman=20G=C3=BC=C3=A7l=C3=BC?= Date: Fri, 4 Oct 2024 10:23:26 +0200 Subject: [PATCH 12/13] Fix wrong merge in tensor.py --- psydac/fem/tensor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index 32d186237..ed524cebc 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -101,7 +101,7 @@ def __init__(self, domain_decomposition, *spaces, vector_space=None, cart=None, vector_space = StencilVectorSpace(cart, dtype=dtype) # Store some info - self._ldim = sum(V.ldim for V in self.spaces) + self._ldim = sum(V.ldim for V in spaces) self._domain_decomposition = domain_decomposition self._spaces = spaces self._dtype = dtype From 9963fd667530314ef72d7ec581b73db54a36dfc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yaman=20G=C3=BC=C3=A7l=C3=BC?= Date: Fri, 4 Oct 2024 11:52:11 +0200 Subject: [PATCH 13/13] Some cleanup in tensor.py --- psydac/fem/tensor.py | 72 ++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index ed524cebc..cfc0a89c7 
100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -169,26 +169,26 @@ def is_product(self): return False @property - def symbolic_space( self ): + def symbolic_space(self): return self._symbolic_space @property - def interfaces( self ): + def interfaces(self): return self._interfaces_readonly @symbolic_space.setter - def symbolic_space( self, symbolic_space ): + def symbolic_space(self, symbolic_space): assert isinstance(symbolic_space, BasicFunctionSpace) self._symbolic_space = symbolic_space #-------------------------------------------------------------------------- # Abstract interface: evaluation methods #-------------------------------------------------------------------------- - def eval_field( self, field, *eta, weights=None): + def eval_field(self, field, *eta, weights=None): - assert isinstance( field, FemField ) + assert isinstance(field, FemField) assert field.space is self - assert len( eta ) == self.ldim + assert len(eta) == self.ldim if weights: assert weights.space == field.coeffs.space @@ -196,54 +196,68 @@ def eval_field( self, field, *eta, weights=None): if weights: w_index = [] - if not hasattr(self,'_tmp_coeffs'): - self._tmp_coeffs = np.zeros([s.degree +1 for s in self.spaces]) - if not hasattr(self,'_tmp_bf'): + # ------------------------------- + # Local storage for this function + # ------------------------------- + # [a] One N-dim. array w/ coefficients of non-zero N-dim. tensor-product basis functions + if not hasattr(self, '_tmp_coeffs'): + self._tmp_coeffs = np.zeros([s.degree + 1 for s in self.spaces]) + # + # [b] List of N 1D arrays w/ values of non-zero 1D basis functions along each direction + if not hasattr(self, '_tmp_bf'): self._tmp_bf = [np.zeros(s.degree + 1, dtype=float) for s in self.spaces] + # ------------------------------- + # Shortcuts coeffs = self._tmp_coeffs + bases = self._tmp_bf # Necessary if vector coeffs is distributed across processes if not field.coeffs.ghost_regions_in_sync: field.coeffs.update_ghost_regions() - for i, (x, xlim, space) in enumerate(zip( eta, self.eta_lims, self.spaces )): + for i, (x, xlim, space) in enumerate(zip(eta, self.eta_lims, self.spaces)): + # Shortcuts knots = space.knots degree = space.degree - # be sure it's the right type for kernels + + # All kernels expect the coordinate 'x' to be a Python float fx = float(x) - span = find_span_p( knots, degree, fx ) - vs = space.vector_space + + # Compute span, i.e. 
index of last non-zero B-spline + span = find_span_p(knots, degree, fx) + #-------------------------------------------------# # Fix span for boundaries between subdomains # #-------------------------------------------------# # TODO: Use local knot sequence instead of global # # one to get correct span in all situations # #-------------------------------------------------# - if fx == xlim[1] and fx != knots[-1-degree]: + if fx == xlim[1] and fx != knots[-1 - degree]: span -= 1 #-------------------------------------------------# - - basis = self._tmp_bf[i] - basis_funs_p( knots, degree, fx, span, out = basis) + + # Compute values of non-zero B-splines + basis_funs_p(knots, degree, fx, span, out = bases[i]) # If needed, rescale B-splines to get M-splines if space.basis == 'M': - basis *= space.scaling_array[span-degree : span+1] + bases[i] *= space.scaling_array[span - degree : span + 1] # Determine local span wrap_x = space.periodic and fx > xlim[1] loc_span = span - space.nbasis if wrap_x else span - start = self.vector_space.starts[i] - pad = self.vector_space.pads[i] - shift = self.vector_space.shifts[i] - index.append( slice( loc_span-degree-start+shift*pad, loc_span+1-start+shift*pad ) ) + start = self.vector_space.starts[i] + pad = self.vector_space.pads [i] + shift = self.vector_space.shifts[i] + index.append(slice(loc_span - degree - start + shift * pad, + loc_span + 1 - start + shift * pad)) if weights: - w_index.append(slice(loc_span-degree, loc_span+1)) + w_index.append(slice(loc_span - degree, loc_span + 1)) + # Get contiguous copy of the spline coefficients required for evaluation - bases = self._tmp_bf - index = tuple( index ) + index = tuple(index) coeffs[:] = field.coeffs._data[index] if weights: w_index = tuple(w_index) @@ -257,19 +271,19 @@ def eval_field( self, field, *eta, weights=None): # - Cons: we create ldim-1 temporary objects of decreasing size # if len(bases) == 3: - res = eval_field_3d_once(coeffs, bases[0], bases[1], bases[2]) + res = eval_field_3d_once(coeffs, *bases) else: res = coeffs for basis in bases[::-1]: - res = np.dot( res, basis ) + res = np.dot(res, basis) # # Option 2: cycle over each element of 'coeffs' (touched only once) # # - Pros: no temporary objects are created # # - Cons: large number of Python iterations = number of elements in 'coeffs' # # # res = 0.0 -# for idx,c in np.ndenumerate( coeffs ): -# ndbasis = np.prod( [b[i] for i,b in zip( idx, bases )] ) +# for idx, c in np.ndenumerate(coeffs): +# ndbasis = np.prod([b[i] for i, b in zip(idx, bases)]) # res += c * ndbasis return res
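
Note on the single-point evaluation kernel (patches 01, 02 and 09): for one evaluation point, the original chain of np.dot contractions and the new triple-loop kernel compute the same tensor-product sum, res = sum_{i1,i2,i3} c[i1,i2,i3] * b0[i1] * b1[i2] * b2[i3]; the kernel just avoids the ldim-1 temporary arrays ("Option 1" in the old comments) and can be compiled with Pyccel. The sketch below illustrates that equivalence with plain NumPy. It is not code from the patches: eval_field_3d_once_py is a hypothetical pure-Python mirror of eval_field_3d_once, written here only for illustration, and the degree-3 sizes are arbitrary.

# Hedged sketch: pure-Python mirror of `eval_field_3d_once` vs. the np.dot chain it replaces.
import numpy as np

def eval_field_3d_once_py(local_coeffs, b0, b1, b2):
    # res = sum_{i1,i2,i3} c[i1,i2,i3] * b0[i1] * b1[i2] * b2[i3]
    n1, n2, n3 = local_coeffs.shape
    res = local_coeffs[0, 0, 0] - local_coeffs[0, 0, 0]   # zero of the right dtype
    for i1 in range(n1):
        for i2 in range(n2):
            for i3 in range(n3):
                res += local_coeffs[i1, i2, i3] * b0[i1] * b1[i2] * b2[i3]
    return res

rng = np.random.default_rng(0)
coeffs = rng.random((4, 4, 4))              # active coefficients for degree-3 splines (p+1 = 4)
bases  = [rng.random(4) for _ in range(3)]  # non-zero 1D basis values in each direction

# Original evaluation: successive contractions, creating 2D then 1D temporaries
res_dot = coeffs
for basis in bases[::-1]:
    res_dot = np.dot(res_dot, basis)

# Kernel-style evaluation: a single triple loop, no temporary arrays
res_kernel = eval_field_3d_once_py(coeffs, *bases)

assert np.isclose(res_dot, res_kernel)

The same contraction, restricted to the (degree+1)**3 active coefficients selected through the starts/pads/shifts offsets of the stencil vector space, is what the later patches reuse in eval_fields_one_point and eval_fields_gradient_one_point, so that several fields (for example the components of a discrete mapping) share one span/basis computation per call.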