Apply clang format

lattice · Dec 15, 2023 · bad098a · bad098a
1 parent fc3ea42
commit bad098a
Show file tree

Hide file tree

Showing 10 changed files with 42 additions and 30 deletions.
diff --git a/include/kernels/block_transpose.cuh b/include/kernels/block_transpose.cuh
@@ -48,10 +48,11 @@ namespace quda
     static constexpr const char *filename() { return KERNEL_FILE; }
 
     struct CacheDims {
-      static constexpr dim3 dims(dim3 block) {
-	block.x += 1;
-	block.z = 1;
-	return block;
+      static constexpr dim3 dims(dim3 block)
+      {
+        block.x += 1;
+        block.z = 1;
+        return block;
       }
     };
 

diff --git a/include/kernels/coarse_op_kernel.cuh b/include/kernels/coarse_op_kernel.cuh
@@ -1388,9 +1388,10 @@ namespace quda {
   };
 
   template <> struct storeCoarseSharedAtomic_impl<true> {
-    template <typename Arg> using CacheT =
-      complex<storeType>[Arg::max_color_height_per_block][Arg::max_color_width_per_block][4][Arg::coarseSpin][Arg::coarseSpin];
-    template <typename Arg> using Cache = SharedMemoryCache<CacheT<Arg>,DimsStatic<2,1,1>>;
+    template <typename Arg>
+    using CacheT = complex<storeType>[Arg::max_color_height_per_block][Arg::max_color_width_per_block][4]
+                                     [Arg::coarseSpin][Arg::coarseSpin];
+    template <typename Arg> using Cache = SharedMemoryCache<CacheT<Arg>, DimsStatic<2, 1, 1>>;
 
     template <typename VUV, typename Pack, typename Arg>
     inline __device__ void operator()(VUV &vuv, bool isDiagonal, int coarse_x_cb, int coarse_parity, int i0, int j0, int parity, const Pack &pack, const Arg &arg)

diff --git a/include/kernels/color_spinor_pack.cuh b/include/kernels/color_spinor_pack.cuh
@@ -175,12 +175,13 @@ namespace quda {
     template <typename Arg> struct CacheDims {
       static constexpr int Ms = spins_per_thread<true>(Arg::nSpin);
       static constexpr int Mc = colors_per_thread<true>(Arg::nColor);
-      static constexpr int color_spin_threads = (Arg::nSpin/Ms) * (Arg::nColor/Mc);
-      static constexpr dim3 dims(dim3 block) {
-	// pad the shared block size to avoid bank conflicts for native ordering
-	if (Arg::is_native) block.x = ((block.x + device::warp_size() - 1) / device::warp_size()) * device::warp_size();
-	block.y = color_spin_threads; // state the y block since we know it at compile time
-	return block;
+      static constexpr int color_spin_threads = (Arg::nSpin / Ms) * (Arg::nColor / Mc);
+      static constexpr dim3 dims(dim3 block)
+      {
+        // pad the shared block size to avoid bank conflicts for native ordering
+        if (Arg::is_native) block.x = ((block.x + device::warp_size() - 1) / device::warp_size()) * device::warp_size();
+        block.y = color_spin_threads; // state the y block since we know it at compile time
+        return block;
       }
     };
 

diff --git a/include/kernels/gauge_stout.cuh b/include/kernels/gauge_stout.cuh
@@ -135,8 +135,8 @@ namespace quda
       }
 
       Link U, Q;
-      ThreadLocalCache<Link,0,computeStapleRectangleOps> Stap;
-      ThreadLocalCache<Link,0,decltype(Stap)> Rect; // offset by Stap type to ensure non-overlapping allocations
+      ThreadLocalCache<Link, 0, computeStapleRectangleOps> Stap;
+      ThreadLocalCache<Link, 0, decltype(Stap)> Rect; // offset by Stap type to ensure non-overlapping allocations
 
       // This function gets stap = S_{mu,nu} i.e., the staple of length 3,
       // and the 1x2 and 2x1 rectangles of length 5. From the following paper:

diff --git a/include/kernels/gauge_wilson_flow.cuh b/include/kernels/gauge_wilson_flow.cuh
@@ -72,8 +72,8 @@ namespace quda
       // This function gets stap = S_{mu,nu} i.e., the staple of length 3,
       // and the 1x2 and 2x1 rectangles of length 5. From the following paper:
       // https://arxiv.org/abs/0801.1165
-      ThreadLocalCache<Link,0,computeStapleRectangleOps> Stap;
-      ThreadLocalCache<Link,0,decltype(Stap)> Rect; // offset by Stap type to ensure non-overlapping allocations
+      ThreadLocalCache<Link, 0, computeStapleRectangleOps> Stap;
+      ThreadLocalCache<Link, 0, decltype(Stap)> Rect; // offset by Stap type to ensure non-overlapping allocations
       computeStapleRectangle(arg, x, arg.E, parity, dir, Stap, Rect, Arg::wflow_dim);
       Z = arg.coeff1x1 * static_cast<const Link &>(Stap) + arg.coeff2x1 * static_cast<const Link &>(Rect);
       break;

diff --git a/include/kernels/hisq_paths_force.cuh b/include/kernels/hisq_paths_force.cuh
@@ -387,7 +387,9 @@ namespace quda {
             2 multiplies, 1 add, 1 rescale
       */
       template <typename LinkCache>
-      __device__ __host__ inline void lepage_force(int x[4], int point_a, int parity_a, Link &force_mu, LinkCache &Uab_cache) {
+      __device__ __host__ inline void lepage_force(int x[4], int point_a, int parity_a, Link &force_mu,
+                                                   LinkCache &Uab_cache)
+      {
         int point_b = linkExtendedIndexShiftMILC<sig_positive>(x, arg.sig, arg);
         int parity_b = 1 - parity_a;
 
@@ -707,7 +709,8 @@ namespace quda {
             4 multiplies, 2 adds, 2 rescales
       */
       template <typename LinkCache>
-      __device__ __host__ inline void all_link(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache) {
+      __device__ __host__ inline void all_link(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache)
+      {
         auto mycoeff_seven = parity_sign<typename Arg::real>(parity_a) * coeff_sign<sig_positive, typename Arg::real>(parity_a) * arg.coeff_seven;
 
         int point_b = linkExtendedIndexShiftMILC<sig_positive>(x, arg.sig, arg);
@@ -801,7 +804,6 @@ namespace quda {
           force_sig = mm_add(mycoeff_seven * Oz, Od * Uda, force_sig);
           Matrix_cache.save(force_sig, 2);
         }
-
       }
 
       /**
@@ -820,7 +822,8 @@ namespace quda {
             2 multiplies, 2 adds, 2 rescales
       */
       template <typename LinkCache>
-      __device__ __host__ inline void side_five(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache) {
+      __device__ __host__ inline void side_five(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache)
+      {
         int y[4] = {x[0], x[1], x[2], x[3]};
         int point_h = updateCoordExtendedIndexShiftMILC<flip_dir(nu_positive)>(y, arg.nu, arg);
         int parity_h = 1 - parity_a;
@@ -873,7 +876,8 @@ namespace quda {
             1 multiply, 1 add, 1 rescale
       */
       template <typename LinkCache>
-      __device__ __host__ inline void middle_five(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache) {
+      __device__ __host__ inline void middle_five(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache)
+      {
         int point_b = linkExtendedIndexShiftMILC<sig_positive>(x, arg.sig, arg);
         int parity_b = 1 - parity_a;
 
@@ -959,8 +963,8 @@ namespace quda {
         int parity_a = parity;
 
         // calculate p5_sig
-	constexpr int cacheLen = sig_positive ? 3 : 2;
-        ThreadLocalCache<Link,cacheLen> Matrix_cache;
+        constexpr int cacheLen = sig_positive ? 3 : 2;
+        ThreadLocalCache<Link, cacheLen> Matrix_cache;
 
         if constexpr (sig_positive) {
           Link force_sig = arg.force(arg.sig, point_a, parity_a);

diff --git a/lib/dslash5_domain_wall.cu b/lib/dslash5_domain_wall.cu
@@ -72,8 +72,10 @@ namespace quda
           && (type == Dslash5Type::M5_INV_DWF || type == Dslash5Type::M5_INV_MOBIUS
               || type == Dslash5Type::M5_INV_ZMOBIUS)) {
         // spin components in shared depend on inversion algorithm
-	bool isInv = type == Dslash5Type::M5_INV_DWF || type == Dslash5Type::M5_INV_MOBIUS || type == Dslash5Type::M5_INV_ZMOBIUS;
-        int nSpin = (!isInv || mobius_m5::var_inverse()) ? mobius_m5::use_half_vector() ? in.Nspin() / 2 : in.Nspin() : in.Nspin();
+        bool isInv = type == Dslash5Type::M5_INV_DWF || type == Dslash5Type::M5_INV_MOBIUS
+          || type == Dslash5Type::M5_INV_ZMOBIUS;
+        int nSpin = (!isInv || mobius_m5::var_inverse()) ? mobius_m5::use_half_vector() ? in.Nspin() / 2 : in.Nspin() :
+                                                           in.Nspin();
         return 2 * nSpin * nColor * sizeof(typename mapper<Float>::type);
       } else {
         return 0;

diff --git a/lib/dslash_clover_helper.cu b/lib/dslash_clover_helper.cu
@@ -77,9 +77,9 @@ namespace quda {
     unsigned int sharedBytesPerThread() const
     {
       if (in.TwistFlavor() == QUDA_TWIST_SINGLET) {
-	return 0;
+        return 0;
       } else {
-	return (in.Nspin() / 2) * in.Ncolor() * 2 * sizeof(typename mapper<Float>::type);
+        return (in.Nspin() / 2) * in.Ncolor() * 2 * sizeof(typename mapper<Float>::type);
       }
     }
 

diff --git a/lib/gauge_loop_trace.cu b/lib/gauge_loop_trace.cu
@@ -47,7 +47,8 @@ namespace quda {
       return (p.count * mat_mul_flops + p.num_paths * (2 * Nc + 2)) * u.Volume();
     }
 
-    long long bytes() const override {
+    long long bytes() const override
+    {
       // links * one LatticeColorMatrix worth of data
       return p.count * u.Bytes() / 4;
     }

diff --git a/lib/gauge_wilson_flow.cu b/lib/gauge_wilson_flow.cu
@@ -25,7 +25,9 @@ namespace quda {
     unsigned int sharedBytesPerThread() const
     {
       // use ThreadLocalCache if using Symanzik improvement for two Link fields
-      return (wflow_type == QUDA_GAUGE_SMEAR_SYMANZIK_FLOW ? 2 * in.Ncolor() * in.Ncolor() * 2 * sizeof(typename mapper<Float>::type) : 0)
+      return (wflow_type == QUDA_GAUGE_SMEAR_SYMANZIK_FLOW ?
+                2 * in.Ncolor() * in.Ncolor() * 2 * sizeof(typename mapper<Float>::type) :
+                0)
         + 4 * sizeof(int); // for thread_array
     }