Skip to content

Commit

Permalink
Apply clang format
Browse files Browse the repository at this point in the history
  • Loading branch information
maddyscientist committed Dec 15, 2023
1 parent fc3ea42 commit bad098a
Show file tree
Hide file tree
Showing 10 changed files with 42 additions and 30 deletions.
9 changes: 5 additions & 4 deletions include/kernels/block_transpose.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,11 @@ namespace quda
static constexpr const char *filename() { return KERNEL_FILE; }

/**
   Helper that derives a dim3 from a thread-block shape: dims()
   returns the block it was given with x incremented by one and z
   forced to 1 (y is passed through unchanged).
   NOTE(review): the extra element in x is presumably padding to avoid
   shared-memory bank conflicts in the transpose -- confirm against
   the cache type that consumes these dims.
 */
struct CacheDims {
static constexpr dim3 dims(dim3 block) {
block.x += 1;
block.z = 1;
return block;
static constexpr dim3 dims(dim3 block)
{
block.x += 1;
block.z = 1;
return block;
}
};

Expand Down
7 changes: 4 additions & 3 deletions include/kernels/coarse_op_kernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1388,9 +1388,10 @@ namespace quda {
};

template <> struct storeCoarseSharedAtomic_impl<true> {
template <typename Arg> using CacheT =
complex<storeType>[Arg::max_color_height_per_block][Arg::max_color_width_per_block][4][Arg::coarseSpin][Arg::coarseSpin];
template <typename Arg> using Cache = SharedMemoryCache<CacheT<Arg>,DimsStatic<2,1,1>>;
template <typename Arg>
using CacheT = complex<storeType>[Arg::max_color_height_per_block][Arg::max_color_width_per_block][4]
[Arg::coarseSpin][Arg::coarseSpin];
template <typename Arg> using Cache = SharedMemoryCache<CacheT<Arg>, DimsStatic<2, 1, 1>>;

template <typename VUV, typename Pack, typename Arg>
inline __device__ void operator()(VUV &vuv, bool isDiagonal, int coarse_x_cb, int coarse_parity, int i0, int j0, int parity, const Pack &pack, const Arg &arg)
Expand Down
13 changes: 7 additions & 6 deletions include/kernels/color_spinor_pack.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,13 @@ namespace quda {
/**
   Compile-time helper that derives a dim3 from a thread-block shape
   for a given Arg.
   - Ms and Mc are the values returned by spins_per_thread<true>(Arg::nSpin)
     and colors_per_thread<true>(Arg::nColor).
   - color_spin_threads = (Arg::nSpin / Ms) * (Arg::nColor / Mc).
   - dims() rounds block.x up to the next multiple of device::warp_size()
     when Arg::is_native is set, overwrites block.y with
     color_spin_threads, and leaves block.z unchanged.
 */
template <typename Arg> struct CacheDims {
static constexpr int Ms = spins_per_thread<true>(Arg::nSpin);
static constexpr int Mc = colors_per_thread<true>(Arg::nColor);
static constexpr int color_spin_threads = (Arg::nSpin/Ms) * (Arg::nColor/Mc);
static constexpr dim3 dims(dim3 block) {
// pad the shared block size to avoid bank conflicts for native ordering
if (Arg::is_native) block.x = ((block.x + device::warp_size() - 1) / device::warp_size()) * device::warp_size();
block.y = color_spin_threads; // state the y block since we know it at compile time
return block;
static constexpr int color_spin_threads = (Arg::nSpin / Ms) * (Arg::nColor / Mc);
static constexpr dim3 dims(dim3 block)
{
// pad the shared block size to avoid bank conflicts for native ordering
if (Arg::is_native) block.x = ((block.x + device::warp_size() - 1) / device::warp_size()) * device::warp_size();
block.y = color_spin_threads; // state the y block since we know it at compile time
return block;
}
};

Expand Down
4 changes: 2 additions & 2 deletions include/kernels/gauge_stout.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ namespace quda
}

Link U, Q;
ThreadLocalCache<Link,0,computeStapleRectangleOps> Stap;
ThreadLocalCache<Link,0,decltype(Stap)> Rect; // offset by Stap type to ensure non-overlapping allocations
ThreadLocalCache<Link, 0, computeStapleRectangleOps> Stap;
ThreadLocalCache<Link, 0, decltype(Stap)> Rect; // offset by Stap type to ensure non-overlapping allocations

// This function gets stap = S_{mu,nu} i.e., the staple of length 3,
// and the 1x2 and 2x1 rectangles of length 5. From the following paper:
Expand Down
4 changes: 2 additions & 2 deletions include/kernels/gauge_wilson_flow.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ namespace quda
// This function gets stap = S_{mu,nu} i.e., the staple of length 3,
// and the 1x2 and 2x1 rectangles of length 5. From the following paper:
// https://arxiv.org/abs/0801.1165
ThreadLocalCache<Link,0,computeStapleRectangleOps> Stap;
ThreadLocalCache<Link,0,decltype(Stap)> Rect; // offset by Stap type to ensure non-overlapping allocations
ThreadLocalCache<Link, 0, computeStapleRectangleOps> Stap;
ThreadLocalCache<Link, 0, decltype(Stap)> Rect; // offset by Stap type to ensure non-overlapping allocations
computeStapleRectangle(arg, x, arg.E, parity, dir, Stap, Rect, Arg::wflow_dim);
Z = arg.coeff1x1 * static_cast<const Link &>(Stap) + arg.coeff2x1 * static_cast<const Link &>(Rect);
break;
Expand Down
18 changes: 11 additions & 7 deletions include/kernels/hisq_paths_force.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,9 @@ namespace quda {
2 multiplies, 1 add, 1 rescale
*/
template <typename LinkCache>
__device__ __host__ inline void lepage_force(int x[4], int point_a, int parity_a, Link &force_mu, LinkCache &Uab_cache) {
__device__ __host__ inline void lepage_force(int x[4], int point_a, int parity_a, Link &force_mu,
LinkCache &Uab_cache)
{
int point_b = linkExtendedIndexShiftMILC<sig_positive>(x, arg.sig, arg);
int parity_b = 1 - parity_a;

Expand Down Expand Up @@ -707,7 +709,8 @@ namespace quda {
4 multiplies, 2 adds, 2 rescales
*/
template <typename LinkCache>
__device__ __host__ inline void all_link(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache) {
__device__ __host__ inline void all_link(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache)
{
auto mycoeff_seven = parity_sign<typename Arg::real>(parity_a) * coeff_sign<sig_positive, typename Arg::real>(parity_a) * arg.coeff_seven;

int point_b = linkExtendedIndexShiftMILC<sig_positive>(x, arg.sig, arg);
Expand Down Expand Up @@ -801,7 +804,6 @@ namespace quda {
force_sig = mm_add(mycoeff_seven * Oz, Od * Uda, force_sig);
Matrix_cache.save(force_sig, 2);
}

}

/**
Expand All @@ -820,7 +822,8 @@ namespace quda {
2 multiplies, 2 adds, 2 rescales
*/
template <typename LinkCache>
__device__ __host__ inline void side_five(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache) {
__device__ __host__ inline void side_five(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache)
{
int y[4] = {x[0], x[1], x[2], x[3]};
int point_h = updateCoordExtendedIndexShiftMILC<flip_dir(nu_positive)>(y, arg.nu, arg);
int parity_h = 1 - parity_a;
Expand Down Expand Up @@ -873,7 +876,8 @@ namespace quda {
1 multiply, 1 add, 1 rescale
*/
template <typename LinkCache>
__device__ __host__ inline void middle_five(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache) {
__device__ __host__ inline void middle_five(int x[4], int point_a, int parity_a, LinkCache &Matrix_cache)
{
int point_b = linkExtendedIndexShiftMILC<sig_positive>(x, arg.sig, arg);
int parity_b = 1 - parity_a;

Expand Down Expand Up @@ -959,8 +963,8 @@ namespace quda {
int parity_a = parity;

// calculate p5_sig
constexpr int cacheLen = sig_positive ? 3 : 2;
ThreadLocalCache<Link,cacheLen> Matrix_cache;
constexpr int cacheLen = sig_positive ? 3 : 2;
ThreadLocalCache<Link, cacheLen> Matrix_cache;

if constexpr (sig_positive) {
Link force_sig = arg.force(arg.sig, point_a, parity_a);
Expand Down
6 changes: 4 additions & 2 deletions lib/dslash5_domain_wall.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,10 @@ namespace quda
&& (type == Dslash5Type::M5_INV_DWF || type == Dslash5Type::M5_INV_MOBIUS
|| type == Dslash5Type::M5_INV_ZMOBIUS)) {
// spin components in shared depend on inversion algorithm
bool isInv = type == Dslash5Type::M5_INV_DWF || type == Dslash5Type::M5_INV_MOBIUS || type == Dslash5Type::M5_INV_ZMOBIUS;
int nSpin = (!isInv || mobius_m5::var_inverse()) ? mobius_m5::use_half_vector() ? in.Nspin() / 2 : in.Nspin() : in.Nspin();
bool isInv = type == Dslash5Type::M5_INV_DWF || type == Dslash5Type::M5_INV_MOBIUS
|| type == Dslash5Type::M5_INV_ZMOBIUS;
int nSpin = (!isInv || mobius_m5::var_inverse()) ? mobius_m5::use_half_vector() ? in.Nspin() / 2 : in.Nspin() :
in.Nspin();
return 2 * nSpin * nColor * sizeof(typename mapper<Float>::type);
} else {
return 0;
Expand Down
4 changes: 2 additions & 2 deletions lib/dslash_clover_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ namespace quda {
/**
   @brief Number of bytes per thread reported by this object.
   @return 0 when in.TwistFlavor() is QUDA_TWIST_SINGLET; otherwise
   (in.Nspin() / 2) * in.Ncolor() * 2 elements of type
   mapper<Float>::type.
   NOTE(review): the trailing factor of 2 is presumably the real and
   imaginary parts, and Nspin / 2 one chirality -- confirm with the
   kernel that consumes this buffer.
 */
unsigned int sharedBytesPerThread() const
{
if (in.TwistFlavor() == QUDA_TWIST_SINGLET) {
return 0;
return 0;
} else {
return (in.Nspin() / 2) * in.Ncolor() * 2 * sizeof(typename mapper<Float>::type);
return (in.Nspin() / 2) * in.Ncolor() * 2 * sizeof(typename mapper<Float>::type);
}
}

Expand Down
3 changes: 2 additions & 1 deletion lib/gauge_loop_trace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ namespace quda {
return (p.count * mat_mul_flops + p.num_paths * (2 * Nc + 2)) * u.Volume();
}

/**
   @brief Byte count reported for this computation.
   @return p.count * u.Bytes() / 4
   NOTE(review): dividing u.Bytes() by 4 presumably selects a single
   direction of the gauge field (one LatticeColorMatrix) -- confirm
   that u always holds four directions.
 */
long long bytes() const override {
long long bytes() const override
{
// links * one LatticeColorMatrix worth of data
return p.count * u.Bytes() / 4;
}
Expand Down
4 changes: 3 additions & 1 deletion lib/gauge_wilson_flow.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ namespace quda {
/**
   @brief Number of bytes per thread reported by this object.
   @return 4 * sizeof(int) for the thread_array, plus -- only when
   wflow_type is QUDA_GAUGE_SMEAR_SYMANZIK_FLOW -- space for two
   Ncolor x Ncolor matrices with 2 elements of mapper<Float>::type
   per entry.
 */
unsigned int sharedBytesPerThread() const
{
// use ThreadLocalCache if using Symanzik improvement for two Link fields
return (wflow_type == QUDA_GAUGE_SMEAR_SYMANZIK_FLOW ? 2 * in.Ncolor() * in.Ncolor() * 2 * sizeof(typename mapper<Float>::type) : 0)
return (wflow_type == QUDA_GAUGE_SMEAR_SYMANZIK_FLOW ?
2 * in.Ncolor() * in.Ncolor() * 2 * sizeof(typename mapper<Float>::type) :
0)
+ 4 * sizeof(int); // for thread_array
}

Expand Down

0 comments on commit bad098a

Please sign in to comment.