-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcluster2d_gpu.cuh
78 lines (71 loc) · 2.03 KB
/
cluster2d_gpu.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#pragma once
#include <cassert>
#include <iostream>
#include "cluster2d_kernels.cuh"
#include "errorcheck.cuh"
#include "umatrix_gpu.cuh"
// Host-side driver that runs run_kernel_pre / run_kernel_post over a boolean
// 2D map and lets the host follow the resulting `parent` links.
// NOTE(review): the class name and the root() walk suggest union-find cluster
// labelling; the kernels live in cluster2d_kernels.cuh - confirm there.
class UF2dGPU
{
private:
    // Private copy of the input map; its storage is handed to both kernels.
    MatrixGPU<bool> c;
    // One link per cell. root() treats an entry equal to its own index as the
    // end of a chain.
    MatrixGPU<unsigned long long> parent;

public:
    // Copies `map`, builds `parent` from the map's row and column counts and
    // calls push() on the copy.
    // NOTE(review): push() presumably makes the map available on the device -
    // confirm in umatrix_gpu.cuh.
    UF2dGPU(const MatrixGPU<bool>& map) : c(map), parent(map.rows(), map.cols())
    {
        c.push();
    }

    // Launches run_kernel_pre followed by run_kernel_post on a
    // (rows / bsize) x (cols / bsize) grid with `nthreads` threads per block,
    // then pulls `parent` and waits for the device.
    //
    // bsize    : tile edge length; must be > 0 and divide both matrix
    //            dimensions (checked with assert, i.e. only in debug builds).
    // nthreads : threads per block; must be > 0 (assert).
    //
    // An empty map (zero rows or zero columns) is a no-op.
    void run(const unsigned bsize = 32U, const unsigned nthreads = 32U)
    {
        assert(nthreads > 0);
        assert(bsize > 0);
        assert(c.rows() % bsize == 0);
        assert(c.cols() % bsize == 0);

        // 0 % bsize == 0, so an empty map passes the asserts above but would
        // yield a grid with zero blocks, which is an invalid launch
        // configuration. There is nothing to process in that case.
        if (c.rows() == 0 || c.cols() == 0)
        {
            return;
        }

        // Dynamic shared memory requested for run_kernel_pre. The sizeof term
        // comes first so the whole product is evaluated in size_t rather than
        // in 32-bit unsigned arithmetic.
        // NOTE(review): the second term is sizeof(unsigned) although `parent`
        // stores unsigned long long - confirm against run_kernel_pre.
        const auto size_shared =
            (sizeof(bool) + sizeof(unsigned)) * bsize * (bsize + 1);

        const auto nb_row = static_cast<unsigned>(c.rows() / bsize);
        const auto nb_col = static_cast<unsigned>(c.cols() / bsize);
        const dim3 grid(nb_row, nb_col);

        run_kernel_pre<<<grid, nthreads, size_shared>>>(&c[0], &parent[0],
                                                        bsize);
        // cudaGetLastError() reads and clears the launch status, so a failure
        // of this launch cannot be reported again for the next one.
        cudaCheck(cudaGetLastError());

        run_kernel_post<<<grid, nthreads>>>(&c[0], &parent[0], bsize);
        cudaCheck(cudaGetLastError());

        // Same order as before: pull first, then wait for all device work.
        parent.pull();
        cudaCheck(cudaDeviceSynchronize());
    }

    // Follows parent links starting at linear index `ind` until an entry
    // points to itself, and returns that index.
    //
    // pull : when true, parent.pull() is called before the walk.
    //
    // `ind` is not range-checked, and the walk does not terminate if the links
    // form a cycle without a self-referencing entry.
    auto root(const unsigned long long ind, const bool pull = false) const
    {
        if (pull)
        {
            parent.pull();
        }

        auto pos = ind;
        // One read of `parent` per step; stop at the first self-reference.
        for (auto next = parent[pos]; next != pos; next = parent[pos])
        {
            pos = next;
        }
        return pos;
    }

    // Prints the map via c.print(), then one line per row with the root index
    // of every cell, separated by single spaces.
    void debug(void) const
    {
        c.print();
        parent.pull();
        std::cout << std::endl;
        for (auto i = 0ULL; i < parent.rows(); ++i)
        {
            for (auto j = 0ULL; j < parent.cols(); ++j)
            {
                // Newline after the last column, a space otherwise.
                std::cout << root(parent.index(i, j))
                          << (j + 1 == parent.cols() ? "\n" : " ");
            }
        }
        std::cout << std::endl;
    }
};