-
Notifications
You must be signed in to change notification settings - Fork 0
/
cl-lmem-patterns.py
55 lines (39 loc) · 1.02 KB
/
cl-lmem-patterns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pyopencl as cl
import pyopencl.array
import numpy as np
import sys
# Create the OpenCL context and the command queue that all device work in
# this script is submitted to.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Problem size: 5 * 2**20. Used both as the length of `a` and as the global
# work size of the kernel launch.
n = 5 * (2**10)**2
mf = cl.mem_flags

# Device-side float32 array of n zeros, passed to the kernel as its buffer
# argument.
a = cl.array.zeros(queue, n, np.float32)

# The first command-line argument is an integer that gets substituted into
# the kernel source as the multiplier in the local-memory index
# `loc[ARGUMENT * li]`.  Fail with a usage message rather than a bare
# IndexError/ValueError traceback when it is missing or malformed.
if len(sys.argv) < 2:
    sys.exit("usage: %s STRIDE  (integer local-memory index multiplier)"
             % sys.argv[0])
try:
    arg = int(sys.argv[1])
except ValueError:
    sys.exit("STRIDE must be an integer, got %r" % sys.argv[1])
# Build the benchmark kernel.
#
# The kernel source contains the placeholder `myarg`, which is textually
# replaced by the integer `arg` before compilation, so `ARGUMENT` is a
# compile-time constant inside the kernel.
#
# Per work-item the kernel:
#   * computes li = get_local_id(0) % 32 and zeroes loc[li],
#   * performs 100 * 10 reads of loc[ARGUMENT * li], accumulating into x,
#   * stores x back to loc[li].
# The kernel parameters `a` and `n` are not referenced in the kernel body.
# NOTE(review): the `volatile` qualifiers are presumably there so the
# repeated local-memory loads are not optimized away -- confirm.
#
# The index ARGUMENT * li must stay inside loc_array, otherwise the kernel
# reads out of bounds on the device.  Check that on the host before building.
_LOC_ELEMS = 2048  # must match `loc_array[2048]` in the kernel source
_MAX_LI = 31       # largest value of `get_local_id(0) % 32`
if not 0 <= arg * _MAX_LI < _LOC_ELEMS:
    sys.exit("arg must be between 0 and %d (got %d): the kernel indexes "
             "loc[arg * li] with li up to %d in a %d-element local array"
             % ((_LOC_ELEMS - 1) // _MAX_LI, arg, _MAX_LI, _LOC_ELEMS))

prg = cl.Program(ctx, """//CL//
#define ARGUMENT myarg
kernel void fill_vec(global volatile float *a, long int n)
{
local float loc_array[2048];
volatile local float *loc = loc_array;
long int li = get_local_id(0) % 32;
long int gi = get_global_id(0);
loc[li] = 0;
float x = 0;
for (int j = 0; j < 100; ++j)
{
#pragma unroll
for (int k = 0; k < 10; ++k)
x += loc[ARGUMENT * li];
}
loc[li] = x;
}
""".replace("myarg", str(arg))).build()
from time import time

# Number of kernel launches the reported time is averaged over.
ntrips = 10

# Drain any previously queued work so it is not counted in the timed region.
queue.finish()
t1 = time()
for _ in range(ntrips):
    # Launch with n work-items in total, in work-groups of 128.
    prg.fill_vec(queue, (n,), (128,), a.data, np.int64(n))
# Launches are only enqueued above; wait for all of them to complete before
# reading the clock again.
queue.finish()
t2 = time()

# Mean wall-clock time per launch.  The message is formatted into a single
# string first so that print(...) emits the same text on Python 2 and 3.
print("arg %d elapsed: %g s" % (arg, (t2-t1)/ntrips))
# vim: filetype=pyopencl