-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathmisc.cuh
60 lines (47 loc) · 1.68 KB
/
misc.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
/*
kernels/misc.cuh -- Miscellaneous CUDA kernels
Copyright (c) 2021 Wenzel Jakob <wenzel.jakob@epfl.ch>
All rights reserved. Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
#include "common.h"
// Store one 8-bit `value` at the location `out` points to.
//
// NOTE(review): the body does not consult any thread index, so every launched
// thread performs the same store -- presumably launched with a single thread;
// confirm at the call site.
KERNEL void poke_u8(uint8_t *out, uint8_t value) {
    out[0] = value;
}
// Store one 16-bit `value` at the location `out` points to.
//
// NOTE(review): the body does not consult any thread index, so every launched
// thread performs the same store -- presumably launched with a single thread;
// confirm at the call site.
KERNEL void poke_u16(uint16_t *out, uint16_t value) {
    out[0] = value;
}
// Store one 32-bit `value` at the location `out` points to.
//
// NOTE(review): the body does not consult any thread index, so every launched
// thread performs the same store -- presumably launched with a single thread;
// confirm at the call site.
KERNEL void poke_u32(uint32_t *out, uint32_t value) {
    out[0] = value;
}
// Store one 64-bit `value` at the location `out` points to.
//
// NOTE(review): the body does not consult any thread index, so every launched
// thread performs the same store -- presumably launched with a single thread;
// confirm at the call site.
KERNEL void poke_u64(uint64_t *out, uint64_t value) {
    out[0] = value;
}
// Write `value` into the first `size` 64-bit entries of `out`.
//
// Parameters:
//   out   -- destination array; must provide room for at least `size` entries
//   size  -- number of entries to write (0 is fine: nothing is written)
//   value -- 64-bit pattern stored in every entry
//
// The kernel uses a grid-stride loop over the x dimension: each thread starts
// at its global x index and advances by the total number of threads in the
// grid, so any 1D launch configuration covers the full range.
KERNEL void fill_64(uint64_t *out, uint32_t size, uint64_t value) {
    // Index and stride are kept in 64 bits. With 32-bit arithmetic, an index
    // within one stride of 2^32 wraps around to a small value that is again
    // smaller than `size`, and the loop never terminates for large arrays.
    // The product blockDim.x * gridDim.x can likewise exceed 32 bits.
    const uint64_t stride = (uint64_t) blockDim.x * gridDim.x;

    for (uint64_t i = (uint64_t) blockIdx.x * blockDim.x + threadIdx.x;
         i < size; i += stride)
        out[i] = value;
}
// One entry of the record list consumed by the `aggregate` kernel. Each
// record describes a single 1/2/4/8-byte store into the output buffer.
struct VCallDataRecord {
// Width/mode selector as interpreted by `aggregate`:
//   1, 2, 4, 8     -> the value to store is encoded in the bits of `src` itself
//  -1, -2, -4, -8  -> `src` is dereferenced to read a value of that many bytes
// Any other value causes `aggregate` to write nothing for this record.
int32_t size;
// Byte offset of the destination, relative to the `out` pointer of `aggregate`.
uint32_t offset;
// Either an immediate value packed into the pointer bits or the address to
// read from, depending on the sign of `size`.
const void *src;
};
// Scatter a list of small values into the byte buffer `out`.
//
// Parameters:
//   out  -- base address of the destination buffer
//   rec_ -- array of `size` records, one handled per thread
//   size -- number of records; threads with a global x index >= size do nothing
//
// For each record, the destination is `out` plus `offset` bytes. A positive
// record size (1, 2, 4, 8) stores the integer value of the record's `src`
// pointer, truncated to that many bytes. A negative record size (-1, -2, -4,
// -8) loads that many bytes from the address `src` and stores them. Records
// with any other size are skipped without writing anything.
KERNEL void aggregate(void *out, const VCallDataRecord *rec_, uint32_t size) {
    const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x;

    if (tid < size) {
        // Work on a private copy of the record
        const VCallDataRecord entry = rec_[tid];

        uint8_t *target = (uint8_t *) out + entry.offset;

        // Pointer bits reinterpreted as an integer (used by the immediate cases)
        const uintptr_t literal = (uintptr_t) entry.src;

        switch (entry.size) {
            // Immediate mode: the pointer field carries the value itself
            case 1:
                *target = (uint8_t) literal;
                break;

            case 2:
                *(uint16_t *) target = (uint16_t) literal;
                break;

            case 4:
                *(uint32_t *) target = (uint32_t) literal;
                break;

            case 8:
                *(uint64_t *) target = (uint64_t) literal;
                break;

            // Indirect mode: fetch the value from the address in `src`
            case -1:
                *target = *(const uint8_t *) entry.src;
                break;

            case -2:
                *(uint16_t *) target = *(const uint16_t *) entry.src;
                break;

            case -4:
                *(uint32_t *) target = *(const uint32_t *) entry.src;
                break;

            case -8:
                *(uint64_t *) target = *(const uint64_t *) entry.src;
                break;
        }
    }
}