memory.jl
# memory operations
# TODO: Properly use dispatch capabilities for these functions


## pointer type

# we cannot take an MTLBuffer's handle and work with it as if it were a pointer to memory.
# instead, the Metal APIs always take the original handle and an offset parameter.
struct MtlPtr{T}
    buffer::MTLBuffer
    offset::UInt    # in bytes

    function MtlPtr{T}(buffer::MTLBuffer, offset=0) where {T}
        new(buffer, offset)
    end
end

Base.eltype(::Type{<:MtlPtr{T}}) where {T} = T

# limited arithmetic
Base.:(+)(x::MtlPtr{T}, y::Integer) where {T} = MtlPtr{T}(x.buffer, x.offset+y)
Base.:(-)(x::MtlPtr{T}, y::Integer) where {T} = MtlPtr{T}(x.buffer, x.offset-y)
Base.:(+)(x::Integer, y::MtlPtr{T}) where {T} = MtlPtr{T}(x.buffer, y+x.offset)

Base.convert(::Type{Ptr{T}}, ptr::MtlPtr) where {T} =
    convert(Ptr{T}, ptr.buffer) + ptr.offset
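
# Example (hypothetical usage sketch; assumes a valid `dev::MTLDevice` and the
# `alloc`/`free` helpers used elsewhere in this file):
#
#     buf = alloc(dev, 16*sizeof(Float32); storage=SharedStorage)
#     p   = MtlPtr{Float32}(buf)          # points at the start of the buffer
#     q   = p + 4*sizeof(Float32)         # offsets are in bytes, not elements
#     cpu = convert(Ptr{Float32}, q)      # host pointer; only meaningful for host-accessible storage
#     free(buf)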

## operations

# CPU -> GPU
function Base.unsafe_copyto!(dev::MTLDevice, dst::MtlPtr{T}, src::Ptr{T}, N::Integer;
                             queue::MTLCommandQueue=global_queue(dev), async::Bool=false) where T
    storage_type = dst.buffer.storageMode
    if storage_type == MTL.MTLStorageModePrivate
        # stage through a shared buffer
        nocopy = MTL.can_alloc_nocopy(src, N*sizeof(T))
        tmp_buf = alloc(dev, N*sizeof(T), src; storage=SharedStorage, nocopy)

        # copy to the private buffer
        unsafe_copyto!(dev, MtlPtr{T}(dst.buffer, dst.offset), MtlPtr{T}(tmp_buf, 0), N;
                       queue, async=(nocopy && async))

        free(tmp_buf)
    elseif storage_type == MTL.MTLStorageModeShared
        unsafe_copyto!(convert(Ptr{T}, dst), src, N)
    elseif storage_type == MTL.MTLStorageModeManaged
        unsafe_copyto!(convert(Ptr{T}, dst), src, N)
        MTL.DidModifyRange!(dst.buffer, 1:N)
    end
    return dst
end
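
# Upload sketch (hypothetical usage; `dev`, `data`, and `PrivateStorage` are
# assumptions about the surrounding package):
#
#     data = rand(Float32, 1024)
#     buf  = alloc(dev, sizeof(data); storage=PrivateStorage)
#     GC.@preserve data begin
#         unsafe_copyto!(dev, MtlPtr{Float32}(buf), pointer(data), length(data))
#     end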

# GPU -> CPU
function Base.unsafe_copyto!(dev::MTLDevice, dst::Ptr{T}, src::MtlPtr{T}, N::Integer;
                             queue::MTLCommandQueue=global_queue(dev), async::Bool=false) where T
    storage_type = src.buffer.storageMode
    if storage_type == MTL.MTLStorageModePrivate
        # stage through a shared buffer
        nocopy = MTL.can_alloc_nocopy(dst, N*sizeof(T))
        tmp_buf = if nocopy
            alloc(dev, N*sizeof(T), dst; storage=SharedStorage, nocopy)
        else
            alloc(dev, N*sizeof(T); storage=SharedStorage)
        end
        unsafe_copyto!(dev, MtlPtr{T}(tmp_buf, 0), MtlPtr{T}(src.buffer, src.offset), N;
                       queue, async=(nocopy && async))

        # copy from the shared buffer
        if !nocopy
            unsafe_copyto!(dst, convert(Ptr{T}, tmp_buf), N)
        end

        free(tmp_buf)
    elseif storage_type == MTL.MTLStorageModeShared
        unsafe_copyto!(dst, convert(Ptr{T}, src), N)
    elseif storage_type == MTL.MTLStorageModeManaged
        unsafe_copyto!(dst, convert(Ptr{T}, src), N)
    end
    return dst
end
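
# Readback sketch (hypothetical usage, mirroring the upload above; `buf` is
# assumed to hold 1024 Float32 values):
#
#     out = Vector{Float32}(undef, 1024)
#     GC.@preserve out begin
#         unsafe_copyto!(dev, pointer(out), MtlPtr{Float32}(buf), length(out))
#     end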

# GPU -> GPU
@autoreleasepool function Base.unsafe_copyto!(dev::MTLDevice, dst::MtlPtr{T},
                                              src::MtlPtr{T}, N::Integer;
                                              queue::MTLCommandQueue=global_queue(dev),
                                              async::Bool=false) where T
    if N > 0
        cmdbuf = MTLCommandBuffer(queue)
        MTLBlitCommandEncoder(cmdbuf) do enc
            MTL.append_copy!(enc, dst.buffer, dst.offset, src.buffer, src.offset, N * sizeof(T))
        end
        commit!(cmdbuf)
        async || wait_completed(cmdbuf)
    end
    return dst
end
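
# Device-to-device sketch (hypothetical usage; `dev` and `PrivateStorage` are
# assumptions). With `async=true` the call returns after committing the blit,
# before the command buffer has completed:
#
#     bytes = 1024*sizeof(Float32)
#     a = alloc(dev, bytes; storage=PrivateStorage)
#     b = alloc(dev, bytes; storage=PrivateStorage)
#     unsafe_copyto!(dev, MtlPtr{UInt8}(b), MtlPtr{UInt8}(a), bytes; async=true)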

@autoreleasepool function unsafe_fill!(dev::MTLDevice, dst::MtlPtr{T},
                                       value::Union{UInt8,Int8}, N::Integer;
                                       queue::MTLCommandQueue=global_queue(dev),
                                       async::Bool=false) where T
    if N > 0
        cmdbuf = MTLCommandBuffer(queue)
        MTLBlitCommandEncoder(cmdbuf) do enc
            MTL.append_fillbuffer!(enc, dst.buffer, value, N * sizeof(T), dst.offset)
        end
        commit!(cmdbuf)
        async || wait_completed(cmdbuf)
    end
    return dst
end

# TODO: Implement generic fill since mtBlitCommandEncoderFillBuffer is limiting
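
# Fill sketch (hypothetical usage; `buf` and `bytes` as in the sketch above).
# The blit encoder's fill only accepts a single byte value, which is why `value`
# is restricted to `UInt8`/`Int8` and why the TODO above asks for a generic fill:
#
#     unsafe_fill!(dev, MtlPtr{UInt8}(buf), 0x00, bytes)   # zero the buffer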