Skip to content

Commit

Permalink
save 1 6/24/24
Browse files Browse the repository at this point in the history
  • Loading branch information
GavinH2024 committed Jun 24, 2024
1 parent 5fbbe42 commit 6531e7f
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 23 deletions.
7 changes: 4 additions & 3 deletions src/track_a_drift_cpu.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using CUDA, BenchmarkTools, Test, Adapt
using CUDA, BenchmarkTools
include("low_level/sqrt_one.jl")
include("low_level/structures.jl")

Expand Down Expand Up @@ -37,7 +37,7 @@ function track_a_drift(p_in, drift)
end



"""test input"""
x = fill(1.0, 1024)
y = fill(1.0, 1024)
z = fill(0.75, 1024)
Expand All @@ -54,4 +54,5 @@ drift = Drift(L)

p_in = Particle(x, px, y, py, z, pz, s, p0c, mc2)

track_a_drift(p_in, drift)
track_a_drift(p_in, drift) # consistent output with python code
@btime track_a_drift(p_in, drift) # first test: 97.650 μs; second test: 100.063 μs
67 changes: 47 additions & 20 deletions src/track_a_drift_gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@ include("low_level/sqrt_one.jl")
include("low_level/structures.jl")



"""Adapting structs to bitstype"""
Adapt.@adapt_structure GPU_Particle
Adapt.@adapt_structure GPU_Drift
Adapt.@adapt_structure Intermediate

function track_a_drift(p_in, drift)
"""Tracks the incoming Particle p_in though drift element
and returns the outgoing particle.

function track_a_drift_GPU!(p_in, drift, inter)
"""Parallelized tracking of incoming particles p_in
through drift elements, computed on the GPU.
See Bmad manual section 24.9
"""
L = drift.L
Expand All @@ -15,6 +21,7 @@ function track_a_drift(p_in, drift)
mc2 = p_in.mc2

x, px, y, py, z, pz = p_in.x, p_in.px, p_in.y, p_in.py, p_in.z, p_in.pz
P, Px, Py, Pxy2, Pl, dz = inter.P, inter.Px, inter.Py, inter.Pxy2, inter.Pl

P = 1 .+ pz # Particle's total momentum over p0
Px = px ./ P # Particle's 'x' momentum over p0
Expand All @@ -23,35 +30,55 @@ function track_a_drift(p_in, drift)
Pl = sqrt.(1 .- Pxy2) # Particle's longitudinal momentum over p0
x .+= L .* Px ./ Pl
y .+= L .* Py ./ Pl

# z = z + L * ( beta/beta_ref - 1.0/Pl ) but numerically accurate:
dz = L .* (sqrt_one.((mc2.^2 .* (2 .*pz.+pz.^2))./((p0c.*P).^2 .+ mc2.^2))
.+ sqrt_one.(-Pxy2)./Pl)


z .+= dz
s .+= L
particle = p_in

return particle
return
end



"""helper arrays to determine array memory allocation"""
P = CUDA.fill(undef, 1000);
Px = CUDA.fill(undef, 1000);
Py = CUDA.fill(undef, 1000);
Pxy2 = CUDA.fill(undef, 1000);
Pl = CUDA.fill(undef, 1000);
dz = CUDA.fill(undef, 1000);

"""sample input of 1000 particles"""
x = CUDA.fill(1.0, 1000);
y = CUDA.fill(1.0, 1000);
z = CUDA.fill(0.75, 1000);
px = CUDA.fill(0.5, 1000);
py = CUDA.fill(0.2, 1000);
pz = CUDA.fill(0.5, 1000);
s = CUDA.fill(1.0, 1000);
p0c = CUDA.fill(1.0, 1000);
mc2 = CUDA.fill(.511, 1000);

x = fill(1.0, 1024)
y = fill(1.0, 1024)
z = fill(0.75, 1024)
px = fill(0.5, 1024)
py = fill(0.2, 1024)
pz = fill(0.5, 1024)
s = fill(1.0, 1024)
p0c = fill(1.0, 1024)
mc2 = fill(.511, 1024)
L = CUDA.fill(0.005, 1000);

L = fill(0.005, 1024)
drift = GPU_Drift(L)
inter = Intermediate(P, Px, Py, Pxy2, Pl, dz)
p_in = GPU_Particle(x, px, y, py, z, pz, s, p0c, mc2)

drift = Drift(L)
"""benchmarking"""
function bench_track_a_drift(p_in, drift, inter)
kernel = @cuda launch=false track_a_drift_GPU!(p_in, drift, inter)
config = launch_configuration(kernel.fun)
threads = config.threads
blocks = config.blocks

p_in = Particle(x, px, y, py, z, pz, s, p0c, mc2)
CUDA.@sync begin
kernel(p_in, drift, inter; threads, blocks)
end
end

track_a_drift(p_in, drift)
@btime bench_track_a_drift($p_in, $drift, $inter)

0 comments on commit 6531e7f

Please sign in to comment.