Skip to content

Commit

Permalink
fix variability toggle, synthesis debug
Browse files Browse the repository at this point in the history
  • Loading branch information
palumbom committed Nov 19, 2024
1 parent 287fec6 commit 571588f
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 24 deletions.
7 changes: 6 additions & 1 deletion src/gpu/gpu_precomps_eclipse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,12 @@ function calc_eclipse_quantities_gpu!(wavelength, μs, z_rot, ax_codes,
@inbounds ld[m,n,wl] /= count
@inbounds ext[m,n,wl] /= count
end
@inbounds z_rot[m,n] = z_rot_numerator / z_rot_denominator

if iszero(z_rot_denominator)
@inbounds z_rot[m,n] = 0.0
else
@inbounds z_rot[m,n] = z_rot_numerator / z_rot_denominator
end

# set vector components as average
@inbounds xx = x_sum / μ_count
Expand Down
43 changes: 26 additions & 17 deletions src/gpu/gpu_sim_eclipse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,27 @@ function disk_sim_eclipse_gpu(spec::SpecParams{T1}, disk::DiskParamsEclipse{T1},
threads5 = 1024
blocks5 = cld(CUDA.length(prof), prod(threads5))

# allocate arrays for fresh copy of input data to copy to each loop
# allocate destinations for interpolations
@cusync begin
bisall_gpu_loop = CUDA.zeros(T2, CUDA.size(bisall_gpu))
intall_gpu_loop = CUDA.zeros(T2, CUDA.size(intall_gpu))
widall_gpu_loop = CUDA.zeros(T2, CUDA.size(widall_gpu))
bisall_gpu_loop = CUDA.copy(bisall_gpu)
intall_gpu_loop = CUDA.copy(intall_gpu)
widall_gpu_loop = CUDA.copy(widall_gpu)
end

# allocate memory for means
@cusync begin
bisall_mean = CUDA.zeros(CUDA.eltype(bisall_gpu_loop), 100, CUDA.size(bisall_gpu_loop, 3))
intall_mean = CUDA.zeros(CUDA.eltype(intall_gpu_loop), 100, CUDA.size(intall_gpu_loop, 3))
widall_mean = CUDA.zeros(CUDA.eltype(widall_gpu_loop), 100, CUDA.size(widall_gpu_loop, 3))
end

threads6 = (4, 16)
blocks6 = cld(length(lenall_gpu) * 100, prod(threads6))

@cusync @cuda threads=threads6 blocks=blocks6 time_average_bis!(lenall_gpu, bisall_mean, intall_mean,
widall_mean, bisall_gpu, intall_gpu,
widall_gpu)

# loop over time
for t in 1:Nt
# sort out the system geometry
Expand Down Expand Up @@ -108,29 +122,24 @@ function disk_sim_eclipse_gpu(spec::SpecParams{T1}, disk::DiskParamsEclipse{T1},
# calculate how much extra shift is needed
extra_z = spec.conv_blueshifts .- z_cbs_avg

# get a fresh copy of the untrimmed bisector + width data
@cusync begin
CUDA.copyto!(bisall_gpu_loop, bisall_gpu)
CUDA.copyto!(intall_gpu_loop, intall_gpu)
CUDA.copyto!(widall_gpu_loop, widall_gpu)
end

# trim all the bisector data
@cusync @cuda threads=threads2 blocks=blocks2 trim_bisector_gpu!(spec.depths[l], spec.variability[l],
depcontrast_gpu, lenall_gpu,
bisall_gpu_loop, intall_gpu_loop,
widall_gpu_loop, bisall_gpu,
intall_gpu, widall_gpu)
intall_gpu, widall_gpu,
bisall_mean, intall_mean,
widall_mean)

# assemble line shape on even int grid
@cusync @cuda threads=threads3 blocks=blocks3 fill_workspaces_2D_eclipse!(spec.lines[l], spec.variability[l],
extra_z[l], tloop, dat_idx,
z_rot, z_cbs, lenall_gpu,
bisall_gpu_loop, intall_gpu_loop,
widall_gpu_loop, allwavs, allints)
extra_z[l], tloop, dat_idx,
z_rot, z_cbs, lenall_gpu,
bisall_gpu_loop, intall_gpu_loop,
widall_gpu_loop, allwavs, allints)

# do the line synthesis, interp back onto wavelength grid
@cusync @cuda threads=threads4 blocks=blocks4 line_profile_gpu!(prof, μs, ld[:,:,l], dA, ext[:,:,l], λs, allwavs, allints, ext_toggle)
@cusync @cuda threads=threads4 blocks=blocks4 line_profile_gpu!(l, prof, μs, ld, dA, ext, λs, allwavs, allints, ext_toggle)

# copy data from GPU to CPU
@cusync @cuda threads=threads5 blocks=blocks5 apply_line!(t, prof, flux, sum_wts)
Expand Down
15 changes: 9 additions & 6 deletions src/gpu/gpu_synthesis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,14 +147,17 @@ function line_profile_gpu!(prof, μs, wts, λs, allwavs, allints)
return nothing
end

function line_profile_gpu!(prof, μs, ld, dA, ext, λs, allwavs, allints, ext_toggle)
function line_profile_gpu!(l, prof, μs, ld, dA, ext, λs, allwavs, allints, ext_toggle)
# get indices from GPU blocks + threads
idx = threadIdx().x + blockDim().x * (blockIdx().x-1)
sdx = blockDim().x * gridDim().x
idy = threadIdx().y + blockDim().y * (blockIdx().y-1)
sdy = blockDim().y * gridDim().y

Nθ_max = CUDA.size(μs, 2)



# parallelized loop over grid
for i in idx:sdx:CUDA.length(μs)
# get index for output array
Expand All @@ -168,7 +171,7 @@ function line_profile_gpu!(prof, μs, ld, dA, ext, λs, allwavs, allints, ext_to
continue
end

#take view of arrays to pass to interpolater
# take view of arrays to pass to interpolator
allwavs_i = CUDA.view(allwavs, m, n, :)
allints_i = CUDA.view(allints, m, n, :)

Expand All @@ -179,18 +182,18 @@ function line_profile_gpu!(prof, μs, ld, dA, ext, λs, allwavs, allints, ext_to
# loop over wavelengths
for j in idy:sdy:CUDA.length(λs)
if ((λs[j] < CUDA.first(allwavs_i)) || (λs[j] > CUDA.last(allwavs_i)))
@inbounds CUDA.@atomic prof[j] += dA[m,n] * ld[m,n] * ext[m,n]
@inbounds CUDA.@atomic prof[j] += dA[m,n] * ld[m,n,l] * ext[m,n,l]
else
@inbounds CUDA.@atomic prof[j] += itp(λs[j]) * dA[m,n] * ld[m,n] * ext[m,n]
@inbounds CUDA.@atomic prof[j] += itp(λs[j]) * dA[m,n] * ld[m,n,l] * ext[m,n,l]
end
end
else
# loop over wavelengths
for j in idy:sdy:CUDA.length(λs)
if ((λs[j] < CUDA.first(allwavs_i)) || (λs[j] > CUDA.last(allwavs_i)))
@inbounds CUDA.@atomic prof[j] += dA[m,n] * ld[m,n]
@inbounds CUDA.@atomic prof[j] += dA[m,n] * ld[m,n,l]
else
@inbounds CUDA.@atomic prof[j] += itp(λs[j]) * dA[m,n] * ld[m,n]
@inbounds CUDA.@atomic prof[j] += itp(λs[j]) * dA[m,n] * ld[m,n,l]
end
end
end
Expand Down

0 comments on commit 571588f

Please sign in to comment.