fix variability toggle, synthesis debug

palumbom · Nov 19, 2024 · 571588f · 571588f
1 parent 287fec6
commit 571588f
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 24 deletions.
diff --git a/src/gpu/gpu_precomps_eclipse.jl b/src/gpu/gpu_precomps_eclipse.jl
@@ -303,7 +303,12 @@ function calc_eclipse_quantities_gpu!(wavelength, μs, z_rot, ax_codes,
                 @inbounds ld[m,n,wl] /= count
                 @inbounds ext[m,n,wl] /= count
             end
-            @inbounds z_rot[m,n] = z_rot_numerator / z_rot_denominator
+
+            if iszero(z_rot_denominator)
+                @inbounds z_rot[m,n] = 0.0
+            else 
+                @inbounds z_rot[m,n] = z_rot_numerator / z_rot_denominator
+            end
 
             # set vector components as average
             @inbounds xx = x_sum / μ_count

diff --git a/src/gpu/gpu_sim_eclipse.jl b/src/gpu/gpu_sim_eclipse.jl
@@ -52,13 +52,27 @@ function disk_sim_eclipse_gpu(spec::SpecParams{T1}, disk::DiskParamsEclipse{T1},
     threads5 = 1024
     blocks5 = cld(CUDA.length(prof), prod(threads5))
 
-    # allocate arrays for fresh copy of input data to copy to each loop
+    # allocate destinations for interpolations
     @cusync begin
-        bisall_gpu_loop = CUDA.zeros(T2, CUDA.size(bisall_gpu))
-        intall_gpu_loop = CUDA.zeros(T2, CUDA.size(intall_gpu))
-        widall_gpu_loop = CUDA.zeros(T2, CUDA.size(widall_gpu))
+        bisall_gpu_loop = CUDA.copy(bisall_gpu)
+        intall_gpu_loop = CUDA.copy(intall_gpu)
+        widall_gpu_loop = CUDA.copy(widall_gpu)
     end
 
+    # allocate memory for means
+    @cusync begin
+        bisall_mean = CUDA.zeros(CUDA.eltype(bisall_gpu_loop), 100, CUDA.size(bisall_gpu_loop, 3))
+        intall_mean = CUDA.zeros(CUDA.eltype(intall_gpu_loop), 100, CUDA.size(intall_gpu_loop, 3))
+        widall_mean = CUDA.zeros(CUDA.eltype(widall_gpu_loop), 100, CUDA.size(widall_gpu_loop, 3))
+    end
+
+    threads6 = (4, 16)
+    blocks6 = cld(length(lenall_gpu) * 100, prod(threads6))
+
+    @cusync @cuda threads=threads6 blocks=blocks6 time_average_bis!(lenall_gpu, bisall_mean, intall_mean, 
+                                                                    widall_mean, bisall_gpu, intall_gpu, 
+                                                                    widall_gpu)
+
     # loop over time
     for t in 1:Nt
         # sort out the system geometry
@@ -108,29 +122,24 @@ function disk_sim_eclipse_gpu(spec::SpecParams{T1}, disk::DiskParamsEclipse{T1},
             # calculate how much extra shift is needed
             extra_z = spec.conv_blueshifts .- z_cbs_avg
 
-            # get a fresh copy of the untrimmed bisector + width data
-            @cusync begin
-                CUDA.copyto!(bisall_gpu_loop, bisall_gpu)
-                CUDA.copyto!(intall_gpu_loop, intall_gpu)
-                CUDA.copyto!(widall_gpu_loop, widall_gpu)
-            end
-
             # trim all the bisector data
             @cusync @cuda threads=threads2 blocks=blocks2 trim_bisector_gpu!(spec.depths[l], spec.variability[l],
                                                                              depcontrast_gpu, lenall_gpu,
                                                                              bisall_gpu_loop, intall_gpu_loop,
                                                                              widall_gpu_loop, bisall_gpu,
-                                                                             intall_gpu, widall_gpu)
+                                                                             intall_gpu, widall_gpu, 
+                                                                             bisall_mean, intall_mean, 
+                                                                             widall_mean)
 
             # assemble line shape on even int grid
             @cusync @cuda threads=threads3 blocks=blocks3 fill_workspaces_2D_eclipse!(spec.lines[l], spec.variability[l],
-                                                                           extra_z[l], tloop, dat_idx,
-                                                                           z_rot, z_cbs, lenall_gpu,
-                                                                           bisall_gpu_loop, intall_gpu_loop,
-                                                                           widall_gpu_loop, allwavs, allints)
+                                                                                      extra_z[l], tloop, dat_idx,
+                                                                                      z_rot, z_cbs, lenall_gpu,
+                                                                                      bisall_gpu_loop, intall_gpu_loop,
+                                                                                      widall_gpu_loop, allwavs, allints)
 
             # do the line synthesis, interp back onto wavelength grid
-            @cusync @cuda threads=threads4 blocks=blocks4 line_profile_gpu!(prof, μs, ld[:,:,l], dA, ext[:,:,l], λs, allwavs, allints, ext_toggle)
+            @cusync @cuda threads=threads4 blocks=blocks4 line_profile_gpu!(l, prof, μs, ld, dA, ext, λs, allwavs, allints, ext_toggle)
 
             # copy data from GPU to CPU
             @cusync @cuda threads=threads5 blocks=blocks5 apply_line!(t, prof, flux, sum_wts)

diff --git a/src/gpu/gpu_synthesis.jl b/src/gpu/gpu_synthesis.jl
@@ -147,14 +147,17 @@ function line_profile_gpu!(prof, μs, wts, λs, allwavs, allints)
     return nothing
 end
 
-function line_profile_gpu!(prof, μs, ld, dA, ext, λs, allwavs, allints, ext_toggle)
+function line_profile_gpu!(l, prof, μs, ld, dA, ext, λs, allwavs, allints, ext_toggle)
     # get indices from GPU blocks + threads
     idx = threadIdx().x + blockDim().x * (blockIdx().x-1)
     sdx = blockDim().x * gridDim().x
     idy = threadIdx().y + blockDim().y * (blockIdx().y-1)
     sdy = blockDim().y * gridDim().y
 
     Nθ_max = CUDA.size(μs, 2)
+
+
+
     # parallelized loop over grid
     for i in idx:sdx:CUDA.length(μs)
          # get index for output array 
@@ -168,7 +171,7 @@ function line_profile_gpu!(prof, μs, ld, dA, ext, λs, allwavs, allints, ext_to
             continue
         end
 
-        #take view of arrays to pass to interpolater
+        # take view of arrays to pass to interpolator
         allwavs_i = CUDA.view(allwavs, m, n, :)
         allints_i = CUDA.view(allints, m, n, :)
 
@@ -179,18 +182,18 @@ function line_profile_gpu!(prof, μs, ld, dA, ext, λs, allwavs, allints, ext_to
             # loop over wavelengths
             for j in idy:sdy:CUDA.length(λs)
                 if ((λs[j] < CUDA.first(allwavs_i)) || (λs[j] > CUDA.last(allwavs_i)))
-                    @inbounds CUDA.@atomic prof[j] += dA[m,n] * ld[m,n] * ext[m,n]
+                    @inbounds CUDA.@atomic prof[j] += dA[m,n] * ld[m,n,l] * ext[m,n,l]
                 else
-                    @inbounds CUDA.@atomic prof[j] += itp(λs[j]) * dA[m,n] * ld[m,n] * ext[m,n]
+                    @inbounds CUDA.@atomic prof[j] += itp(λs[j]) * dA[m,n] * ld[m,n,l] * ext[m,n,l]
                 end
             end
         else
             # loop over wavelengths
             for j in idy:sdy:CUDA.length(λs)
                 if ((λs[j] < CUDA.first(allwavs_i)) || (λs[j] > CUDA.last(allwavs_i)))
-                    @inbounds CUDA.@atomic prof[j] += dA[m,n] * ld[m,n]
+                    @inbounds CUDA.@atomic prof[j] += dA[m,n] * ld[m,n,l]
                 else
-                    @inbounds CUDA.@atomic prof[j] += itp(λs[j]) * dA[m,n] * ld[m,n]
+                    @inbounds CUDA.@atomic prof[j] += itp(λs[j]) * dA[m,n] * ld[m,n,l]
                 end
             end
         end