From c73becbb7fdace6e86f541a0743679aa72a61bf2 Mon Sep 17 00:00:00 2001 From: The Nguyen Date: Tue, 4 Jun 2024 14:46:52 +0200 Subject: [PATCH] move scene initialization into shared memory --- raytracer.cu | 236 +++++++++------------------------------------------ 1 file changed, 39 insertions(+), 197 deletions(-) diff --git a/raytracer.cu b/raytracer.cu index 3e7e986..067fbac 100644 --- a/raytracer.cu +++ b/raytracer.cu @@ -99,25 +99,19 @@ public: class Geometry { public: - __device__ Geometry(const Vector &albedo, int id, bool mirror, double in_refraction_index, double out_refraction_index): albedo(albedo), id(id), + __device__ Geometry(const Vector &C, double R, const Vector &albedo, int id = -1, bool mirror = 0, double in_refraction_index = 1, double out_refraction_index = 1): C(C), R(R), albedo(albedo), id(id), mirror(mirror), in_refraction_index(in_refraction_index), out_refraction_index(out_refraction_index) {} - __device__ Geometry(): mirror(0), in_refraction_index(1), out_refraction_index(1) {}; - + __device__ Geometry(): id(-1), mirror(0), in_refraction_index(1), out_refraction_index(1) {}; + + Vector C; + double R; Vector albedo; int id; bool mirror; double in_refraction_index; double out_refraction_index; - __device__ virtual bool intersect(const Ray& r, double &t, Vector &N) { return 0; }; -}; - -class Sphere: public Geometry { -public: - __device__ Sphere(const Vector &C, double R, const Vector& albedo, bool mirror = 0, double in_refraction_index = 1., double out_refraction_index = 1.) : - C(C), R(R), Geometry(albedo, id, mirror, in_refraction_index, out_refraction_index) {}; - Vector C; - double R; - __device__ bool intersect(const Ray &r, double &t, Vector &N) override { + + __device__ bool intersect(const Ray &r, double &t, Vector &N) { double delta = SQR(dot(r.u, r.O - C)) - ((r.O - C).norm2() - R*R); if (delta < 0) return 0; @@ -183,125 +177,6 @@ public: int triangle_start, triangle_end; }; -class TriangleMesh: public Geometry { -public: - __device__ ~TriangleMesh() {} - __device__ TriangleMesh() {}; - - #define between(A, B, C) ((A) <= (B) && (B) <= (C)) - - __device__ BoundingBox compute_bbox(int triangle_start, int triangle_end) { - BoundingBox bb; - for (int i = triangle_start; i < triangle_end; i++) { - bb.update(vertices[indices[i].vtxi]); - bb.update(vertices[indices[i].vtxj]); - bb.update(vertices[indices[i].vtxk]); - } - return bb; - } - - __device__ void buildBVH(BVH* cur, int triangle_start, int triangle_end) { - // std::cout << cur << ' ' << triangle_start << ' ' << triangle_end << '\n'; - // printf("%d %d\n", triangle_start, triangle_end); - cur->triangle_start = triangle_start; - cur->triangle_end = triangle_end; - cur->left = NULL; - cur->right = NULL; - cur->bb = compute_bbox(triangle_start, triangle_end); - - Vector diag = cur->bb.mx - cur->bb.mn; - int max_axis; - if (diag[0] >= diag[1] && diag[0] >= diag[2]) - max_axis = 0; - else if (diag[1] >= diag[0] && diag[1] >= diag[2]) - max_axis = 1; - else - max_axis = 2; - - int pivot = triangle_start; - double split = (cur->bb.mn[max_axis] + cur->bb.mx[max_axis]) / 2; - for (int i = triangle_start; i < triangle_end; i++) { - double cen = (vertices[indices[i].vtxi][max_axis] + vertices[indices[i].vtxj][max_axis] + vertices[indices[i].vtxk][max_axis]) / 3; - if (cen < split) { - swap(indices[i], indices[pivot]); - pivot++; - } - } - - if (pivot <= triangle_start || pivot >= triangle_end - 1 || triangle_end - triangle_start < 5) { - return; - } - cur->left = new BVH; - cur->right = new BVH; - buildBVH(cur->left, triangle_start, pivot); - buildBVH(cur->right, pivot, triangle_end); - } - - __device__ bool moller_trumbore(const Vector &A, const Vector &B, const Vector &C, Vector& N, const Ray &r, double &t) { - Vector e1 = B - A; - Vector e2 = C - A; - N = cross(e1, e2); - if (dot(r.u, N) == 0) return 0; - double beta = dot(e2, cross(A - r.O, r.u)) / dot(r.u, N); - double gamma = - dot(e1, cross(A - r.O, r.u)) / dot(r.u, N); - if (!between(0, beta, 1) || !between(0, gamma, 1)) return 0; - t = dot(A - r.O, N) / dot(r.u, N); - return beta + gamma <= 1 && t > 0; - } - - __device__ bool intersect(const Ray &r, double &t, Vector &N) override { - // printf("inter!\n"); - double t_tmp; - if (!bvh.bb.intersect(r, t_tmp)) return 0; - BVH* s[30]; - int s_size = 0; - s[s_size++] = &bvh; - - double t_min = INF; - while(s_size) { - const BVH* cur = s[s_size-1]; - s_size--; - if (cur->left) { - double t_left, t_right; - bool ok_left = cur->left->bb.intersect(r, t_left); - bool ok_right = cur->right->bb.intersect(r, t_right); - // printf("%d %d\n", ok_left, ok_right); - if (ok_left) s[s_size++] = cur->left; - if (ok_right) s[s_size++] = cur->right; - } else { - // Leaf - for (int i = cur->triangle_start; i < cur->triangle_end; i++) { - double t_cur; - Vector A = vertices[indices[i].vtxi], B = vertices[indices[i].vtxj], C = vertices[indices[i].vtxk]; - Vector N_triangle; - bool inter = moller_trumbore(A, B, C, N_triangle, r, t_cur); - if (!inter) continue; - if (t_cur > 0 && t_cur < t_min) { - t_min = t_cur; - N = N_triangle; - } - } - } - } - N.normalize(); - t = t_min; - return t_min != INF; - } - TriangleIndices* indices; - int indices_size; - Vector* vertices; - int vertices_size; - // Vector* normals; - // int normals_size; - // Vector* uvs; - // int uvs_size; - // Vector* vertexcolors; - // int vertexcolors_size; - BoundingBox bb; - BVH bvh; -}; -/* End of code derived from Prof Bonnel's code */ - class TriangleMeshHost { public: ~TriangleMeshHost() {} @@ -495,11 +370,6 @@ public: class Scene { public: - __device__ void addObject(Geometry* s) { - s->id = objects_size; - objects[objects_size++] = s; - } - __device__ bool intersect_all(const Ray& r, Vector &P, Vector &N, int &objectId) { double t_min = INF; int id_min = -1; @@ -609,58 +479,48 @@ public: return color; } - Geometry* objects[100]; + Geometry* objects[10]; int objects_size = 0; double intensity = 3e10; Vector L; curandState* rand_states; }; -__global__ void KernelInit(Scene *s, TriangleIndices *indices, int indices_size, Vector *vertices, int vertices_size) { - auto id = threadIdx.x + blockIdx.x * blockDim.x; - if (!id) { - s->L = Vector(-10., 20., 40.); - s->objects_size = 0; - s->intensity = 3e10; - // s->addObject(new Sphere(Vector(0, 0, 0), 10, Vector(1., 1., 1.))); // white sphere - s->addObject(new Sphere(Vector(0, 0, -1000), 940, Vector(0., 1., 0.))); // green fore wall - s->addObject(new Sphere(Vector(0, -1000, 0), 990, Vector(0., 0., 1.))); // blue floor - s->addObject(new Sphere(Vector(0, 1000, 0), 940, Vector(1., 0., 0.))); // red ceiling - s->addObject(new Sphere(Vector(-1000, 0, 0), 940, Vector(0., 1., 1.))); // cyan left wall - s->addObject(new Sphere(Vector(1000, 0, 0), 940, Vector(1., 1., 0.))); // yellow right wall - s->addObject(new Sphere(Vector(0, 0, 1000), 940, Vector(1., 0., 1.))); // magenta back wall - // s->addObject(new Sphere(Vector(-20, 0, 0), 10, Vector(0., 0., 0.), 1)); // mirror sphere - // s->addObject(new Sphere(Vector(20, 0, 0), 9, Vector(0., 0., 0.), 0, 1, 1.5)); // inner nested ssphere - // s->addObject(new Sphere(Vector(20, 0, 0), 10, Vector(0., 0., 0.), 0, 1.5, 1)); // outer nested sphere - - TriangleMesh* cat = new TriangleMesh(); - cat->albedo = Vector(0.25, 0.25, 0.25); - cat->indices_size = indices_size; - cat->indices = indices; - cat->vertices_size = vertices_size; - cat->vertices = vertices; - // cat->normals_size; - // cat->normals; - // cat->uvs_size; - // cat->uvs; - // cat->vertexcolors_size; - // cat->vertexcolors; - cat->bvh.bb = cat->compute_bbox(0, cat->indices_size); - cat->buildBVH(&(cat->bvh), 0, cat->indices_size); - s->addObject(cat); - } -} - -__global__ void KernelLaunch(Scene *s, double *colors, int W, int H, int num_rays, int num_bounce) { +__global__ void KernelLaunch(double *colors, int W, int H, int num_rays, int num_bounce, TriangleIndices *indices, int indices_size, Vector *vertices, int vertices_size) { extern __shared__ double shared_memory[]; size_t index = blockIdx.x * blockDim.x + threadIdx.x; double *shared_colors = shared_memory; - // Geometry *shared_objects = (Geometry *)&shared_colors[blockDim.x * 3]; - // curandState *shared_rand_states = (curandState *)&shared_objects[s->objects_size]; - curandState *shared_rand_states = (curandState *)&shared_colors[blockDim.x * 3]; + Geometry *shared_objects = (Geometry *)&shared_colors[blockDim.x * 3]; + curandState *shared_rand_states = (curandState *)&shared_objects[10]; Scene *shared_scene = (Scene *)&shared_rand_states[blockDim.x]; if (!threadIdx.x) { - *shared_scene = *s; + shared_scene->L = Vector(-10., 20., 40.); + shared_scene->objects_size = 0; + shared_scene->intensity = 3e10; + shared_objects[shared_scene->objects_size] = Geometry(Vector(0, 0, -1000), 940, Vector(0., 1., 0.)); + shared_objects[shared_scene->objects_size].id = shared_scene->objects_size; + shared_scene->objects[shared_scene->objects_size] = &shared_objects[shared_scene->objects_size]; + ++shared_scene->objects_size; + shared_objects[shared_scene->objects_size] = Geometry(Vector(0, -1000, 0), 990, Vector(0., 0., 1.)); + shared_objects[shared_scene->objects_size].id = shared_scene->objects_size; + shared_scene->objects[shared_scene->objects_size] = &shared_objects[shared_scene->objects_size]; + ++shared_scene->objects_size; + shared_objects[shared_scene->objects_size] = Geometry(Vector(0, 1000, 0), 940, Vector(1., 0., 0.)); + shared_objects[shared_scene->objects_size].id = shared_scene->objects_size; + shared_scene->objects[shared_scene->objects_size] = &shared_objects[shared_scene->objects_size]; + ++shared_scene->objects_size; + shared_objects[shared_scene->objects_size] = Geometry(Vector(-1000, 0, 0), 940, Vector(0., 1., 1.)); + shared_objects[shared_scene->objects_size].id = shared_scene->objects_size; + shared_scene->objects[shared_scene->objects_size] = &shared_objects[shared_scene->objects_size]; + ++shared_scene->objects_size; + shared_objects[shared_scene->objects_size] = Geometry(Vector(1000, 0, 0), 940, Vector(1., 1., 0.)); + shared_objects[shared_scene->objects_size].id = shared_scene->objects_size; + shared_scene->objects[shared_scene->objects_size] = &shared_objects[shared_scene->objects_size]; + ++shared_scene->objects_size; + shared_objects[shared_scene->objects_size] = Geometry(Vector(0, 0, 1000), 940, Vector(1., 0., 1.)); + shared_objects[shared_scene->objects_size].id = shared_scene->objects_size; + shared_scene->objects[shared_scene->objects_size] = &shared_objects[shared_scene->objects_size]; + ++shared_scene->objects_size; shared_scene->rand_states = shared_rand_states; } __syncthreads(); @@ -688,13 +548,6 @@ __global__ void KernelLaunch(Scene *s, double *colors, int W, int H, int num_ray colors[blockIdx.x * blockDim.x * 3 + blockDim.x * 2 + threadIdx.x] = shared_colors[blockDim.x * 2 + threadIdx.x]; } -__global__ void KernelDelete(Scene *s) { - for (auto x: s->objects) { - delete x; - } - delete s->rand_states; -} - int main(int argc, char **argv) { if (argc != 3) { std::cout << "Invalid number of arguments!\nThe first argument is number of rays and the second argument is number of bounces.\n"; @@ -731,18 +584,7 @@ int main(int argc, char **argv) { gpuErrchk( cudaMalloc((void**)&d_vertices, mesh_ptr->vertices.size() * sizeof(Vector)) ); gpuErrchk( cudaMemcpy(d_vertices, &(mesh_ptr->vertices[0]), mesh_ptr->vertices.size() * sizeof(Vector), cudaMemcpyHostToDevice) ); - // Init scene in kernel - KernelInit<<<1, BLOCK_DIM>>>(d_s, d_indices, mesh_ptr->indices.size(), d_vertices, mesh_ptr->vertices.size()); - gpuErrchk( cudaPeekAtLastError() ); - gpuErrchk( cudaDeviceSynchronize() ); - - // Launch kernel - KernelLaunch<<>>(d_s, d_colors, W, H, num_rays, num_bounce); - gpuErrchk( cudaPeekAtLastError() ); - gpuErrchk( cudaDeviceSynchronize() ); - - // Free objects in scene - KernelDelete<<<1, 1>>>(d_s); + KernelLaunch<<>>(d_colors, W, H, num_rays, num_bounce, d_indices, mesh_ptr->indices.size(), d_vertices, mesh_ptr->vertices.size()); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() );