From 4ef376dc89a8fc7453010609cdda9841de875e34 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Sat, 29 Jun 2019 08:28:41 +0900 Subject: [PATCH] Move multiple-object logic out of BVH4. This allows each part of Psychopath to handle the logic in the best way, instead of a one-size-fits-all approach. --- src/accel/bvh4.rs | 21 +-- src/ray.rs | 27 +++- src/surface/triangle_mesh.rs | 261 ++++++++++++++++++----------------- src/tracer.rs | 18 ++- 4 files changed, 163 insertions(+), 164 deletions(-) diff --git a/src/accel/bvh4.rs b/src/accel/bvh4.rs index d7e68e1..e5298e7 100644 --- a/src/accel/bvh4.rs +++ b/src/accel/bvh4.rs @@ -96,14 +96,9 @@ impl<'a> BVH4<'a> { self.depth } - pub fn traverse( - &self, - rays: &mut RayBatch, - ray_stack: &mut RayStack, - objects: &[T], - mut obj_ray_test: F, - ) where - F: FnMut(&T, &mut RayBatch, &mut RayStack), + pub fn traverse(&self, rays: &mut RayBatch, ray_stack: &mut RayStack, mut obj_ray_test: F) + where + F: FnMut(std::ops::Range, &mut RayBatch, &mut RayStack), { if self.root.is_none() { return; @@ -170,16 +165,8 @@ impl<'a> BVH4<'a> { &BVH4Node::Leaf { object_range } => { trav_time += timer.tick() as f64; - // Set up the tasks for each object. - let obj_count = object_range.1 - object_range.0; - for _ in 0..(obj_count - 1) { - ray_stack.duplicate_next_task(); - } - // Do the ray tests. - for obj in &objects[object_range.0..object_range.1] { - obj_ray_test(obj, rays, ray_stack); - } + obj_ray_test(object_range.0..object_range.1, rays, ray_stack); timer.tick(); diff --git a/src/ray.rs b/src/ray.rs index 2fa92de..97bdd39 100644 --- a/src/ray.rs +++ b/src/ray.rs @@ -292,24 +292,37 @@ impl RayStack { self.lanes[l].end_len = self.lanes[l].idxs.len(); } - /// Pops the next task off the stack, and executes the provided closure for - /// each ray index in the task. - pub fn pop_do_next_task(&mut self, mut handle_ray: F) + // Pops the next task off the stack. 
+ pub fn pop_task(&mut self) { + let task = self.tasks.pop().unwrap(); + self.lanes[task.lane].end_len = task.start_idx; + self.lanes[task.lane].idxs.truncate(task.start_idx); + } + + // Executes a task without popping it from the task stack. + pub fn do_next_task(&mut self, mut handle_ray: F) where F: FnMut(usize), { - // Pop the task and do necessary bookkeeping. - let task = self.tasks.pop().unwrap(); + let task = self.tasks.last().unwrap(); let task_range = (task.start_idx, self.lanes[task.lane].end_len); - self.lanes[task.lane].end_len = task.start_idx; // Execute task. for i in task_range.0..task_range.1 { let ray_idx = self.lanes[task.lane].idxs[i]; handle_ray(ray_idx as usize); } + } - self.lanes[task.lane].idxs.truncate(task_range.0); + /// Pops the next task off the stack, and executes the provided closure for + /// each ray index in the task. + #[inline(always)] + pub fn pop_do_next_task(&mut self, handle_ray: F) + where + F: FnMut(usize), + { + self.do_next_task(handle_ray); + self.pop_task(); } /// Pops the next task off the stack, executes the provided closure for diff --git a/src/surface/triangle_mesh.rs b/src/surface/triangle_mesh.rs index 1b54232..468edf7 100644 --- a/src/surface/triangle_mesh.rs +++ b/src/surface/triangle_mesh.rs @@ -130,153 +130,154 @@ impl<'a> Surface for TriangleMesh<'a> { Matrix4x4::new() }; - self.accel.traverse( - rays, - ray_stack, - self.indices, - |tri_indices, rays, ray_stack| { - // For static triangles with static transforms, cache them. 
- let is_cached = self.time_sample_count == 1 && space.len() <= 1; - let mut tri = if is_cached { - let tri = ( - self.vertices[tri_indices.0 as usize], - self.vertices[tri_indices.1 as usize], - self.vertices[tri_indices.2 as usize], - ); - if space.is_empty() { - tri - } else { - ( - tri.0 * static_mat_space, - tri.1 * static_mat_space, - tri.2 * static_mat_space, - ) - } - } else { - unsafe { std::mem::uninitialized() } - }; + self.accel + .traverse(rays, ray_stack, |idx_range, rays, ray_stack| { + for tri_idx in idx_range { + let tri_indices = self.indices[tri_idx]; - // Test each ray against the current triangle. - ray_stack.pop_do_next_task(|ray_idx| { - let ray_idx = ray_idx as usize; - let ray_time = rays.time(ray_idx); - - // Get triangle if necessary - if !is_cached { - tri = if self.time_sample_count == 1 { - // No deformation motion blur, so fast-path it. + // For static triangles with static transforms, cache them. + let is_cached = self.time_sample_count == 1 && space.len() <= 1; + let mut tri = if is_cached { + let tri = ( + self.vertices[tri_indices.0 as usize], + self.vertices[tri_indices.1 as usize], + self.vertices[tri_indices.2 as usize], + ); + if space.is_empty() { + tri + } else { ( - self.vertices[tri_indices.0 as usize], - self.vertices[tri_indices.1 as usize], - self.vertices[tri_indices.2 as usize], + tri.0 * static_mat_space, + tri.1 * static_mat_space, + tri.2 * static_mat_space, ) - } else { - // Deformation motion blur, need to interpolate. 
- let p0_slice = &self.vertices[(tri_indices.0 as usize - * self.time_sample_count) - ..((tri_indices.0 as usize + 1) * self.time_sample_count)]; - let p1_slice = &self.vertices[(tri_indices.1 as usize - * self.time_sample_count) - ..((tri_indices.1 as usize + 1) * self.time_sample_count)]; - let p2_slice = &self.vertices[(tri_indices.2 as usize - * self.time_sample_count) - ..((tri_indices.2 as usize + 1) * self.time_sample_count)]; - - let p0 = lerp_slice(p0_slice, ray_time); - let p1 = lerp_slice(p1_slice, ray_time); - let p2 = lerp_slice(p2_slice, ray_time); - - (p0, p1, p2) - }; - } - - // Transform triangle if necessary, and get transform space. - let mat_space = if !space.is_empty() { - if space.len() > 1 { - // Per-ray transform, for motion blur - let mat_space = lerp_slice(space, ray_time).inverse(); - tri = (tri.0 * mat_space, tri.1 * mat_space, tri.2 * mat_space); - mat_space - } else { - // Same transform for all rays - if !is_cached { - tri = ( - tri.0 * static_mat_space, - tri.1 * static_mat_space, - tri.2 * static_mat_space, - ); - } - static_mat_space } } else { - // No transforms - Matrix4x4::new() + unsafe { std::mem::uninitialized() } }; - // Test ray against triangle - if let Some((t, b0, b1, b2)) = triangle::intersect_ray( - rays.orig(ray_idx), - rays.dir(ray_idx), - rays.max_t(ray_idx), - tri, - ) { - if rays.is_occlusion(ray_idx) { - isects[ray_idx] = SurfaceIntersection::Occlude; - rays.mark_done(ray_idx); - } else { - // Calculate intersection point and error magnitudes - let (pos, pos_err) = triangle::surface_point(tri, (b0, b1, b2)); + // Test each ray against the current triangle. 
+ ray_stack.do_next_task(|ray_idx| { + let ray_idx = ray_idx as usize; + let ray_time = rays.time(ray_idx); - // Calculate geometric surface normal - let geo_normal = cross(tri.0 - tri.1, tri.0 - tri.2).into_normal(); - - // Calculate interpolated surface normal, if any - let shading_normal = if let Some(normals) = self.normals { - let n0_slice = &normals[(tri_indices.0 as usize + // Get triangle if necessary + if !is_cached { + tri = if self.time_sample_count == 1 { + // No deformation motion blur, so fast-path it. + ( + self.vertices[tri_indices.0 as usize], + self.vertices[tri_indices.1 as usize], + self.vertices[tri_indices.2 as usize], + ) + } else { + // Deformation motion blur, need to interpolate. + let p0_slice = &self.vertices[(tri_indices.0 as usize * self.time_sample_count) ..((tri_indices.0 as usize + 1) * self.time_sample_count)]; - let n1_slice = &normals[(tri_indices.1 as usize + let p1_slice = &self.vertices[(tri_indices.1 as usize * self.time_sample_count) ..((tri_indices.1 as usize + 1) * self.time_sample_count)]; - let n2_slice = &normals[(tri_indices.2 as usize + let p2_slice = &self.vertices[(tri_indices.2 as usize * self.time_sample_count) ..((tri_indices.2 as usize + 1) * self.time_sample_count)]; - let n0 = lerp_slice(n0_slice, ray_time).normalized(); - let n1 = lerp_slice(n1_slice, ray_time).normalized(); - let n2 = lerp_slice(n2_slice, ray_time).normalized(); + let p0 = lerp_slice(p0_slice, ray_time); + let p1 = lerp_slice(p1_slice, ray_time); + let p2 = lerp_slice(p2_slice, ray_time); - let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space; - if dot(s_nor, geo_normal) >= 0.0 { - s_nor - } else { - -s_nor - } - } else { - geo_normal + (p0, p1, p2) }; - - let intersection_data = SurfaceIntersectionData { - incoming: rays.dir(ray_idx), - t: t, - pos: pos, - pos_err: pos_err, - nor: shading_normal, - nor_g: geo_normal, - local_space: mat_space, - sample_pdf: 0.0, - }; - - // Fill in intersection data - isects[ray_idx] = 
SurfaceIntersection::Hit { - intersection_data: intersection_data, - closure: shader.shade(&intersection_data, ray_time), - }; - rays.set_max_t(ray_idx, t); } - } - }); - }, - ); + + // Transform triangle if necessary, and get transform space. + let mat_space = if !space.is_empty() { + if space.len() > 1 { + // Per-ray transform, for motion blur + let mat_space = lerp_slice(space, ray_time).inverse(); + tri = (tri.0 * mat_space, tri.1 * mat_space, tri.2 * mat_space); + mat_space + } else { + // Same transform for all rays + if !is_cached { + tri = ( + tri.0 * static_mat_space, + tri.1 * static_mat_space, + tri.2 * static_mat_space, + ); + } + static_mat_space + } + } else { + // No transforms + Matrix4x4::new() + }; + + // Test ray against triangle + if let Some((t, b0, b1, b2)) = triangle::intersect_ray( + rays.orig(ray_idx), + rays.dir(ray_idx), + rays.max_t(ray_idx), + tri, + ) { + if rays.is_occlusion(ray_idx) { + isects[ray_idx] = SurfaceIntersection::Occlude; + rays.mark_done(ray_idx); + } else { + // Calculate intersection point and error magnitudes + let (pos, pos_err) = triangle::surface_point(tri, (b0, b1, b2)); + + // Calculate geometric surface normal + let geo_normal = cross(tri.0 - tri.1, tri.0 - tri.2).into_normal(); + + // Calculate interpolated surface normal, if any + let shading_normal = if let Some(normals) = self.normals { + let n0_slice = &normals[(tri_indices.0 as usize + * self.time_sample_count) + ..((tri_indices.0 as usize + 1) * self.time_sample_count)]; + let n1_slice = &normals[(tri_indices.1 as usize + * self.time_sample_count) + ..((tri_indices.1 as usize + 1) * self.time_sample_count)]; + let n2_slice = &normals[(tri_indices.2 as usize + * self.time_sample_count) + ..((tri_indices.2 as usize + 1) * self.time_sample_count)]; + + let n0 = lerp_slice(n0_slice, ray_time).normalized(); + let n1 = lerp_slice(n1_slice, ray_time).normalized(); + let n2 = lerp_slice(n2_slice, ray_time).normalized(); + + let s_nor = ((n0 * b0) + (n1 * b1) + 
(n2 * b2)) * mat_space; + if dot(s_nor, geo_normal) >= 0.0 { + s_nor + } else { + -s_nor + } + } else { + geo_normal + }; + + let intersection_data = SurfaceIntersectionData { + incoming: rays.dir(ray_idx), + t: t, + pos: pos, + pos_err: pos_err, + nor: shading_normal, + nor_g: geo_normal, + local_space: mat_space, + sample_pdf: 0.0, + }; + + // Fill in intersection data + isects[ray_idx] = SurfaceIntersection::Hit { + intersection_data: intersection_data, + closure: shader.shade(&intersection_data, ray_time), + }; + rays.set_max_t(ray_idx, t); + } + } + }); + } + ray_stack.pop_task(); + }); } } diff --git a/src/tracer.rs b/src/tracer.rs index e733cdd..7969d8d 100644 --- a/src/tracer.rs +++ b/src/tracer.rs @@ -85,11 +85,11 @@ impl<'a> TracerInner<'a> { rays: &mut RayBatch, ray_stack: &mut RayStack, ) { - assembly.object_accel.traverse( - rays, - ray_stack, - &assembly.instances[..], - |inst, rays, ray_stack| { + assembly + .object_accel + .traverse(rays, ray_stack, |idx_range, rays, ray_stack| { + let inst = &assembly.instances[idx_range.start]; + // Transform rays if needed if let Some((xstart, xend)) = inst.transform_indices { // Push transforms to stack @@ -98,12 +98,11 @@ impl<'a> TracerInner<'a> { // Do transforms // TODO: re-divide rays based on direction (maybe?). let xforms = self.xform_stack.top(); - ray_stack.pop_do_next_task_and_push_rays(2, |ray_idx| { + ray_stack.do_next_task(|ray_idx| { let t = rays.time(ray_idx); rays.update_local(ray_idx, &lerp_slice(xforms, t)); - (Bool4::new(true, true, false, false), 2) }); - ray_stack.push_lanes_to_tasks(&[0, 1]); + ray_stack.duplicate_next_task(); } // Trace rays @@ -142,8 +141,7 @@ impl<'a> TracerInner<'a> { }); } } - }, - ); + }); } fn trace_object<'b>(