diff --git a/src/surface/micropoly_batch.rs b/src/surface/micropoly_batch.rs index 8bb9447..05beed2 100644 --- a/src/surface/micropoly_batch.rs +++ b/src/surface/micropoly_batch.rs @@ -1,5 +1,7 @@ #![allow(dead_code)] +use std::collections::HashMap; + use mem_arena::MemArena; use crate::{ @@ -8,12 +10,14 @@ use crate::{ boundable::Boundable, lerp::lerp_slice, math::{cross, dot, Matrix4x4, Normal, Point}, - ray::{RayBatch, RayStack, RayTask} - shading::surface_closure::SurfaceClosure, + ray::{RayBatch, RayStack}, + shading::{SimpleSurfaceShader, SurfaceShader}, }; use super::{triangle, SurfaceIntersection, SurfaceIntersectionData}; +const MAX_LEAF_TRIANGLE_COUNT: usize = 3; + /// This is the core surface primitive for rendering: all surfaces are /// ultimately processed into pre-shaded micropolygon batches for rendering. /// @@ -29,7 +33,7 @@ pub struct MicropolyBatch<'a> { normals: &'a [Normal], // Per-vertex shading data. - vertex_closures: &'a [SurfaceClosure], + vertex_closures: &'a [SimpleSurfaceShader], // Micro-triangle indices. Each element of the tuple specifies the index // of a vertex, which indexes into all of the arrays above. @@ -42,60 +46,100 @@ pub struct MicropolyBatch<'a> { impl<'a> MicropolyBatch<'a> { pub fn from_verts_and_indices<'b>( arena: &'b MemArena, - geo_time_sample_count: usize, - verts: &[Point], - vert_normals: &[Normal], - vert_closures: &[SurfaceClosure], - triangles: &[(u32, u32, u32)], + verts: &[Vec], + vert_normals: &[Vec], + tri_indices: &[(usize, usize, usize)], ) -> MicropolyBatch<'b> { + let vert_count = verts[0].len(); + let time_sample_count = verts.len(); + + // Copy verts over to a contiguous area of memory, reorganizing them + // so that each vertices' time samples are contiguous in memory. + let vertices = { + let vertices = arena.alloc_array_uninitialized(vert_count * time_sample_count); + + for vi in 0..vert_count { + for ti in 0..time_sample_count { + unsafe { + *vertices[(vi * time_sample_count) + ti].as_mut_ptr() = verts[ti][vi]; + } + } + } + + unsafe { std::mem::transmute(vertices) } + }; + + // Copy vertex normals, if any, organizing them the same as vertices + // above. + let normals = { + let normals = arena.alloc_array_uninitialized(vert_count * time_sample_count); + + for vi in 0..vert_count { + for ti in 0..time_sample_count { + unsafe { + *normals[(vi * time_sample_count) + ti].as_mut_ptr() = vert_normals[ti][vi]; + } + } + } + + unsafe { std::mem::transmute(&normals[..]) } + }; + + // Copy triangle vertex indices over, appending the triangle index itself to the tuple + let indices: &mut [(u32, u32, u32)] = { + let indices = arena.alloc_array_uninitialized(tri_indices.len()); + for (i, tri_i) in tri_indices.iter().enumerate() { + unsafe { + *indices[i].as_mut_ptr() = (tri_i.0 as u32, tri_i.2 as u32, tri_i.1 as u32); + } + } + unsafe { std::mem::transmute(indices) } + }; + // Create bounds array for use during BVH construction - let bounds = { - let mut bounds = Vec::with_capacity(triangles.len() * geo_time_sample_count); - for tri in triangles { - for ti in 0..geo_time_sample_count { - let p0 = verts[(tri.0 as usize * geo_time_sample_count) + ti]; - let p1 = verts[(tri.1 as usize * geo_time_sample_count) + ti]; - let p2 = verts[(tri.2 as usize * geo_time_sample_count) + ti]; + let (bounds, bounds_map) = { + let mut bounds = Vec::with_capacity(indices.len() * time_sample_count); + let mut bounds_map = HashMap::new(); + + for tri in tri_indices { + let start = bounds.len(); + for ti in 0..time_sample_count { + let p0 = verts[ti][tri.0]; + let p1 = verts[ti][tri.1]; + let p2 = verts[ti][tri.2]; let minimum = p0.min(p1.min(p2)); let maximum = p0.max(p1.max(p2)); bounds.push(BBox::from_points(minimum, maximum)); } + let end = bounds.len(); + bounds_map.insert((tri.0 as u32, tri.1 as u32, tri.2 as u32), (start, end)); } - bounds + (bounds, bounds_map) }; - // Create an array of triangle indices for use during the BVH build. - let mut tmp_indices: Vec<_> = (0u32..(triangles.len() as u32)).collect(); - // Build BVH - let accel = BVH4::from_objects(arena, &mut tmp_indices[..], 3, |index| { - &bounds[(*index as usize * geo_time_sample_count) - ..((*index as usize + 1) * geo_time_sample_count)] + let accel = BVH4::from_objects(arena, &mut indices[..], MAX_LEAF_TRIANGLE_COUNT, |tri| { + let (start, end) = bounds_map[tri]; + &bounds[start..end] }); - // Copy triangle vertex indices over in the post-bvh-build order. - let indices = { - let indices = unsafe { arena.alloc_array_uninitialized(triangles.len()) }; - for (i, tmp_i) in tmp_indices.iter().enumerate() { - indices[i] = triangles[*tmp_i as usize]; - } - indices - }; - MicropolyBatch { - time_sample_count: geo_time_sample_count, - vertices: arena.copy_slice(verts), - normals: arena.copy_slice(vert_normals), - - vertex_closures: arena.copy_slice(vert_closures), - + time_sample_count: time_sample_count, + vertices: vertices, + normals: normals, + vertex_closures: &[], indices: indices, - accel: accel, } } } +impl<'a> Boundable for MicropolyBatch<'a> { + fn bounds(&self) -> &[BBox] { + self.accel.bounds() + } +} + impl<'a> MicropolyBatch<'a> { fn intersect_rays( &self, @@ -106,156 +150,195 @@ impl<'a> MicropolyBatch<'a> { ) { // Precalculate transform for non-motion blur cases let static_mat_space = if space.len() == 1 { - space[0].inverse() + lerp_slice(space, 0.0).inverse() } else { Matrix4x4::new() }; self.accel - .traverse(rays, ray_stack, self.indices, |tri_indices, rs| { - // For static triangles with static transforms, cache them. - let is_cached = self.time_sample_count == 1 && space.len() <= 1; - let mut tri = if is_cached { - let tri = ( - self.vertices[tri_indices.0 as usize], - self.vertices[tri_indices.1 as usize], - self.vertices[tri_indices.2 as usize], - ); - if space.is_empty() { - tri - } else { - ( - tri.0 * static_mat_space, - tri.1 * static_mat_space, - tri.2 * static_mat_space, - ) - } - } else { - unsafe { std::mem::uninitialized() } - }; + .traverse(rays, ray_stack, |idx_range, rays, ray_stack| { + let tri_count = idx_range.end - idx_range.start; - // Test each ray against the current triangle. - for r in rs { - let wr = &wrays[r.id as usize]; + // Build the triangle cache if we can! + let is_cached = ray_stack.ray_count_in_next_task() >= tri_count + && self.time_sample_count == 1 + && space.len() <= 1; + let mut tri_cache = [std::mem::MaybeUninit::uninit(); MAX_LEAF_TRIANGLE_COUNT]; + if is_cached { + for tri_idx in idx_range.clone() { + let i = tri_idx - idx_range.start; + let tri_indices = self.indices[tri_idx]; - // Get triangle if necessary - if !is_cached { - tri = if self.time_sample_count == 1 { - // No deformation motion blur, so fast-path it. - ( + // For static triangles with static transforms, cache them. + unsafe { + *tri_cache[i].as_mut_ptr() = ( self.vertices[tri_indices.0 as usize], self.vertices[tri_indices.1 as usize], self.vertices[tri_indices.2 as usize], - ) - } else { - // Deformation motion blur, need to interpolate. - let p0_slice = &self.vertices[(tri_indices.0 as usize - * self.time_sample_count) - ..((tri_indices.0 as usize + 1) * self.time_sample_count)]; - let p1_slice = &self.vertices[(tri_indices.1 as usize - * self.time_sample_count) - ..((tri_indices.1 as usize + 1) * self.time_sample_count)]; - let p2_slice = &self.vertices[(tri_indices.2 as usize - * self.time_sample_count) - ..((tri_indices.2 as usize + 1) * self.time_sample_count)]; - - let p0 = lerp_slice(p0_slice, wr.time); - let p1 = lerp_slice(p1_slice, wr.time); - let p2 = lerp_slice(p2_slice, wr.time); - - (p0, p1, p2) - }; - } - - // Transform triangle if necessary, and get transform space. - let mat_space = if !space.is_empty() { - if space.len() > 1 { - // Per-ray transform, for motion blur - let mat_space = lerp_slice(space, wr.time).inverse(); - tri = (tri.0 * mat_space, tri.1 * mat_space, tri.2 * mat_space); - mat_space - } else { - // Same transform for all rays - if !is_cached { - tri = ( - tri.0 * static_mat_space, - tri.1 * static_mat_space, - tri.2 * static_mat_space, - ); - } - static_mat_space - } - } else { - // No transforms - Matrix4x4::new() - }; - - // Test ray against triangle - if let Some((t, b0, b1, b2)) = triangle::intersect_ray(wr, tri) { - if t < r.max_t { - if r.is_occlusion() { - isects[r.id as usize] = SurfaceIntersection::Occlude; - r.mark_done(); - } else { - // Calculate intersection point and error magnitudes - let (pos, pos_err) = triangle::surface_point(tri, (b0, b1, b2)); - - // Calculate geometric surface normal - let geo_normal = cross(tri.0 - tri.1, tri.0 - tri.2).into_normal(); - - // Calculate interpolated surface normal - let shading_normal = { - let n0_slice = &self.normals[(tri_indices.0 as usize - * self.time_sample_count) - ..((tri_indices.0 as usize + 1) * self.time_sample_count)]; - let n1_slice = &self.normals[(tri_indices.1 as usize - * self.time_sample_count) - ..((tri_indices.1 as usize + 1) * self.time_sample_count)]; - let n2_slice = &self.normals[(tri_indices.2 as usize - * self.time_sample_count) - ..((tri_indices.2 as usize + 1) * self.time_sample_count)]; - - let n0 = lerp_slice(n0_slice, wr.time).normalized(); - let n1 = lerp_slice(n1_slice, wr.time).normalized(); - let n2 = lerp_slice(n2_slice, wr.time).normalized(); - - let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space; - if dot(s_nor, geo_normal) >= 0.0 { - s_nor - } else { - -s_nor - } - }; - - // Calculate surface closure - // TODO: use interpolation between the vertices - let surface_closure = self.vertex_closures[tri_indices.0 as usize]; - - // Fill in intersection data - isects[r.id as usize] = SurfaceIntersection::Hit { - intersection_data: SurfaceIntersectionData { - incoming: wr.dir, - t: t, - pos: pos, - pos_err: pos_err, - nor: shading_normal, - nor_g: geo_normal, - local_space: mat_space, - sample_pdf: 0.0, - }, - closure: surface_closure, - }; - r.max_t = t; + ); + if !space.is_empty() { + (*tri_cache[i].as_mut_ptr()).0 = + (*tri_cache[i].as_mut_ptr()).0 * static_mat_space; + (*tri_cache[i].as_mut_ptr()).1 = + (*tri_cache[i].as_mut_ptr()).1 * static_mat_space; + (*tri_cache[i].as_mut_ptr()).2 = + (*tri_cache[i].as_mut_ptr()).2 * static_mat_space; } } } } + + // Test each ray against the triangles. + ray_stack.do_next_task(|ray_idx| { + let ray_idx = ray_idx as usize; + + if rays.is_done(ray_idx) { + return; + } + + let ray_time = rays.time(ray_idx); + + // Calculate the ray space, if necessary. + let mat_space = if space.len() > 1 { + // Per-ray transform, for motion blur + lerp_slice(space, ray_time).inverse() + } else { + static_mat_space + }; + + // Iterate through the triangles and test the ray against them. + let mut non_shadow_hit = false; + let mut hit_tri = std::mem::MaybeUninit::uninit(); + let mut hit_tri_indices = std::mem::MaybeUninit::uninit(); + let mut hit_tri_data = std::mem::MaybeUninit::uninit(); + let ray_pre = triangle::RayTriPrecompute::new(rays.dir(ray_idx)); + for tri_idx in idx_range.clone() { + let tri_indices = self.indices[tri_idx]; + + // Get triangle if necessary + let tri = if is_cached { + let i = tri_idx - idx_range.start; + unsafe { tri_cache[i].assume_init() } + } else { + let mut tri = if self.time_sample_count == 1 { + // No deformation motion blur, so fast-path it. + ( + self.vertices[tri_indices.0 as usize], + self.vertices[tri_indices.1 as usize], + self.vertices[tri_indices.2 as usize], + ) + } else { + // Deformation motion blur, need to interpolate. + let p0_slice = &self.vertices[(tri_indices.0 as usize + * self.time_sample_count) + ..((tri_indices.0 as usize + 1) * self.time_sample_count)]; + let p1_slice = &self.vertices[(tri_indices.1 as usize + * self.time_sample_count) + ..((tri_indices.1 as usize + 1) * self.time_sample_count)]; + let p2_slice = &self.vertices[(tri_indices.2 as usize + * self.time_sample_count) + ..((tri_indices.2 as usize + 1) * self.time_sample_count)]; + + let p0 = lerp_slice(p0_slice, ray_time); + let p1 = lerp_slice(p1_slice, ray_time); + let p2 = lerp_slice(p2_slice, ray_time); + + (p0, p1, p2) + }; + + if !space.is_empty() { + tri.0 = tri.0 * mat_space; + tri.1 = tri.1 * mat_space; + tri.2 = tri.2 * mat_space; + } + + tri + }; + + // Test ray against triangle + if let Some((t, b0, b1, b2)) = triangle::intersect_ray( + rays.orig(ray_idx), + ray_pre, + rays.max_t(ray_idx), + tri, + ) { + if rays.is_occlusion(ray_idx) { + isects[ray_idx] = SurfaceIntersection::Occlude; + rays.mark_done(ray_idx); + break; + } else { + non_shadow_hit = true; + rays.set_max_t(ray_idx, t); + unsafe { + *hit_tri.as_mut_ptr() = tri; + *hit_tri_indices.as_mut_ptr() = tri_indices; + *hit_tri_data.as_mut_ptr() = (t, b0, b1, b2); + } + } + } + } + + // Calculate intersection data if necessary. + if non_shadow_hit { + let hit_tri = unsafe { hit_tri.assume_init() }; + let hit_tri_indices = unsafe { hit_tri_indices.assume_init() }; + let (t, b0, b1, b2) = unsafe { hit_tri_data.assume_init() }; + + // Calculate intersection point and error magnitudes + let (pos, pos_err) = triangle::surface_point(hit_tri, (b0, b1, b2)); + + // Calculate geometric surface normal + let geo_normal = + cross(hit_tri.0 - hit_tri.1, hit_tri.0 - hit_tri.2).into_normal(); + + // Calculate interpolated surface normal + let shading_normal = { + let n0_slice = &self.normals[(hit_tri_indices.0 as usize + * self.time_sample_count) + ..((hit_tri_indices.0 as usize + 1) * self.time_sample_count)]; + let n1_slice = &self.normals[(hit_tri_indices.1 as usize + * self.time_sample_count) + ..((hit_tri_indices.1 as usize + 1) * self.time_sample_count)]; + let n2_slice = &self.normals[(hit_tri_indices.2 as usize + * self.time_sample_count) + ..((hit_tri_indices.2 as usize + 1) * self.time_sample_count)]; + + let n0 = lerp_slice(n0_slice, ray_time).normalized(); + let n1 = lerp_slice(n1_slice, ray_time).normalized(); + let n2 = lerp_slice(n2_slice, ray_time).normalized(); + + let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space; + if dot(s_nor, geo_normal) >= 0.0 { + s_nor + } else { + -s_nor + } + }; + + // Calculate interpolated surface closure. + // TODO: actually interpolate. + let closure = self.vertex_closures + [hit_tri_indices.0 as usize * self.time_sample_count]; + + let intersection_data = SurfaceIntersectionData { + incoming: rays.dir(ray_idx), + t: t, + pos: pos, + pos_err: pos_err, + nor: shading_normal, + nor_g: geo_normal, + local_space: mat_space, + sample_pdf: 0.0, + }; + + // Fill in intersection data + isects[ray_idx] = SurfaceIntersection::Hit { + intersection_data: intersection_data, + closure: closure.shade(&intersection_data, ray_time), + }; + } + }); + ray_stack.pop_task(); }); } } - -impl<'a> Boundable for MicropolyBatch<'a> { - fn bounds(&self) -> &[BBox] { - self.accel.bounds() - } -} diff --git a/src/surface/mod.rs b/src/surface/mod.rs index 2f90223..956cb25 100644 --- a/src/surface/mod.rs +++ b/src/surface/mod.rs @@ -1,6 +1,7 @@ #![allow(dead_code)] // pub mod micropoly_batch; +pub mod micropoly_batch; pub mod triangle; pub mod triangle_mesh;