From 09daf617ef6e662cfa09e55b27757e9d2590bcf5 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Sat, 1 Jul 2017 15:08:05 -0700 Subject: [PATCH] Implemented a non-SIMD BVH4. Perf appears to be identical to BVH. --- src/accel/bvh4.rs | 330 ++++++++++++++++++++++++++++++++ src/accel/mod.rs | 2 + src/main.rs | 4 +- src/scene/assembly.rs | 8 +- src/surface/triangle_mesh.rs | 6 +- sub_crates/bvh_order/src/lib.rs | 13 +- 6 files changed, 350 insertions(+), 13 deletions(-) create mode 100644 src/accel/bvh4.rs diff --git a/src/accel/bvh4.rs b/src/accel/bvh4.rs new file mode 100644 index 0000000..50247e3 --- /dev/null +++ b/src/accel/bvh4.rs @@ -0,0 +1,330 @@ +#![allow(dead_code)] + +use std; + +use mem_arena::MemArena; + +use algorithm::partition; +use bbox::BBox; +use boundable::Boundable; +use lerp::lerp_slice; +use ray::AccelRay; +use timer::Timer; + +use bvh_order::{TRAVERSAL_TABLE, SplitAxes, calc_traversal_code}; +use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH}; +use super::ACCEL_TRAV_TIME; + + +#[derive(Copy, Clone, Debug)] +pub struct BVH4<'a> { + root: Option<&'a BVH4Node<'a>>, + depth: usize, +} + +#[derive(Copy, Clone, Debug)] +pub enum BVH4Node<'a> { + Inner { + traversal_code: u8, + bounds_start: &'a BBox, + bounds_len: u16, + children: &'a [BVH4Node<'a>], + }, + + Leaf { + bounds_start: &'a BBox, + bounds_len: u16, + object_range: (usize, usize), + }, +} + +impl<'a> BVH4<'a> { + pub fn from_objects<'b, T, F>( + arena: &'a MemArena, + objects: &mut [T], + objects_per_leaf: usize, + bounder: F, + ) -> BVH4<'a> + where + F: 'b + Fn(&T) -> &'b [BBox], + { + if objects.len() == 0 { + BVH4 { + root: None, + depth: 0, + } + } else { + let base = BVHBase::from_objects(objects, objects_per_leaf, bounder); + + let mut root = unsafe { arena.alloc_uninitialized::() }; + BVH4::construct_from_base(arena, &base, base.root_node_index(), root); + BVH4 { + root: Some(root), + depth: base.depth, + } + } + } + + pub fn tree_depth(&self) -> usize { + self.depth + } + + pub fn traverse(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F) + where + F: FnMut(&T, &mut [AccelRay]), + { + if self.root.is_none() { + return; + } + + let mut timer = Timer::new(); + let mut trav_time: f64 = 0.0; + + let traversal_table = { + let ray_sign_is_neg = [ + rays[0].dir_inv.x() < 0.0, + rays[0].dir_inv.y() < 0.0, + rays[0].dir_inv.z() < 0.0, + ]; + let ray_code = ray_sign_is_neg[0] as usize + ((ray_sign_is_neg[1] as usize) << 1) + + ((ray_sign_is_neg[2] as usize) << 2); + &TRAVERSAL_TABLE[ray_code] + }; + + // +2 of max depth for root and last child + let mut node_stack = [self.root.unwrap(); (BVH_MAX_DEPTH * 3) + 2]; + let mut ray_i_stack = [rays.len(); (BVH_MAX_DEPTH * 3) + 2]; + let mut stack_ptr = 1; + + while stack_ptr > 0 { + match node_stack[stack_ptr] { + &BVH4Node::Inner { + traversal_code, + bounds_start, + bounds_len, + children, + } => { + let bounds = + unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) }; + let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| { + (!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r) + }); + if part > 0 { + let order_code = traversal_table[traversal_code as usize]; + match children.len() { + 4 => { + let i4 = ((order_code >> 6) & 0b11) as usize; + let i3 = ((order_code >> 4) & 0b11) as usize; + let i2 = ((order_code >> 2) & 0b11) as usize; + let i1 = (order_code & 0b11) as usize; + + ray_i_stack[stack_ptr] = part; + ray_i_stack[stack_ptr + 1] = part; + ray_i_stack[stack_ptr + 2] = part; + ray_i_stack[stack_ptr + 3] = part; + + node_stack[stack_ptr] = &children[i4]; + node_stack[stack_ptr + 1] = &children[i3]; + node_stack[stack_ptr + 2] = &children[i2]; + node_stack[stack_ptr + 3] = &children[i1]; + + stack_ptr += 3; + } + 3 => { + let i3 = ((order_code >> 4) & 0b11) as usize; + let i2 = ((order_code >> 2) & 0b11) as usize; + let i1 = (order_code & 0b11) as usize; + + ray_i_stack[stack_ptr] = part; + ray_i_stack[stack_ptr + 1] = part; + ray_i_stack[stack_ptr + 2] = part; + + node_stack[stack_ptr] = &children[i3]; + node_stack[stack_ptr + 1] = &children[i2]; + node_stack[stack_ptr + 2] = &children[i1]; + + stack_ptr += 2; + } + 2 => { + let i2 = ((order_code >> 2) & 0b11) as usize; + let i1 = (order_code & 0b11) as usize; + + ray_i_stack[stack_ptr] = part; + ray_i_stack[stack_ptr + 1] = part; + + node_stack[stack_ptr] = &children[i2]; + node_stack[stack_ptr + 1] = &children[i1]; + + stack_ptr += 1; + } + _ => unreachable!(), + } + } else { + stack_ptr -= 1; + } + } + + &BVH4Node::Leaf { + object_range, + bounds_start, + bounds_len, + } => { + let bounds = + unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) }; + let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| { + (!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r) + }); + + trav_time += timer.tick() as f64; + + if part > 0 { + for obj in &objects[object_range.0..object_range.1] { + obj_ray_test(obj, &mut rays[..part]); + } + } + + timer.tick(); + + stack_ptr -= 1; + } + } + } + + trav_time += timer.tick() as f64; + ACCEL_TRAV_TIME.with(|att| { + let v = att.get(); + att.set(v + trav_time); + }); + } + + fn construct_from_base( + arena: &'a MemArena, + base: &BVHBase, + node_index: usize, + node_mem: &mut BVH4Node<'a>, + ) { + match &base.nodes[node_index] { + &BVHBaseNode::Internal { + bounds_range, + children_indices, + split_axis, + } => { + let child_l = &base.nodes[children_indices.0]; + let child_r = &base.nodes[children_indices.1]; + + // Prepare convenient access to the stuff we need. + let child_count: usize; + let child_indices: [usize; 4]; + let split_info: SplitAxes; + match child_l { + &BVHBaseNode::Internal { + children_indices: i_l, + split_axis: s_l, + .. + } => { + match child_r { + &BVHBaseNode::Internal { + children_indices: i_r, + split_axis: s_r, + .. + } => { + // Four nodes + child_count = 4; + child_indices = [i_l.0, i_l.1, i_r.0, i_r.1]; + split_info = SplitAxes::Full((split_axis, s_l, s_r)); + } + &BVHBaseNode::Leaf { .. } => { + // Three nodes with left split + child_count = 3; + child_indices = [i_l.0, i_l.1, children_indices.1, 0]; + split_info = SplitAxes::Left((split_axis, s_l)); + } + } + } + &BVHBaseNode::Leaf { .. } => { + match child_r { + &BVHBaseNode::Internal { + children_indices: i_r, + split_axis: s_r, + .. + } => { + // Three nodes with right split + child_count = 3; + child_indices = [children_indices.0, i_r.0, i_r.1, 0]; + split_info = SplitAxes::Right((split_axis, s_r)); + } + &BVHBaseNode::Leaf { .. } => { + // Two nodes + child_count = 2; + child_indices = [children_indices.0, children_indices.1, 0, 0]; + split_info = SplitAxes::TopOnly(split_axis); + } + } + } + } + + // Copy bounds + let bounds = arena.copy_slice_with_alignment( + &base.bounds[bounds_range.0..bounds_range.1], + 32, + ); + + // Build children + let mut children_mem = unsafe { + arena.alloc_array_uninitialized_with_alignment::(child_count, 32) + }; + for i in 0..child_count { + BVH4::construct_from_base(arena, base, child_indices[i], &mut children_mem[i]); + } + + // Fill in node + *node_mem = BVH4Node::Inner { + traversal_code: calc_traversal_code(split_info), + bounds_start: &bounds[0], + bounds_len: bounds.len() as u16, + children: children_mem, + }; + } + + &BVHBaseNode::Leaf { + bounds_range, + object_range, + } => { + let bounds = arena.copy_slice(&base.bounds[bounds_range.0..bounds_range.1]); + + *node_mem = BVH4Node::Leaf { + bounds_start: &bounds[0], + bounds_len: bounds.len() as u16, + object_range: object_range, + }; + } + } + } +} + +lazy_static! { + static ref DEGENERATE_BOUNDS: [BBox; 1] = [BBox::new()]; +} + +impl<'a> Boundable for BVH4<'a> { + fn bounds<'b>(&'b self) -> &'b [BBox] { + match self.root { + None => &DEGENERATE_BOUNDS[..], + Some(root) => { + match root { + &BVH4Node::Inner { + bounds_start, + bounds_len, + .. + } => unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) }, + + &BVH4Node::Leaf { + bounds_start, + bounds_len, + .. + } => unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) }, + } + } + } + } +} diff --git a/src/accel/mod.rs b/src/accel/mod.rs index 9de5a68..1f54ad7 100644 --- a/src/accel/mod.rs +++ b/src/accel/mod.rs @@ -1,5 +1,6 @@ mod bvh_base; mod bvh; +mod bvh4; mod light_array; mod light_tree; mod objects_split; @@ -10,6 +11,7 @@ use math::{Vector, Point, Normal}; use shading::surface_closure::SurfaceClosure; pub use self::bvh::{BVH, BVHNode}; +pub use self::bvh4::{BVH4, BVH4Node}; pub use self::light_tree::LightTree; pub use self::light_array::LightArray; diff --git a/src/main.rs b/src/main.rs index 8e4defa..17c0301 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +extern crate bvh_order; extern crate color as color_util; extern crate float4; extern crate halton; @@ -62,7 +63,7 @@ use ray::{Ray, AccelRay}; use surface::SurfaceIntersection; use renderer::LightPath; use bbox::BBox; -use accel::BVHNode; +use accel::{BVHNode, BVH4Node}; use timer::Timer; @@ -181,6 +182,7 @@ fn main() { println!("LightPath size: {} bytes", mem::size_of::()); println!("BBox size: {} bytes", mem::size_of::()); println!("BVHNode size: {} bytes", mem::size_of::()); + println!("BVH4Node size: {} bytes", mem::size_of::()); return; } diff --git a/src/scene/assembly.rs b/src/scene/assembly.rs index 3760ac1..748b5c6 100644 --- a/src/scene/assembly.rs +++ b/src/scene/assembly.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use mem_arena::MemArena; use accel::{LightAccel, LightTree}; -use accel::BVH; +use accel::BVH4; use bbox::{BBox, transform_bbox_slice_from}; use boundable::Boundable; use color::SpectralSample; @@ -28,7 +28,7 @@ pub struct Assembly<'a> { pub assemblies: &'a [Assembly<'a>], // Object accel - pub object_accel: BVH<'a>, + pub object_accel: BVH4<'a>, // Light accel pub light_accel: LightTree<'a>, @@ -251,7 +251,7 @@ impl<'a> AssemblyBuilder<'a> { let (bis, bbs) = self.instance_bounds(); // Build object accel - let object_accel = BVH::from_objects(self.arena, &mut self.instances[..], 1, |inst| { + let object_accel = BVH4::from_objects(self.arena, &mut self.instances[..], 1, |inst| { &bbs[bis[inst.id]..bis[inst.id + 1]] }); @@ -311,7 +311,7 @@ impl<'a> AssemblyBuilder<'a> { /// Returns a pair of vectors with the bounds of all instances. - /// This is used for building the assembly's BVH. + /// This is used for building the assembly's BVH4. fn instance_bounds(&self) -> (Vec, Vec) { let mut indices = vec![0]; let mut bounds = Vec::new(); diff --git a/src/surface/triangle_mesh.rs b/src/surface/triangle_mesh.rs index 2618f0a..168cc40 100644 --- a/src/surface/triangle_mesh.rs +++ b/src/surface/triangle_mesh.rs @@ -2,7 +2,7 @@ use mem_arena::MemArena; -use accel::BVH; +use accel::BVH4; use bbox::BBox; use boundable::Boundable; use color::XYZ; @@ -21,7 +21,7 @@ pub struct TriangleMesh<'a> { time_samples: usize, geo: &'a [(Point, Point, Point)], indices: &'a [usize], - accel: BVH<'a>, + accel: BVH4<'a>, } impl<'a> TriangleMesh<'a> { @@ -46,7 +46,7 @@ impl<'a> TriangleMesh<'a> { bounds }; - let accel = BVH::from_objects(arena, &mut indices[..], 3, |tri_i| { + let accel = BVH4::from_objects(arena, &mut indices[..], 3, |tri_i| { &bounds[*tri_i..(*tri_i + time_samples)] }); diff --git a/sub_crates/bvh_order/src/lib.rs b/sub_crates/bvh_order/src/lib.rs index fc52a5e..b1b66e6 100644 --- a/sub_crates/bvh_order/src/lib.rs +++ b/sub_crates/bvh_order/src/lib.rs @@ -1,19 +1,21 @@ #![allow(dead_code)] -// Include the file generated by the build.rs script +// Include TRAVERSAL_TABLE generated by the build.rs script include!(concat!(env!("OUT_DIR"), "/table_inc.rs")); /// Represents the split axes of the BVH2 node(s) that a BVH4 node was created /// from. /// -/// * `Full` means four nodes from three splits: top, left, and right. +/// * `Full` is four nodes from three splits: top, left, and right. /// * `Left` is three nodes from two splits: top and left. /// * `Right` is three nodes from two splits: top and right. /// * `TopOnly` is two nodes from one split (in other words, the BVH4 node is /// identical to the single BVH2 node that it was created from). /// -/// Left in this case means the node whose coordinate on the top split-axis is -/// lower. For example, if the top split is on the x axis, then `left.x <= right.x`. +/// The left node of a split is the node whose coordinate on the top split-axis +/// is lower. For example, if the top split is on the x axis, then `left.x <= right.x`. +/// +/// The values representing each axis are x = 0, y = 1, and z = 2. #[derive(Debug, Copy, Clone)] pub enum SplitAxes { Full((u8, u8, u8)), // top, left, right @@ -23,7 +25,8 @@ pub enum SplitAxes { } /// Calculates the traversal code for a BVH4 node based on the splits and -/// topology of its children. +/// topology of the BVH2 node(s) it was created from. +#[inline(always)] pub fn calc_traversal_code(split: SplitAxes) -> u8 { match split { SplitAxes::Full((top, left, right)) => top + (left * 3) + (right * 9),