diff --git a/src/accel/bvh4.rs b/src/accel/bvh4.rs
new file mode 100644
index 0000000..15eab8e
--- /dev/null
+++ b/src/accel/bvh4.rs
@@ -0,0 +1,196 @@
+#![allow(dead_code)]
+
+use mem_arena::MemArena;
+
+use algorithm::partition;
+use bbox::BBox;
+use boundable::Boundable;
+use lerp::lerp_slice;
+use ray::AccelRay;
+
+use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH};
+
+
+/// A bounding volume hierarchy whose nodes are allocated from a `MemArena`.
+///
+/// `root` is `None` when the tree was built from an empty slice of objects.
+#[derive(Copy, Clone, Debug)]
+pub struct BVH4<'a> {
+    root: Option<&'a BVH4Node<'a>>,
+    depth: usize,
+}
+
+/// A node of a `BVH4`.
+///
+/// Both variants carry a slice of bounds, which traversal interpolates at
+/// each ray's time with `lerp_slice`.
+#[derive(Copy, Clone, Debug)]
+enum BVH4Node<'a> {
+    /// An inner node with two children, split along `split_axis`.
+    Internal {
+        bounds: &'a [BBox],
+        children: (&'a BVH4Node<'a>, &'a BVH4Node<'a>),
+        split_axis: u8,
+    },
+
+    /// A node covering the objects at indices `object_range.0..object_range.1`.
+    Leaf {
+        bounds: &'a [BBox],
+        object_range: (usize, usize),
+    },
+}
+
+impl<'a> BVH4<'a> {
+    /// Builds a `BVH4` over `objects`, allocating its nodes from `arena`.
+    ///
+    /// An empty `objects` slice yields a tree with no root and a depth of 0.
+    /// Otherwise a `BVHBase` is built from `objects`, `objects_per_leaf` and
+    /// `bounder`, and its nodes are then copied into `arena`.
+    pub fn from_objects<'b, T, F>(arena: &'a MemArena,
+                                  objects: &mut [T],
+                                  objects_per_leaf: usize,
+                                  bounder: F)
+                                  -> BVH4<'a>
+        where F: 'b + Fn(&T) -> &'b [BBox]
+    {
+        if objects.is_empty() {
+            BVH4 {
+                root: None,
+                depth: 0,
+            }
+        } else {
+            let base = BVHBase::from_objects(objects, objects_per_leaf, bounder);
+
+            BVH4 {
+                root: Some(BVH4::construct_from_base(arena, &base, base.root_node_index())),
+                depth: base.depth,
+            }
+        }
+    }
+
+    /// Returns the tree depth recorded when the tree was built.
+    pub fn tree_depth(&self) -> usize {
+        self.depth
+    }
+
+    /// Traverses the tree with a batch of rays.
+    ///
+    /// At every visited node the rays still under consideration are
+    /// partitioned by "not done and hits the node's time-interpolated bounds";
+    /// `partition` is expected to move the matching rays to the front and
+    /// return how many there are.  At a leaf, `obj_ray_test` is called once
+    /// per object in the leaf's range with that leading group of rays.
+    pub fn traverse<T, F>(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F)
+        where F: FnMut(&T, &mut [AccelRay])
+    {
+        match self.root {
+            None => {}
+
+            Some(root) => {
+                // +2 of max depth for root and last child
+                let mut node_stack = [root; BVH_MAX_DEPTH + 2];
+                let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2];
+                let mut stack_ptr = 1;
+
+                while stack_ptr > 0 {
+                    match node_stack[stack_ptr] {
+                        &BVH4Node::Internal { bounds, children, split_axis } => {
+                            let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
+                                (!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r)
+                            });
+                            if part > 0 {
+                                node_stack[stack_ptr] = children.0;
+                                node_stack[stack_ptr + 1] = children.1;
+                                ray_i_stack[stack_ptr] = part;
+                                ray_i_stack[stack_ptr + 1] = part;
+                                // Top of stack is visited next: order the children by the
+                                // sign of the first ray's inverse direction on the split axis.
+                                if rays[0].dir_inv.get_n(split_axis as usize).is_sign_positive() {
+                                    node_stack.swap(stack_ptr, stack_ptr + 1);
+                                }
+                                stack_ptr += 1;
+                            } else {
+                                stack_ptr -= 1;
+                            }
+                        }
+
+                        &BVH4Node::Leaf { bounds, object_range } => {
+                            let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
+                                (!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r)
+                            });
+                            if part > 0 {
+                                for obj in &objects[object_range.0..object_range.1] {
+                                    obj_ray_test(obj, &mut rays[..part]);
+                                }
+                            }
+
+                            stack_ptr -= 1;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Recursively copies the node at `node_index` of `base`, and everything
+    /// beneath it, into `arena`, returning the newly allocated node.
+    fn construct_from_base(arena: &'a MemArena,
+                           base: &BVHBase,
+                           node_index: usize)
+                           -> &'a mut BVH4Node<'a> {
+        match &base.nodes[node_index] {
+            &BVHBaseNode::Internal { bounds_range, children_indices, split_axis } => {
+                // SAFETY: `node` starts out uninitialized and is assigned below
+                // before it is returned.  `BVH4Node` is `Copy`, so that assignment
+                // does not run a destructor on the uninitialized contents.
+                let mut node = unsafe { arena.alloc_uninitialized::<BVH4Node>() };
+
+                let bounds = arena.copy_slice(&base.bounds[bounds_range.0..bounds_range.1]);
+                let child1 = BVH4::construct_from_base(arena, base, children_indices.0);
+                let child2 = BVH4::construct_from_base(arena, base, children_indices.1);
+
+                *node = BVH4Node::Internal {
+                    bounds: bounds,
+                    children: (child1, child2),
+                    split_axis: split_axis,
+                };
+
+                return node;
+            }
+
+            &BVHBaseNode::Leaf { bounds_range, object_range } => {
+                // SAFETY: as above, `node` is assigned before it is returned.
+                let mut node = unsafe { arena.alloc_uninitialized::<BVH4Node>() };
+                let bounds = arena.copy_slice(&base.bounds[bounds_range.0..bounds_range.1]);
+
+                *node = BVH4Node::Leaf {
+                    bounds: bounds,
+                    object_range: object_range,
+                };
+
+                return node;
+            }
+        }
+    }
+}
+
+// Bounds reported by a `BVH4` that has no root node.
+lazy_static! {
+    static ref DEGENERATE_BOUNDS: [BBox; 1] = [BBox::new()];
+}
+
+impl<'a> Boundable for BVH4<'a> {
+    /// Returns the root node's bounds, or `DEGENERATE_BOUNDS` for an empty tree.
+    fn bounds<'b>(&'b self) -> &'b [BBox] {
+        match self.root {
+            None => &DEGENERATE_BOUNDS[..],
+            Some(root) => {
+                match root {
+                    &BVH4Node::Internal { bounds, .. } => bounds,
+
+                    &BVH4Node::Leaf { bounds, .. } => bounds,
+                }
+            }
+        }
+    }
+}
diff --git a/src/accel/mod.rs b/src/accel/mod.rs
index 4a5ff4a..3bde84d 100644
--- a/src/accel/mod.rs
+++ b/src/accel/mod.rs
@@ -1,5 +1,6 @@
 mod bvh_base;
 mod bvh;
+mod bvh4;
 mod light_array;
 mod light_tree;
 mod objects_split;
@@ -8,6 +9,7 @@ use math::{Vector, Point, Normal};
 use shading::surface_closure::SurfaceClosure;
 
 pub use self::bvh::BVH;
+pub use self::bvh4::BVH4;
 pub use self::light_tree::LightTree;
 
 
diff --git a/src/bbox4.rs b/src/bbox4.rs
new file mode 100644
index 0000000..1c79470
--- /dev/null
+++ b/src/bbox4.rs
@@ -0,0 +1,131 @@
+#![allow(dead_code)]
+
+use std;
+use std::ops::{BitOr, BitOrAssign};
+
+use bbox::BBox;
+use float4::{Float4, Bool4, v_min, v_max};
+use lerp::{lerp, Lerp};
+use ray::AccelRay;
+
+
+// Scale factor (slightly above 1.0) applied to the far slab distances in
+// `BBox4::intersect_accel_ray`; presumably there to absorb rounding error.
+const BBOX_MAXT_ADJUST: f32 = 1.00000024;
+
+/// A SIMD set of 4 3D axis-aligned bounding boxes.
+#[derive(Debug, Copy, Clone)]
+pub struct BBox4 {
+    pub min: (Float4, Float4, Float4), // xs, ys, zs
+    pub max: (Float4, Float4, Float4), // xs, ys, zs
+}
+
+impl BBox4 {
+    /// Creates a degenerate BBox4 with +infinity min and -infinity max.
+    pub fn new() -> BBox4 {
+        BBox4 {
+            min: (Float4::splat(std::f32::INFINITY),
+                  Float4::splat(std::f32::INFINITY),
+                  Float4::splat(std::f32::INFINITY)),
+            max: (Float4::splat(std::f32::NEG_INFINITY),
+                  Float4::splat(std::f32::NEG_INFINITY),
+                  Float4::splat(std::f32::NEG_INFINITY)),
+        }
+    }
+
+    /// Packs four `BBox`es into one `BBox4`: lane `n` of every component
+    /// holds the corresponding extent of the `n`th argument.
+    pub fn from_bboxes(b1: BBox, b2: BBox, b3: BBox, b4: BBox) -> BBox4 {
+        BBox4 {
+            min: (Float4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()),
+                  Float4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()),
+                  Float4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z())),
+            max: (Float4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x()),
+                  Float4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y()),
+                  Float4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z())),
+        }
+    }
+
+    /// Returns, per lane, whether the given ray intersects that lane's bbox.
+    ///
+    /// A lane hits when its near distance (clamped to be at least 0) is less
+    /// than `ray.max_t` and no greater than its far distance scaled by
+    /// `BBOX_MAXT_ADJUST`.
+    pub fn intersect_accel_ray(&self, ray: &AccelRay) -> Bool4 {
+        // Convert ray to SIMD form
+        let ray4_o =
+            (Float4::splat(ray.orig.x()), Float4::splat(ray.orig.y()), Float4::splat(ray.orig.z()));
+        let ray4_dinv = (Float4::splat(ray.dir_inv.x()),
+                         Float4::splat(ray.dir_inv.y()),
+                         Float4::splat(ray.dir_inv.z()));
+
+        // Calculate the plane intersections
+        // (a negative inverse direction swaps which plane is the near one)
+        let (xlos, xhis) = if ray.dir_inv.x() >= 0.0 {
+            ((self.min.0 - ray4_o.0) * ray4_dinv.0, (self.max.0 - ray4_o.0) * ray4_dinv.0)
+        } else {
+            ((self.max.0 - ray4_o.0) * ray4_dinv.0, (self.min.0 - ray4_o.0) * ray4_dinv.0)
+        };
+
+        let (ylos, yhis) = if ray.dir_inv.y() >= 0.0 {
+            ((self.min.1 - ray4_o.1) * ray4_dinv.1, (self.max.1 - ray4_o.1) * ray4_dinv.1)
+        } else {
+            ((self.max.1 - ray4_o.1) * ray4_dinv.1, (self.min.1 - ray4_o.1) * ray4_dinv.1)
+        };
+
+        let (zlos, zhis) = if ray.dir_inv.z() >= 0.0 {
+            ((self.min.2 - ray4_o.2) * ray4_dinv.2, (self.max.2 - ray4_o.2) * ray4_dinv.2)
+        } else {
+            ((self.max.2 - ray4_o.2) * ray4_dinv.2, (self.min.2 - ray4_o.2) * ray4_dinv.2)
+        };
+
+        // Get the minimum and maximum hits
+        // BBOX_MAXT_ADJUST scales the far hits themselves (-inf is only a floor).
+        let mins = v_max(v_max(xlos, ylos), v_max(zlos, Float4::splat(0.0)));
+        let maxs = v_max(v_min(v_min(xhis, yhis), zhis),
+                         Float4::splat(std::f32::NEG_INFINITY)) * Float4::splat(BBOX_MAXT_ADJUST);
+
+        // Check for hits
+        let hits = mins.lt(Float4::splat(ray.max_t)) & mins.lte(maxs);
+
+        return hits;
+    }
+}
+
+
+/// Union of two `BBox4`s, computed lane by lane.
+impl BitOr for BBox4 {
+    type Output = BBox4;
+
+    fn bitor(self, rhs: BBox4) -> BBox4 {
+        BBox4 {
+            min: (self.min.0.v_min(rhs.min.0),
+                  self.min.1.v_min(rhs.min.1),
+                  self.min.2.v_min(rhs.min.2)),
+            max: (self.max.0.v_max(rhs.max.0),
+                  self.max.1.v_max(rhs.max.1),
+                  self.max.2.v_max(rhs.max.2)),
+        }
+    }
+}
+
+/// In-place union: `a |= b` is `a = a | b`.
+impl BitOrAssign for BBox4 {
+    fn bitor_assign(&mut self, rhs: BBox4) {
+        *self = *self | rhs;
+    }
+}
+
+/// Linear interpolation of every min and max component.
+impl Lerp for BBox4 {
+    fn lerp(self, other: BBox4, alpha: f32) -> BBox4 {
+        BBox4 {
+            min: (lerp(self.min.0, other.min.0, alpha),
+                  lerp(self.min.1, other.min.1, alpha),
+                  lerp(self.min.2, other.min.2, alpha)),
+            max: (lerp(self.max.0, other.max.0, alpha),
+                  lerp(self.max.1, other.max.1, alpha),
+                  lerp(self.max.2, other.max.2, alpha)),
+        }
+    }
+}
diff --git a/src/float4.rs b/src/float4.rs
index de7c1e7..ad4f7da 100644
--- a/src/float4.rs
+++ b/src/float4.rs
@@ -1,11 +1,12 @@
 #![allow(dead_code)]
 
 use std::cmp::PartialEq;
-use std::ops::{Add, Sub, Mul, Div};
+use std::ops::{Add, Sub, Mul, Div, BitAnd};
 
 #[cfg(feature = "simd_perf")]
-use simd::f32x4;
+use simd::{f32x4, bool32fx4};
 
+use lerp::Lerp;
 
 /// Essentially a tuple of four floats, which will use SIMD operations
 /// where possible on a platform.
@@ -133,6 +134,66 @@ impl Float4 {
                     })
     }
 
+    /// Element-wise `self < other`.
+    #[cfg(feature = "simd_perf")]
+    pub fn lt(&self, other: Float4) -> Bool4 {
+        Bool4 { data: self.data.lt(other.data) }
+    }
+    #[cfg(not(feature = "simd_perf"))]
+    pub fn lt(&self, other: Float4) -> Bool4 {
+        Bool4 {
+            data: [self.data[0] < other.data[0],
+                   self.data[1] < other.data[1],
+                   self.data[2] < other.data[2],
+                   self.data[3] < other.data[3]],
+        }
+    }
+
+    /// Element-wise `self <= other`.
+    #[cfg(feature = "simd_perf")]
+    pub fn lte(&self, other: Float4) -> Bool4 {
+        Bool4 { data: self.data.lte(other.data) }
+    }
+    #[cfg(not(feature = "simd_perf"))]
+    pub fn lte(&self, other: Float4) -> Bool4 {
+        Bool4 {
+            data: [self.data[0] <= other.data[0],
+                   self.data[1] <= other.data[1],
+                   self.data[2] <= other.data[2],
+                   self.data[3] <= other.data[3]],
+        }
+    }
+
+    /// Element-wise `self > other`.
+    #[cfg(feature = "simd_perf")]
+    pub fn gt(&self, other: Float4) -> Bool4 {
+        Bool4 { data: self.data.gt(other.data) }
+    }
+    #[cfg(not(feature = "simd_perf"))]
+    pub fn gt(&self, other: Float4) -> Bool4 {
+        Bool4 {
+            data: [self.data[0] > other.data[0],
+                   self.data[1] > other.data[1],
+                   self.data[2] > other.data[2],
+                   self.data[3] > other.data[3]],
+        }
+    }
+
+    /// Element-wise `self >= other`.
+    #[cfg(feature = "simd_perf")]
+    pub fn gte(&self, other: Float4) -> Bool4 {
+        Bool4 { data: self.data.gte(other.data) }
+    }
+    #[cfg(not(feature = "simd_perf"))]
+    pub fn gte(&self, other: Float4) -> Bool4 {
+        Bool4 {
+            data: [self.data[0] >= other.data[0],
+                   self.data[1] >= other.data[1],
+                   self.data[2] >= other.data[2],
+                   self.data[3] >= other.data[3]],
+        }
+    }
+
     /// Set the nth element to the given value.
     #[inline]
     pub fn set_n(&mut self, n: usize, v: f32) {
@@ -382,6 +443,121 @@ impl Div for Float4 {
     }
 }
 
+/// Linear interpolation: `self * (1 - alpha) + other * alpha`.
+impl Lerp for Float4 {
+    fn lerp(self, other: Float4, alpha: f32) -> Float4 {
+        (self * (1.0 - alpha)) + (other * alpha)
+    }
+}
+
+/// Free-function form of `Float4::v_min`.
+#[inline(always)]
+pub fn v_min(a: Float4, b: Float4) -> Float4 {
+    a.v_min(b)
+}
+
+/// Free-function form of `Float4::v_max`.
+#[inline(always)]
+pub fn v_max(a: Float4, b: Float4) -> Float4 {
+    a.v_max(b)
+}
+
+
+/// Essentially a tuple of four bools, which will use SIMD operations
+/// where possible on a platform.
+#[cfg(feature = "simd_perf")]
+#[derive(Debug, Copy, Clone)]
+pub struct Bool4 {
+    data: bool32fx4,
+}
+
+/// Essentially a tuple of four bools; this variant stores plain `bool`s.
+#[cfg(not(feature = "simd_perf"))]
+#[derive(Debug, Copy, Clone)]
+pub struct Bool4 {
+    data: [bool; 4],
+}
+
+impl Bool4 {
+    /// Returns the value of the 0th element.
+    #[cfg(feature = "simd_perf")]
+    #[inline(always)]
+    pub fn get_0(&self) -> bool {
+        self.data.extract(0)
+    }
+    #[cfg(not(feature = "simd_perf"))]
+    #[inline(always)]
+    pub fn get_0(&self) -> bool {
+        // SAFETY: index 0 is in bounds for the `[bool; 4]` backing array.
+        unsafe { *self.data.get_unchecked(0) }
+    }
+
+    /// Returns the value of the 1st element.
+    #[cfg(feature = "simd_perf")]
+    #[inline(always)]
+    pub fn get_1(&self) -> bool {
+        self.data.extract(1)
+    }
+    #[cfg(not(feature = "simd_perf"))]
+    #[inline(always)]
+    pub fn get_1(&self) -> bool {
+        // SAFETY: index 1 is in bounds for the `[bool; 4]` backing array.
+        unsafe { *self.data.get_unchecked(1) }
+    }
+
+    /// Returns the value of the 2nd element.
+    #[cfg(feature = "simd_perf")]
+    #[inline(always)]
+    pub fn get_2(&self) -> bool {
+        self.data.extract(2)
+    }
+    #[cfg(not(feature = "simd_perf"))]
+    #[inline(always)]
+    pub fn get_2(&self) -> bool {
+        // SAFETY: index 2 is in bounds for the `[bool; 4]` backing array.
+        unsafe { *self.data.get_unchecked(2) }
+    }
+
+    /// Returns the value of the 3rd element.
+    #[cfg(feature = "simd_perf")]
+    #[inline(always)]
+    pub fn get_3(&self) -> bool {
+        self.data.extract(3)
+    }
+    #[cfg(not(feature = "simd_perf"))]
+    #[inline(always)]
+    pub fn get_3(&self) -> bool {
+        // SAFETY: index 3 is in bounds for the `[bool; 4]` backing array.
+        unsafe { *self.data.get_unchecked(3) }
+    }
+
+    /// Packs the four elements into the low bits of a `u8`: bit `n` is set
+    /// exactly when element `n` is `true`.
+    pub fn to_bitmask(&self) -> u8 {
+        (self.get_0() as u8) | ((self.get_1() as u8) << 1) | ((self.get_2() as u8) << 2) |
+        ((self.get_3() as u8) << 3)
+    }
+}
+
+/// Element-wise logical AND.
+impl BitAnd for Bool4 {
+    type Output = Bool4;
+
+    #[cfg(feature = "simd_perf")]
+    fn bitand(self, rhs: Bool4) -> Bool4 {
+        Bool4 { data: self.data & rhs.data }
+    }
+    #[cfg(not(feature = "simd_perf"))]
+    fn bitand(self, rhs: Bool4) -> Bool4 {
+        Bool4 {
+            data: [self.data[0] && rhs.data[0],
+                   self.data[1] && rhs.data[1],
+                   self.data[2] && rhs.data[2],
+                   self.data[3] && rhs.data[3]],
+        }
+    }
+}
+
 
 #[cfg(test)]
 mod tests {
diff --git a/src/main.rs b/src/main.rs
index 14a8a1c..6db9bb1 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -21,6 +21,7 @@ extern crate simd;
 mod accel;
 mod algorithm;
 mod bbox;
+mod bbox4;
 mod bitstack;
 mod boundable;
 mod camera;