From 172e2f19ef717d314614811247210fa865c923de Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Fri, 12 May 2017 21:07:40 -0700 Subject: [PATCH] Removed BVH4 and the related bitstack from AccelRay. I couldn't make the BVH4 faster than the BVH, and the bitstack was bloating the AccelRay struct. Removing the bitstack gives a small but noticeable speedup in rendering. --- src/accel/bvh4.rs | 487 --------------------------------- src/accel/bvh4_table.inc | 35 --- src/accel/create_bvh4_table.py | 58 ---- src/accel/mod.rs | 2 - src/bbox4.rs | 116 -------- src/bitstack.rs | 303 -------------------- src/main.rs | 6 - src/ray.rs | 3 - 8 files changed, 1010 deletions(-) delete mode 100644 src/accel/bvh4.rs delete mode 100644 src/accel/bvh4_table.inc delete mode 100644 src/accel/create_bvh4_table.py delete mode 100644 src/bbox4.rs delete mode 100644 src/bitstack.rs diff --git a/src/accel/bvh4.rs b/src/accel/bvh4.rs deleted file mode 100644 index d9fe66a..0000000 --- a/src/accel/bvh4.rs +++ /dev/null @@ -1,487 +0,0 @@ -//! This BVH4 implementation pulls a lot of ideas from the paper -//! "Efficient Ray Tracing Kernels for Modern CPU Architectures" -//! by Fuetterling et al. -//! -//! Specifically, the table-based traversal order approach they -//! propose is largely followed by this implementation. 
- -#![allow(dead_code)] - -use mem_arena::MemArena; - -use algorithm::{partition, partition_with_side}; -use bbox::BBox; -use bbox4::BBox4; -use boundable::Boundable; -use lerp::lerp_slice; -use ray::AccelRay; -use timer::Timer; - -use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH}; -use super::ACCEL_TRAV_TIME; - -// TRAVERSAL_TABLE -include!("bvh4_table.inc"); - -#[derive(Copy, Clone, Debug)] -pub struct BVH4<'a> { - root: Option<&'a BVH4Node<'a>>, - depth: usize, - node_count: usize, - _bounds: Option<&'a [BBox]>, -} - -#[derive(Copy, Clone, Debug)] -pub enum BVH4Node<'a> { - Internal { - bounds: &'a [BBox4], - children: &'a [BVH4Node<'a>], - traversal_code: u8, - }, - - Leaf { object_range: (usize, usize) }, -} - -impl<'a> BVH4<'a> { - pub fn from_objects<'b, T, F>(arena: &'a MemArena, - objects: &mut [T], - objects_per_leaf: usize, - bounder: F) - -> BVH4<'a> - where F: 'b + Fn(&T) -> &'b [BBox] - { - if objects.len() == 0 { - BVH4 { - root: None, - depth: 0, - node_count: 0, - _bounds: None, - } - } else { - let base = BVHBase::from_objects(objects, objects_per_leaf, bounder); - - let mut fill_node = unsafe { arena.alloc_uninitialized_with_alignment::(32) }; - let node_count = BVH4::construct_from_base(arena, - &base, - &base.nodes[base.root_node_index()], - fill_node); - - BVH4 { - root: Some(fill_node), - depth: (base.depth / 2) + 1, - node_count: node_count, - _bounds: { - let range = base.nodes[base.root_node_index()].bounds_range(); - Some(arena.copy_slice(&base.bounds[range.0..range.1])) - }, - } - } - } - - pub fn tree_depth(&self) -> usize { - self.depth - } - - pub fn traverse(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F) - where F: FnMut(&T, &mut [AccelRay]) - { - if self.root.is_none() { - return; - } - - let mut trav_time: f64 = 0.0; - let mut timer = Timer::new(); - - // +2 of max depth for root and last child - let mut node_stack = [self.root; BVH_MAX_DEPTH + 2]; - let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2]; 
- let mut stack_ptr = 1; - let mut unpopped = 0; - let mut first_loop = true; - - let ray_code = ((rays[0].dir_inv.x() < 0.0) as u8) | - (((rays[0].dir_inv.y() < 0.0) as u8) << 1) | - (((rays[0].dir_inv.z() < 0.0) as u8) << 2); - - while stack_ptr > 0 { - match node_stack[stack_ptr] { - Some(&BVH4Node::Internal { bounds, children, traversal_code }) => { - let node_order_code = { - TRAVERSAL_TABLE[ray_code as usize][traversal_code as usize] - }; - let noc1 = node_order_code & 3; - let noc2 = (node_order_code >> 2) & 3; - let noc3 = (node_order_code >> 4) & 3; - let noc4 = (node_order_code >> 6) & 3; - - let mut all_hits = 0; - - // Ray testing - let part; - { - // Common code for ray testing below - let mut test_ray = |r: &mut AccelRay| { - let hits = lerp_slice(bounds, r.time) - .intersect_accel_ray(r) - .to_bitmask(); - - all_hits |= hits; - - if hits != 0 { - // Push hit bits onto ray's traversal stack - let shuffled_hits = match children.len() { - 4 => { - ((hits >> noc1) & 1) | (((hits >> noc2) & 1) << 1) | - (((hits >> noc3) & 1) << 2) | - (((hits >> noc4) & 1) << 3) - } - 3 => { - ((hits >> noc1) & 1) | (((hits >> noc2) & 1) << 1) | - (((hits >> noc3) & 1) << 2) - } - 2 => ((hits >> noc1) & 1) | (((hits >> noc2) & 1) << 1), - _ => unreachable!(), - }; - r.trav_stack.push_n(shuffled_hits, children.len() as u8); - - return true; - } - - return false; - }; - - // Skip some tests if it's the first loop - part = if first_loop { - filter_rays(&ray_i_stack[stack_ptr..], - &mut rays[..ray_i_stack[stack_ptr]], - unpopped, - |r, _| { - if !r.is_done() { - return test_ray(r); - } - return false; - }) - } else { - filter_rays(&ray_i_stack[stack_ptr..], - &mut rays[..ray_i_stack[stack_ptr]], - unpopped, - |r, pop_count| { - if (!r.is_done()) && r.trav_stack.pop_to_nth(pop_count) { - return test_ray(r); - } - return false; - }) - }; - } - unpopped = 0; - - // Update stack based on ray testing results - if part > 0 { - for i in 0..children.len() { - let inv_i = 
(children.len() - 1) - i; - let child_i = ((node_order_code >> (inv_i * 2)) & 3) as usize; - node_stack[stack_ptr + i] = if ((all_hits >> child_i) & 1) == 0 { - None - } else { - Some(&children[child_i]) - }; - ray_i_stack[stack_ptr + i] = part; - } - - stack_ptr += children.len() - 1; - } else { - stack_ptr -= 1; - } - } - - Some(&BVH4Node::Leaf { object_range }) => { - let part = if !first_loop { - filter_rays(&ray_i_stack[stack_ptr..], - &mut rays[..ray_i_stack[stack_ptr]], - unpopped, - |r, pop_count| { - (!r.is_done()) && r.trav_stack.pop_to_nth(pop_count) - }) - } else { - ray_i_stack[stack_ptr] - }; - - unpopped = 0; - - trav_time += timer.tick() as f64; - - for obj in &objects[object_range.0..object_range.1] { - obj_ray_test(obj, &mut rays[..part]); - } - - timer.tick(); - - stack_ptr -= 1; - } - - None => { - if !first_loop { - unpopped += 1; - - } - stack_ptr -= 1; - } - } - - first_loop = false; - } - - // Pop any unpopped bits of the ray traversal stacks - if unpopped > 0 { - filter_rays(&ray_i_stack[1..], - &mut rays[..ray_i_stack[1]], - unpopped - 1, - |r, pop_count| r.trav_stack.pop_to_nth(pop_count)); - } - - trav_time += timer.tick() as f64; - ACCEL_TRAV_TIME.with(|att| { - let v = att.get(); - att.set(v + trav_time); - }); - } - - fn construct_from_base(arena: &'a MemArena, - base: &BVHBase, - node: &BVHBaseNode, - fill_node: &mut BVH4Node<'a>) - -> usize { - let mut node_count = 0; - - match node { - // Create internal node - &BVHBaseNode::Internal { bounds_range: _, children_indices, split_axis } => { - let child_l = &base.nodes[children_indices.0]; - let child_r = &base.nodes[children_indices.1]; - - // Prepare convenient access to the stuff we need. - let child_count; - let children; // [Optional, Optional, Optional, Optional] - let split_axis_l; // Optional - let split_axis_r; // Optional - match child_l { - &BVHBaseNode::Internal { children_indices: i_l, split_axis: s_l, .. 
} => { - match child_r { - &BVHBaseNode::Internal { children_indices: i_r, - split_axis: s_r, - .. } => { - // Four nodes - child_count = 4; - children = [Some(&base.nodes[i_l.0]), - Some(&base.nodes[i_l.1]), - Some(&base.nodes[i_r.0]), - Some(&base.nodes[i_r.1])]; - split_axis_l = Some(s_l); - split_axis_r = Some(s_r); - } - &BVHBaseNode::Leaf { .. } => { - // Three nodes with left split - child_count = 3; - children = [Some(&base.nodes[i_l.0]), - Some(&base.nodes[i_l.1]), - Some(child_r), - None]; - split_axis_l = Some(s_l); - split_axis_r = None; - } - } - } - &BVHBaseNode::Leaf { .. } => { - match child_r { - &BVHBaseNode::Internal { children_indices: i_r, - split_axis: s_r, - .. } => { - // Three nodes with right split - child_count = 3; - children = [Some(child_l), - Some(&base.nodes[i_r.0]), - Some(&base.nodes[i_r.1]), - None]; - split_axis_l = None; - split_axis_r = Some(s_r); - } - &BVHBaseNode::Leaf { .. } => { - // Two nodes - child_count = 2; - children = [Some(child_l), Some(child_r), None, None]; - split_axis_l = None; - split_axis_r = None; - } - } - } - } - - node_count += child_count; - - // Construct bounds - let bounds = { - let bounds_len = children.iter() - .map(|c| if let &Some(n) = c { - let len = n.bounds_range().1 - n.bounds_range().0; - debug_assert!(len >= 1); - len - } else { - 0 - }) - .max() - .unwrap(); - debug_assert!(bounds_len >= 1); - let mut bounds = - unsafe { arena.alloc_array_uninitialized_with_alignment(bounds_len, 32) }; - if bounds_len < 2 { - let b1 = children[0] - .map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]); - let b2 = children[1] - .map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]); - let b3 = children[2] - .map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]); - let b4 = children[3] - .map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]); - bounds[0] = BBox4::from_bboxes(b1, b2, b3, b4); - } else { - for (i, b) in bounds.iter_mut().enumerate() { - let time = i as f32 / (bounds_len - 1) as 
f32; - - let b1 = children[0].map_or(BBox::new(), |c| { - let (x, y) = c.bounds_range(); - lerp_slice(&base.bounds[x..y], time) - }); - let b2 = children[1].map_or(BBox::new(), |c| { - let (x, y) = c.bounds_range(); - lerp_slice(&base.bounds[x..y], time) - }); - let b3 = children[2].map_or(BBox::new(), |c| { - let (x, y) = c.bounds_range(); - lerp_slice(&base.bounds[x..y], time) - }); - let b4 = children[3].map_or(BBox::new(), |c| { - let (x, y) = c.bounds_range(); - lerp_slice(&base.bounds[x..y], time) - }); - *b = BBox4::from_bboxes(b1, b2, b3, b4); - } - } - bounds - }; - - // Construct child nodes - let mut child_nodes = - unsafe { - arena.alloc_array_uninitialized_with_alignment::(child_count, 32) - }; - for (i, c) in children[0..child_count].iter().enumerate() { - node_count += - BVH4::construct_from_base(arena, base, c.unwrap(), &mut child_nodes[i]); - } - - // Build this node - let traversal_code = { - let topology_code = if child_count == 4 { - 0 - } else if child_count == 2 { - 3 - } else if split_axis_l.is_some() { - 1 - } else { - 2 - }; - calc_traversal_code(split_axis, - split_axis_l.unwrap_or(split_axis_r.unwrap_or(0)), - if child_count == 4 { - split_axis_r.unwrap() - } else { - 0 - }, - topology_code) - }; - *fill_node = BVH4Node::Internal { - bounds: bounds, - children: child_nodes, - traversal_code: traversal_code, - }; - } - - // Create internal node - &BVHBaseNode::Leaf { object_range, .. } => { - *fill_node = BVH4Node::Leaf { object_range: object_range }; - node_count += 1; - } - } - - return node_count; - } -} - - -impl<'a> Boundable for BVH4<'a> { - fn bounds<'b>(&'b self) -> &'b [BBox] { - self._bounds.unwrap_or(&[]) - } -} - - -// Calculates the traversal code for a BVH4 node based on the splits and topology -// of its children. -// -// split_1 is the top split. -// -// split_2 is either the left or right split depending on topology, and is only -// relevant for topologies 0-2. For topology 3 it should be 0. 
-// -// split_3 is always the right split, and is only relevant for topology 0. For -// topologies 1-3 it should be 0. -// -// topology can be 0-3: -// 0: All three splits exist, representing 4 BVH4 children. -// 1: Two splits exist: top split and left split, representing 3 BVH4 children. -// 2: Two splits exist: top split and right split, representing 3 BVH4 children. -// 3: Only the top split exists, representing 2 BVH4 children. -fn calc_traversal_code(split_1: u8, split_2: u8, split_3: u8, topology: u8) -> u8 { - debug_assert!(!(topology > 0 && split_3 > 0)); - debug_assert!(!(topology > 2 && split_2 > 0)); - - static T_TABLE: [u8; 4] = [0, 27, 27 + 9, 27 + 9 + 9]; - split_1 + (split_2 * 3) + (split_3 * 9) + T_TABLE[topology as usize] -} - - -fn filter_rays(ray_i_stack: &[usize], - rays: &mut [AccelRay], - unpopped: usize, - mut ray_test: F) - -> usize - where F: FnMut(&mut AccelRay, usize) -> bool -{ - let part = if ray_i_stack[0] == ray_i_stack[unpopped] { - let pop_count = unpopped + 1; - partition(rays, |r| ray_test(r, pop_count)) - } else { - let mut part_n = [0, rays.len() - 1]; // Where we are in the partition - let mut part_pop = [unpopped, 0]; // Number of bits to pop on the left and right side - - partition_with_side(rays, |r, side| { - let pop_count = if !side { - while part_n[0] >= ray_i_stack[part_pop[0]] { - part_pop[0] -= 1; - } - part_n[0] += 1; - part_pop[0] - } else { - while part_n[1] < ray_i_stack[part_pop[1] + 1] && part_pop[1] < unpopped { - part_pop[1] += 1; - } - part_n[1] -= 1; - part_pop[1] - }; - - return ray_test(r, pop_count + 1); - }) - }; - - part -} diff --git a/src/accel/bvh4_table.inc b/src/accel/bvh4_table.inc deleted file mode 100644 index fef7769..0000000 --- a/src/accel/bvh4_table.inc +++ /dev/null @@ -1,35 +0,0 @@ -static TRAVERSAL_TABLE: [[u8; 48]; 8] = [ - [228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, - 36, 36, 36, 36, 36, 36, 36, 
36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, - 4, 4, 4], - [27, 177, 177, 75, 180, 180, 75, 180, 180, 30, 225, 225, 78, 228, 228, 78, 228, 228, 30, 225, 225, 78, 228, 228, 78, 228, 228, - 6, 33, 33, 18, 36, 36, 18, 36, 36, - 6, 24, 24, 9, 36, 36, 9, 36, 36, - 1, 4, 4], - [228, 78, 228, 225, 30, 225, 228, 78, 228, 180, 75, 180, 177, 27, 177, 180, 75, 180, 228, 78, 228, 225, 30, 225, 228, 78, 228, - 36, 18, 36, 33, 6, 33, 36, 18, 36, - 36, 9, 36, 24, 6, 24, 36, 9, 36, - 4, 1, 4], - [27, 27, 177, 27, 27, 177, 75, 75, 180, 27, 27, 177, 27, 27, 177, 75, 75, 180, 30, 30, 225, 30, 30, 225, 78, 78, 228, - 6, 6, 33, 6, 6, 33, 18, 18, 36, - 6, 6, 24, 6, 6, 24, 9, 9, 36, - 1, 1, 4], - [228, 228, 78, 228, 228, 78, 225, 225, 30, 228, 228, 78, 228, 228, 78, 225, 225, 30, 180, 180, 75, 180, 180, 75, 177, 177, 27, - 36, 36, 18, 36, 36, 18, 33, 33, 6, - 36, 36, 9, 36, 36, 9, 24, 24, 6, - 4, 4, 1], - [27, 177, 27, 75, 180, 75, 27, 177, 27, 30, 225, 30, 78, 228, 78, 30, 225, 30, 27, 177, 27, 75, 180, 75, 27, 177, 27, - 6, 33, 6, 18, 36, 18, 6, 33, 6, - 6, 24, 6, 9, 36, 9, 6, 24, 6, - 1, 4, 1], - [228, 78, 78, 225, 30, 30, 225, 30, 30, 180, 75, 75, 177, 27, 27, 177, 27, 27, 180, 75, 75, 177, 27, 27, 177, 27, 27, - 36, 18, 18, 33, 6, 6, 33, 6, 6, - 36, 9, 9, 24, 6, 6, 24, 6, 6, - 4, 1, 1], - [27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, - 1, 1, 1], -]; - diff --git a/src/accel/create_bvh4_table.py b/src/accel/create_bvh4_table.py deleted file mode 100644 index d9b9410..0000000 --- a/src/accel/create_bvh4_table.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python - -if __name__ == "__main__": - text = "static TRAVERSAL_TABLE: [[u8; 48]; 8] = [\n" - - for raydir in range(0, 8): - ray = [raydir & 1, (raydir >> 1) & 1, (raydir >> 2) & 1] - - text += " [" - for splits in [[s1, s2, s3] for s3 in range(0,3) for s2 in range(0,3) for s1 in range(0,3)]: - perm = [0, 1, 2, 
3] - if ray[splits[1]] == 1: - perm = [perm[1], perm[0]] + perm[2:4] - if ray[splits[2]] == 1: - perm = perm[0:2] + [perm[3], perm[2]] - if ray[splits[0]] == 1: - perm = perm[2:4] + perm[0:2] - perm = perm[0] + (perm[1] << 2) + (perm[2] << 4) + (perm[3] << 6) - text += "%d, " % perm - text = text[:-1] - - text += "\n " - for splits in [[s1, s2] for s2 in range(0,3) for s1 in range(0,3)]: - perm = [0, 1, 2] - if ray[splits[1]] == 1: - perm = [perm[1], perm[0], perm[2]] - if ray[splits[0]] == 1: - perm = [perm[2], perm[0], perm[1]] - perm = perm[0] + (perm[1] << 2) + (perm[2] << 4) - text += "%d, " % perm - text = text[:-1] - - text += "\n " - for splits in [[s1, s2] for s2 in range(0,3) for s1 in range(0,3)]: - perm = [0, 1, 2] - if ray[splits[1]] == 1: - perm = [perm[0], perm[2], perm[1]] - if ray[splits[0]] == 1: - perm = [perm[1], perm[2], perm[0]] - perm = perm[0] + (perm[1] << 2) + (perm[2] << 4) - text += "%d, " % perm - text = text[:-1] - - text += "\n " - for split in [s1 for s1 in range(0,3)]: - perm = [0, 1] - if ray[split] == 1: - perm = [perm[1], perm[0]] - perm = perm[0] + (perm[1] << 2) - text += "%d, " % perm - text = text[:-1] - - text = text[:-1] + "],\n" - - text += "];\n" - - print text - diff --git a/src/accel/mod.rs b/src/accel/mod.rs index 0000196..f538668 100644 --- a/src/accel/mod.rs +++ b/src/accel/mod.rs @@ -1,6 +1,5 @@ mod bvh_base; mod bvh; -mod bvh4; mod light_array; mod light_tree; mod objects_split; @@ -11,7 +10,6 @@ use math::{Vector, Point, Normal}; use shading::surface_closure::SurfaceClosure; pub use self::bvh::{BVH, BVHNode}; -pub use self::bvh4::{BVH4, BVH4Node}; pub use self::light_tree::LightTree; // Track BVH traversal time diff --git a/src/bbox4.rs b/src/bbox4.rs deleted file mode 100644 index 15fa59d..0000000 --- a/src/bbox4.rs +++ /dev/null @@ -1,116 +0,0 @@ -#![allow(dead_code)] - -use std; -use std::ops::{BitOr, BitOrAssign}; - -use bbox::BBox; -use float4::{Float4, Bool4, v_min, v_max}; -use lerp::{lerp, Lerp}; -use 
ray::AccelRay; - - -const BBOX_MAXT_ADJUST: f32 = 1.00000024; - -/// A SIMD set of 4 3D axis-aligned bounding boxes. -#[derive(Debug, Copy, Clone)] -pub struct BBox4 { - pub x: (Float4, Float4), // (min, max) - pub y: (Float4, Float4), // (min, max) - pub z: (Float4, Float4), // (min, max) -} - -impl BBox4 { - /// Creates a degenerate BBox with +infinity min and -infinity max. - pub fn new() -> BBox4 { - BBox4 { - x: (Float4::splat(std::f32::INFINITY), Float4::splat(std::f32::NEG_INFINITY)), - y: (Float4::splat(std::f32::INFINITY), Float4::splat(std::f32::NEG_INFINITY)), - z: (Float4::splat(std::f32::INFINITY), Float4::splat(std::f32::NEG_INFINITY)), - } - } - - /// Creates a BBox with min as the minimum extent and max as the maximum - /// extent. - pub fn from_bboxes(b1: BBox, b2: BBox, b3: BBox, b4: BBox) -> BBox4 { - BBox4 { - x: (Float4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()), - Float4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x())), - y: (Float4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()), - Float4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y())), - z: (Float4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()), - Float4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z())), - } - } - - // Returns whether the given ray intersects with the bboxes. - pub fn intersect_accel_ray(&self, ray: &AccelRay) -> Bool4 { - // Precalculate ray direction sign booleans. - // Doing it up here slightly speeds things up lower down. 
- let ray_pos = (ray.dir_inv.x() >= 0.0, ray.dir_inv.y() >= 0.0, ray.dir_inv.z() >= 0.0); - - // Convert ray to SIMD form - let ray4_o = - (Float4::splat(ray.orig.x()), Float4::splat(ray.orig.y()), Float4::splat(ray.orig.z())); - let ray4_dinv = (Float4::splat(ray.dir_inv.x()), - Float4::splat(ray.dir_inv.y()), - Float4::splat(ray.dir_inv.z())); - - // Calculate the plane intersections - let (xlos, xhis) = if ray_pos.0 { - ((self.x.0 - ray4_o.0) * ray4_dinv.0, (self.x.1 - ray4_o.0) * ray4_dinv.0) - } else { - ((self.x.1 - ray4_o.0) * ray4_dinv.0, (self.x.0 - ray4_o.0) * ray4_dinv.0) - }; - let (ylos, yhis) = if ray_pos.1 { - ((self.y.0 - ray4_o.1) * ray4_dinv.1, (self.y.1 - ray4_o.1) * ray4_dinv.1) - } else { - ((self.y.1 - ray4_o.1) * ray4_dinv.1, (self.y.0 - ray4_o.1) * ray4_dinv.1) - }; - let (zlos, zhis) = if ray_pos.2 { - ((self.z.0 - ray4_o.2) * ray4_dinv.2, (self.z.1 - ray4_o.2) * ray4_dinv.2) - } else { - ((self.z.1 - ray4_o.2) * ray4_dinv.2, (self.z.0 - ray4_o.2) * ray4_dinv.2) - }; - - // Get the minimum and maximum hits - let mins = v_max(v_max(xlos, ylos), v_max(zlos, Float4::splat(0.0))); - let maxs = v_max(v_min(v_min(xhis, yhis), zhis), - Float4::splat(std::f32::NEG_INFINITY)) * - Float4::splat(BBOX_MAXT_ADJUST); - - // Check for hits - let hits = mins.lt(Float4::splat(ray.max_t)) & mins.lte(maxs); - - return hits; - } -} - - -/// Union of two BBoxes. 
-impl BitOr for BBox4 { - type Output = BBox4; - - fn bitor(self, rhs: BBox4) -> BBox4 { - BBox4 { - x: (self.x.0.v_min(rhs.x.0), self.x.1.v_max(rhs.x.1)), - y: (self.y.0.v_min(rhs.y.0), self.y.1.v_max(rhs.y.1)), - z: (self.z.0.v_min(rhs.z.0), self.z.1.v_max(rhs.z.1)), - } - } -} - -impl BitOrAssign for BBox4 { - fn bitor_assign(&mut self, rhs: BBox4) { - *self = *self | rhs; - } -} - -impl Lerp for BBox4 { - fn lerp(self, other: BBox4, alpha: f32) -> BBox4 { - BBox4 { - x: (lerp(self.x.0, other.x.0, alpha), lerp(self.x.1, other.x.1, alpha)), - y: (lerp(self.y.0, other.y.0, alpha), lerp(self.y.1, other.y.1, alpha)), - z: (lerp(self.z.0, other.z.0, alpha), lerp(self.z.1, other.z.1, alpha)), - } - } -} diff --git a/src/bitstack.rs b/src/bitstack.rs deleted file mode 100644 index af46ace..0000000 --- a/src/bitstack.rs +++ /dev/null @@ -1,303 +0,0 @@ -#![allow(dead_code)] - -use std::mem::size_of; - -#[derive(Copy, Clone, Debug)] -pub struct BitStack128 { - data: (u64, u64), -} - -impl BitStack128 { - pub fn new() -> BitStack128 { - BitStack128 { data: (0, 0) } - } - - pub fn new_with_1() -> BitStack128 { - BitStack128 { data: (1, 0) } - } - - /// Push a bit onto the top of the stack. - pub fn push(&mut self, value: bool) { - // Verify no stack overflow - debug_assert!((self.data.1 >> ((size_of::() * 8) - 1)) == 0); - - self.data.1 = (self.data.1 << 1) | (self.data.0 >> ((size_of::() * 8) - 1)); - self.data.0 <<= 1; - self.data.0 |= value as u64; - } - - /// Push n bits onto the top of the stack. The input - /// bits are passed as an integer, with the bit that - /// will be on top in the least significant digit, and - /// the rest following in order from there. - /// - /// Note that unless you are running a debug build, no - /// effort is made to verify that only the first n - /// bits of the passed value are used. So if other - /// bits are non-zero this will produce incorrect results. 
- pub fn push_n(&mut self, value: u8, count: u8) { - // Verify no bitstack overflow - debug_assert!((self.data.1 >> ((size_of::() * 8) - count as usize)) == 0); - // Verify no bits outside of the n-bit range - debug_assert!(if count < (size_of::() * 8) as u8 { - value & (!((1 << count) - 1)) == 0 - } else { - true - }); - debug_assert!(count <= (size_of::() * 8) as u8); - - self.data.1 = (self.data.1 << count as usize) | - (self.data.0 >> ((size_of::() * 8) - count as usize)); - self.data.0 <<= count as u64; - self.data.0 |= value as u64; - } - - /// Pop the top bit off the stack. - pub fn pop(&mut self) -> bool { - let b = (self.data.0 & 1) != 0; - self.data.0 = (self.data.0 >> 1) | (self.data.1 << ((size_of::() * 8) - 1)); - self.data.1 >>= 1; - return b; - } - - /// Pop the top n bits off the stack. The bits are returned as - /// an integer, with the top bit in the least significant digit, - /// and the rest following in order from there. - pub fn pop_n(&mut self, n: usize) -> u64 { - debug_assert!(n < (size_of::() * 8)); // Can't pop more than we have - debug_assert!(n < (size_of::() * 8)); // Can't pop more than the return type can hold - let b = self.data.0 & ((1 << n) - 1); - self.data.0 = (self.data.0 >> n) | (self.data.1 << ((size_of::() * 8) - n)); - self.data.1 >>= n; - return b; - } - - /// Pop the top n bits off the stack, but return only the nth bit. - pub fn pop_to_nth(&mut self, n: usize) -> bool { - debug_assert!(n > 0); - debug_assert!(n < (size_of::() * 8)); // Can't pop more than we have - debug_assert!(n < (size_of::() * 8)); // Can't pop more than the return type can hold - let b = (self.data.0 & (1 << (n - 1))) != 0; - self.data.0 = (self.data.0 >> n) | (self.data.1 << ((size_of::() * 8) - n)); - self.data.1 >>= n; - return b; - } - - /// Read the top bit of the stack without popping it. - pub fn peek(&self) -> bool { - (self.data.0 & 1) != 0 - } - - /// Read the top n bits of the stack without popping them. 
The bits - /// are returned as an integer, with the top bit in the least - /// significant digit, and the rest following in order from there. - pub fn peek_n(&self, n: usize) -> u64 { - // Can't return more than we have - debug_assert!(n < (size_of::() * 8)); - // Can't return more than the return type can hold - debug_assert!(n < (size_of::() * 8)); - - self.data.0 & ((1 << n) - 1) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn push() { - let mut bs = BitStack128::new(); - bs.push(true); - bs.push(false); - bs.push(true); - bs.push(true); - bs.push(false); - bs.push(true); - bs.push(true); - bs.push(true); - - assert!(bs.data.0 == 0b10110111); - assert!(bs.data.1 == 0); - } - - #[test] - fn push_overflow() { - let mut bs = BitStack128::new(); - for _ in 0..9 { - bs.push(true); - bs.push(false); - bs.push(true); - bs.push(true); - bs.push(false); - bs.push(true); - bs.push(true); - bs.push(true); - } - - assert!(bs.data.0 == 0b1011011110110111101101111011011110110111101101111011011110110111); - assert!(bs.data.1 == 0b10110111); - } - - #[test] - fn pop() { - let mut bs = BitStack128::new(); - bs.data.0 = 0b10110111; - - assert!(bs.pop() == true); - assert!(bs.pop() == true); - assert!(bs.pop() == true); - assert!(bs.pop() == false); - assert!(bs.pop() == true); - assert!(bs.pop() == true); - assert!(bs.pop() == false); - assert!(bs.pop() == true); - } - - #[test] - fn pop_overflow() { - let mut bs = BitStack128::new(); - bs.data.0 = 0b1011011110110111101101111011011110110111101101111011011110110111; - bs.data.1 = 0b10110111; - for _ in 0..9 { - assert!(bs.pop() == true); - assert!(bs.pop() == true); - assert!(bs.pop() == true); - assert!(bs.pop() == false); - assert!(bs.pop() == true); - assert!(bs.pop() == true); - assert!(bs.pop() == false); - assert!(bs.pop() == true); - } - } - - #[test] - fn push_n() { - let mut bs = BitStack128::new(); - bs.push_n(0b10110, 5); - bs.push_n(0b10110111, 8); - - assert!(bs.data.0 == 0b1011010110111); - } - - 
#[test] - fn push_n_overflow() { - let mut bs = BitStack128::new(); - for _ in 0..9 { - bs.push_n(0b10110111, 8); - } - - assert!(bs.data.0 == 0b1011011110110111101101111011011110110111101101111011011110110111); - assert!(bs.data.1 == 0b10110111); - } - - #[test] - fn pop_n() { - let mut bs = BitStack128::new(); - bs.data.0 = 0b0010_1000_1100_1110_0101_0111; - - assert!(bs.pop_n(4) == 0b0111); - assert!(bs.data.0 == 0b0010_1000_1100_1110_0101); - - assert!(bs.pop_n(4) == 0b0101); - assert!(bs.data.0 == 0b0010_1000_1100_1110); - - assert!(bs.pop_n(4) == 0b1110); - assert!(bs.data.0 == 0b0010_1000_1100); - - assert!(bs.pop_n(4) == 0b1100); - assert!(bs.data.0 == 0b0010_1000); - - assert!(bs.pop_n(4) == 0b1000); - assert!(bs.data.0 == 0b0010); - - assert!(bs.pop_n(4) == 0b0010); - assert!(bs.data.0 == 0); - } - - #[test] - fn pop_n_overflow() { - let mut bs = BitStack128::new(); - bs.data.0 = 0b1011011110110111101101111011011110110111101101111011011110110111; - bs.data.1 = 0b10110111; - for _ in 0..9 { - assert!(bs.pop_n(8) == 0b10110111); - } - } - - #[test] - fn pop_to_nth() { - let mut bs = BitStack128::new(); - bs.data.0 = 0b0010_1000_1100_1110_0101_0111; - - assert!(bs.pop_to_nth(4) == false); - assert!(bs.data.0 == 0b0010_1000_1100_1110_0101); - - assert!(bs.pop_to_nth(4) == false); - assert!(bs.data.0 == 0b0010_1000_1100_1110); - - assert!(bs.pop_to_nth(4) == true); - assert!(bs.data.0 == 0b0010_1000_1100); - - assert!(bs.pop_to_nth(4) == true); - assert!(bs.data.0 == 0b0010_1000); - - assert!(bs.pop_to_nth(4) == true); - assert!(bs.data.0 == 0b0010); - - assert!(bs.pop_to_nth(4) == false); - assert!(bs.data.0 == 0); - } - - #[test] - fn pop_to_nth_overflow() { - let mut bs = BitStack128::new(); - bs.data.0 = 0b00110111_10110111_00110111_10110111_00110111_10110111_00110111_10110111; - bs.data.1 = 0b00110111_10110111; - for _ in 0..5 { - assert!(bs.pop_to_nth(8) == true); - assert!(bs.pop_to_nth(8) == false); - } - } - - #[test] - fn peek() { - let mut bs = 
BitStack128::new(); - bs.data.0 = 0b10110111; - - assert!(bs.peek() == true); - bs.pop(); - - assert!(bs.peek() == true); - bs.pop(); - - assert!(bs.peek() == true); - bs.pop(); - - assert!(bs.peek() == false); - bs.pop(); - - assert!(bs.peek() == true); - bs.pop(); - - assert!(bs.peek() == true); - bs.pop(); - - assert!(bs.peek() == false); - bs.pop(); - - assert!(bs.peek() == true); - } - - #[test] - fn peek_n() { - let mut bs = BitStack128::new(); - bs.data.0 = 0b10110111; - - assert!(bs.peek_n(4) == 0b0111); - bs.pop_n(4); - - assert!(bs.peek_n(4) == 0b1011); - bs.pop_n(4); - } -} diff --git a/src/main.rs b/src/main.rs index 24d3377..af81b6b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,8 +23,6 @@ extern crate lazy_static; mod accel; mod algorithm; mod bbox; -mod bbox4; -mod bitstack; mod boundable; mod camera; mod color; @@ -61,9 +59,7 @@ use ray::{Ray, AccelRay}; use surface::SurfaceIntersection; use renderer::LightPath; use bbox::BBox; -use bbox4::BBox4; use accel::BVHNode; -use accel::BVH4Node; use timer::Timer; @@ -130,9 +126,7 @@ fn main() { mem::size_of::()); println!("LightPath size: {} bytes", mem::size_of::()); println!("BBox size: {} bytes", mem::size_of::()); - println!("BBox4 size: {} bytes", mem::size_of::()); println!("BVHNode size: {} bytes", mem::size_of::()); - println!("BVH4Node size: {} bytes", mem::size_of::()); return; } diff --git a/src/ray.rs b/src/ray.rs index 3980a78..d75e1d6 100644 --- a/src/ray.rs +++ b/src/ray.rs @@ -2,7 +2,6 @@ use std; -use bitstack::BitStack128; use float4::Float4; use math::{Vector, Point, Matrix4x4}; @@ -59,7 +58,6 @@ pub struct AccelRay { pub time: f32, pub flags: u32, pub id: u32, - pub trav_stack: BitStack128, } impl AccelRay { @@ -71,7 +69,6 @@ impl AccelRay { time: ray.time, flags: ray.flags, id: id, - trav_stack: BitStack128::new_with_1(), } }