Removed BVH4 and the related bitstack from AccelRay.

I couldn't make the BVH4 faster than the BVH, and the bitstack
was bloating the AccelRay struct.  Removing the bitstack gives
a small but noticeable speedup in rendering.
This commit is contained in:
Nathan Vegdahl 2017-05-12 21:07:40 -07:00
parent 88578b9eae
commit 172e2f19ef
8 changed files with 0 additions and 1010 deletions

View File

@ -1,487 +0,0 @@
//! This BVH4 implementation pulls a lot of ideas from the paper
//! "Efficient Ray Tracing Kernels for Modern CPU Architectures"
//! by Fuetterling et al.
//!
//! Specifically, the table-based traversal order approach they
//! propose is largely followed by this implementation.
#![allow(dead_code)]
use mem_arena::MemArena;
use algorithm::{partition, partition_with_side};
use bbox::BBox;
use bbox4::BBox4;
use boundable::Boundable;
use lerp::lerp_slice;
use ray::AccelRay;
use timer::Timer;
use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH};
use super::ACCEL_TRAV_TIME;
// TRAVERSAL_TABLE
include!("bvh4_table.inc");
/// A bounding volume hierarchy whose internal nodes have two to four children.
///
/// The tree is converted from a `BVHBase` and its nodes are allocated from a
/// `MemArena` (see `from_objects`).
#[derive(Copy, Clone, Debug)]
pub struct BVH4<'a> {
/// Root node of the tree, or `None` when the BVH was built from zero objects.
root: Option<&'a BVH4Node<'a>>,
/// Tree depth as reported by `tree_depth()`; 0 for an empty BVH.
depth: usize,
/// Node count returned by `construct_from_base` when the tree was built.
node_count: usize,
/// Bounds of the whole tree, copied from the base BVH's root node.
/// `None` for an empty BVH.
_bounds: Option<&'a [BBox]>,
}
/// A single node of a `BVH4`.
#[derive(Copy, Clone, Debug)]
pub enum BVH4Node<'a> {
/// A node with two, three or four children.
Internal {
/// Bounds of the children, one `BBox4` per time sample. Traversal
/// interpolates these by ray time with `lerp_slice`.
bounds: &'a [BBox4],
/// The child nodes (2 to 4 of them).
children: &'a [BVH4Node<'a>],
/// Column index into `TRAVERSAL_TABLE`, as produced by
/// `calc_traversal_code`.
traversal_code: u8,
},
/// A leaf referring to the half-open range `object_range.0..object_range.1`
/// of the object slice passed to `traverse`.
Leaf { object_range: (usize, usize) },
}
impl<'a> BVH4<'a> {
/// Builds a `BVH4` over `objects`.
///
/// A binary `BVHBase` is built first (using `objects_per_leaf` and `bounder`)
/// and then converted into the 4-wide layout, with all nodes and bounds
/// allocated from `arena`. An empty `objects` slice yields an empty tree
/// with no root and no bounds.
pub fn from_objects<'b, T, F>(arena: &'a MemArena,
                              objects: &mut [T],
                              objects_per_leaf: usize,
                              bounder: F)
                              -> BVH4<'a>
    where F: 'b + Fn(&T) -> &'b [BBox]
{
    // Nothing to build: hand back an empty tree.
    if objects.is_empty() {
        return BVH4 {
            root: None,
            depth: 0,
            node_count: 0,
            _bounds: None,
        };
    }

    let base = BVHBase::from_objects(objects, objects_per_leaf, bounder);
    let base_root = &base.nodes[base.root_node_index()];

    // Allocate the root node, then fill it (and its whole subtree) in.
    let mut root_node = unsafe { arena.alloc_uninitialized_with_alignment::<BVH4Node>(32) };
    let total_nodes = BVH4::construct_from_base(arena, &base, base_root, root_node);

    // The bounds of the whole tree are those of the base tree's root.
    let (bounds_start, bounds_end) = base_root.bounds_range();
    let tree_bounds = arena.copy_slice(&base.bounds[bounds_start..bounds_end]);

    BVH4 {
        root: Some(root_node),
        depth: (base.depth / 2) + 1,
        node_count: total_nodes,
        _bounds: Some(tree_bounds),
    }
}
/// Returns the depth recorded for this tree when it was built (0 for an
/// empty BVH).
pub fn tree_depth(&self) -> usize {
self.depth
}
/// Traverses the tree with a batch of rays.
///
/// For every leaf that is reached, `obj_ray_test` is called once per object
/// in the leaf's object range, together with the leading sub-slice of `rays`
/// that is still active for that leaf. `rays` is reordered in place by the
/// filtering steps.
///
/// Time spent in traversal (excluding the calls to `obj_ray_test`) is added
/// to `ACCEL_TRAV_TIME`.
///
/// NOTE(review): the traversal order is derived from `rays[0]` only, so all
/// rays in the batch presumably share direction signs — confirm with callers.
/// `rays[0]` is also indexed unconditionally, so an empty `rays` slice with a
/// non-empty tree would panic.
pub fn traverse<T, F>(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F)
where F: FnMut(&T, &mut [AccelRay])
{
// An empty tree has nothing to intersect.
if self.root.is_none() {
return;
}
let mut trav_time: f64 = 0.0;
let mut timer = Timer::new();
// +2 of max depth for root and last child
let mut node_stack = [self.root; BVH_MAX_DEPTH + 2];
// For each stack slot, how many rays (from the front of `rays`) apply to it.
let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2];
// Slot 0 is never examined: the loop ends once stack_ptr drops to 0.
let mut stack_ptr = 1;
// Number of `None` stack entries skipped since rays were last filtered;
// their hit bits are still on the rays' traversal stacks.
let mut unpopped = 0;
let mut first_loop = true;
// 3-bit code of the first ray's direction signs (bit 0 = x, 1 = y, 2 = z;
// a set bit means negative). Selects the row of TRAVERSAL_TABLE.
let ray_code = ((rays[0].dir_inv.x() < 0.0) as u8) |
(((rays[0].dir_inv.y() < 0.0) as u8) << 1) |
(((rays[0].dir_inv.z() < 0.0) as u8) << 2);
while stack_ptr > 0 {
match node_stack[stack_ptr] {
Some(&BVH4Node::Internal { bounds, children, traversal_code }) => {
// Packed child visiting order: two bits per child, first child
// to visit in the lowest bits.
let node_order_code = {
TRAVERSAL_TABLE[ray_code as usize][traversal_code as usize]
};
let noc1 = node_order_code & 3;
let noc2 = (node_order_code >> 2) & 3;
let noc3 = (node_order_code >> 4) & 3;
let noc4 = (node_order_code >> 6) & 3;
// Union of the per-child hit bits over all tested rays.
let mut all_hits = 0;
// Ray testing
let part;
{
// Common code for ray testing below
let mut test_ray = |r: &mut AccelRay| {
let hits = lerp_slice(bounds, r.time)
.intersect_accel_ray(r)
.to_bitmask();
all_hits |= hits;
if hits != 0 {
// Push hit bits onto ray's traversal stack
// The bits are reordered into visiting order so that the
// first child to visit ends up on top of the bit stack.
let shuffled_hits = match children.len() {
4 => {
((hits >> noc1) & 1) | (((hits >> noc2) & 1) << 1) |
(((hits >> noc3) & 1) << 2) |
(((hits >> noc4) & 1) << 3)
}
3 => {
((hits >> noc1) & 1) | (((hits >> noc2) & 1) << 1) |
(((hits >> noc3) & 1) << 2)
}
2 => ((hits >> noc1) & 1) | (((hits >> noc2) & 1) << 1),
_ => unreachable!(),
};
r.trav_stack.push_n(shuffled_hits, children.len() as u8);
return true;
}
return false;
};
// Skip some tests if it's the first loop
// (on the first loop nothing is popped from the rays' bit stacks).
part = if first_loop {
filter_rays(&ray_i_stack[stack_ptr..],
&mut rays[..ray_i_stack[stack_ptr]],
unpopped,
|r, _| {
if !r.is_done() {
return test_ray(r);
}
return false;
})
} else {
filter_rays(&ray_i_stack[stack_ptr..],
&mut rays[..ray_i_stack[stack_ptr]],
unpopped,
|r, pop_count| {
if (!r.is_done()) && r.trav_stack.pop_to_nth(pop_count) {
return test_ray(r);
}
return false;
})
};
}
unpopped = 0;
// Update stack based on ray testing results
if part > 0 {
// Children are written in reverse visiting order, so the first
// child to visit lands in the topmost slot. Children that no ray
// hit are stored as `None`.
for i in 0..children.len() {
let inv_i = (children.len() - 1) - i;
let child_i = ((node_order_code >> (inv_i * 2)) & 3) as usize;
node_stack[stack_ptr + i] = if ((all_hits >> child_i) & 1) == 0 {
None
} else {
Some(&children[child_i])
};
ray_i_stack[stack_ptr + i] = part;
}
// The current slot was overwritten by one of the children, hence -1.
stack_ptr += children.len() - 1;
} else {
stack_ptr -= 1;
}
}
Some(&BVH4Node::Leaf { object_range }) => {
// On the first loop the root itself is a leaf and all rays in the
// slot are used without filtering.
let part = if !first_loop {
filter_rays(&ray_i_stack[stack_ptr..],
&mut rays[..ray_i_stack[stack_ptr]],
unpopped,
|r, pop_count| {
(!r.is_done()) && r.trav_stack.pop_to_nth(pop_count)
})
} else {
ray_i_stack[stack_ptr]
};
unpopped = 0;
// Bank the traversal time so far; object tests are not counted.
trav_time += timer.tick() as f64;
for obj in &objects[object_range.0..object_range.1] {
obj_ray_test(obj, &mut rays[..part]);
}
// Restart the timer interval after the object tests.
timer.tick();
stack_ptr -= 1;
}
None => {
// A child no ray hit: skip it, but remember that its hit bit is
// still waiting to be popped from the rays' bit stacks.
if !first_loop {
unpopped += 1;
}
stack_ptr -= 1;
}
}
first_loop = false;
}
// Pop any unpopped bits of the ray traversal stacks
if unpopped > 0 {
filter_rays(&ray_i_stack[1..],
&mut rays[..ray_i_stack[1]],
unpopped - 1,
|r, pop_count| r.trav_stack.pop_to_nth(pop_count));
}
trav_time += timer.tick() as f64;
// Add this call's traversal time to the ACCEL_TRAV_TIME accumulator.
ACCEL_TRAV_TIME.with(|att| {
let v = att.get();
att.set(v + trav_time);
});
}
/// Recursively converts the `BVHBase` subtree rooted at `node` into BVH4
/// nodes, writing the converted node into `fill_node`.
///
/// An internal base node is collapsed together with its two children: each
/// child that is itself internal contributes its own two children, giving a
/// BVH4 node with 2, 3 or 4 children. Child nodes and bounds are allocated
/// from `arena`.
///
/// Returns the node count accumulated for the subtree: 1 for a leaf; for an
/// internal node, its number of children plus the counts returned for each
/// child.
fn construct_from_base(arena: &'a MemArena,
base: &BVHBase,
node: &BVHBaseNode,
fill_node: &mut BVH4Node<'a>)
-> usize {
let mut node_count = 0;
match node {
// Create internal node
&BVHBaseNode::Internal { bounds_range: _, children_indices, split_axis } => {
let child_l = &base.nodes[children_indices.0];
let child_r = &base.nodes[children_indices.1];
// Prepare convenient access to the stuff we need.
let child_count;
let children; // [Optional, Optional, Optional, Optional]
let split_axis_l; // Optional
let split_axis_r; // Optional
match child_l {
&BVHBaseNode::Internal { children_indices: i_l, split_axis: s_l, .. } => {
match child_r {
&BVHBaseNode::Internal { children_indices: i_r,
split_axis: s_r,
.. } => {
// Four nodes
child_count = 4;
children = [Some(&base.nodes[i_l.0]),
Some(&base.nodes[i_l.1]),
Some(&base.nodes[i_r.0]),
Some(&base.nodes[i_r.1])];
split_axis_l = Some(s_l);
split_axis_r = Some(s_r);
}
&BVHBaseNode::Leaf { .. } => {
// Three nodes with left split
child_count = 3;
children = [Some(&base.nodes[i_l.0]),
Some(&base.nodes[i_l.1]),
Some(child_r),
None];
split_axis_l = Some(s_l);
split_axis_r = None;
}
}
}
&BVHBaseNode::Leaf { .. } => {
match child_r {
&BVHBaseNode::Internal { children_indices: i_r,
split_axis: s_r,
.. } => {
// Three nodes with right split
child_count = 3;
children = [Some(child_l),
Some(&base.nodes[i_r.0]),
Some(&base.nodes[i_r.1]),
None];
split_axis_l = None;
split_axis_r = Some(s_r);
}
&BVHBaseNode::Leaf { .. } => {
// Two nodes
child_count = 2;
children = [Some(child_l), Some(child_r), None, None];
split_axis_l = None;
split_axis_r = None;
}
}
}
}
node_count += child_count;
// Construct bounds
let bounds = {
// Use as many time samples as the child with the most bounds.
let bounds_len = children.iter()
.map(|c| if let &Some(n) = c {
let len = n.bounds_range().1 - n.bounds_range().0;
debug_assert!(len >= 1);
len
} else {
0
})
.max()
.unwrap();
debug_assert!(bounds_len >= 1);
let mut bounds =
unsafe { arena.alloc_array_uninitialized_with_alignment(bounds_len, 32) };
if bounds_len < 2 {
// Single time sample: take each child's first bounding box directly.
// Missing children get a default `BBox::new()`.
let b1 = children[0]
.map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
let b2 = children[1]
.map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
let b3 = children[2]
.map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
let b4 = children[3]
.map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
bounds[0] = BBox4::from_bboxes(b1, b2, b3, b4);
} else {
// Several time samples: resample every child's bounds at evenly
// spaced times from 0.0 to 1.0.
for (i, b) in bounds.iter_mut().enumerate() {
let time = i as f32 / (bounds_len - 1) as f32;
let b1 = children[0].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
let b2 = children[1].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
let b3 = children[2].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
let b4 = children[3].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
*b = BBox4::from_bboxes(b1, b2, b3, b4);
}
}
bounds
};
// Construct child nodes
let mut child_nodes =
unsafe {
arena.alloc_array_uninitialized_with_alignment::<BVH4Node>(child_count, 32)
};
for (i, c) in children[0..child_count].iter().enumerate() {
node_count +=
BVH4::construct_from_base(arena, base, c.unwrap(), &mut child_nodes[i]);
}
// Build this node
let traversal_code = {
// Topology numbering as expected by `calc_traversal_code`:
// 0 = four children, 1 = three with left split,
// 2 = three with right split, 3 = two children.
let topology_code = if child_count == 4 {
0
} else if child_count == 2 {
3
} else if split_axis_l.is_some() {
1
} else {
2
};
// Second argument: the left split if present, otherwise the right
// split, otherwise 0. Third argument: the right split, but only
// when there are four children.
calc_traversal_code(split_axis,
split_axis_l.unwrap_or(split_axis_r.unwrap_or(0)),
if child_count == 4 {
split_axis_r.unwrap()
} else {
0
},
topology_code)
};
*fill_node = BVH4Node::Internal {
bounds: bounds,
children: child_nodes,
traversal_code: traversal_code,
};
}
// Create leaf node
&BVHBaseNode::Leaf { object_range, .. } => {
*fill_node = BVH4Node::Leaf { object_range: object_range };
node_count += 1;
}
}
return node_count;
}
}
impl<'a> Boundable for BVH4<'a> {
    /// Returns the bounds of the whole tree, or an empty slice when the BVH
    /// is empty.
    fn bounds<'b>(&'b self) -> &'b [BBox] {
        match self._bounds {
            Some(tree_bounds) => tree_bounds,
            None => &[],
        }
    }
}
/// Computes the traversal code of a BVH4 node from the split axes and the
/// topology of the binary nodes it was collapsed from.
///
/// * `split_1` is the axis of the top split.
/// * `split_2` is the axis of the left or right split, depending on the
///   topology. It only matters for topologies 0-2 and should be 0 for
///   topology 3.
/// * `split_3` is always the axis of the right split. It only matters for
///   topology 0 and should be 0 for topologies 1-3.
/// * `topology` is one of:
///   * 0: all three splits exist (4 BVH4 children).
///   * 1: top and left splits exist (3 BVH4 children).
///   * 2: top and right splits exist (3 BVH4 children).
///   * 3: only the top split exists (2 BVH4 children).
///
/// The result is the base offset of the topology plus the splits read as a
/// base-3 number with `split_1` as the lowest digit.
fn calc_traversal_code(split_1: u8, split_2: u8, split_3: u8, topology: u8) -> u8 {
    // First code belonging to each topology: 27 codes for topology 0, then
    // 9 each for topologies 1 and 2, then the codes of topology 3.
    const TOPOLOGY_BASE: [u8; 4] = [0, 27, 36, 45];

    // Splits that don't exist for the given topology must be passed as 0.
    debug_assert!(topology == 0 || split_3 == 0);
    debug_assert!(topology <= 2 || split_2 == 0);

    let split_offset = split_1 + (split_2 * 3) + (split_3 * 9);
    split_offset + TOPOLOGY_BASE[topology as usize]
}
// Partitions `rays` with `ray_test`, telling the test for each ray how many
// bits should be popped from that ray's traversal stack.
//
// `ray_i_stack` is the ray-count stack starting at the current stack slot,
// and `unpopped` is the number of skipped stack entries whose bits have not
// been popped yet. `ray_test` receives the ray and its pop count (always at
// least 1).
//
// Returns whatever `partition` / `partition_with_side` returns — presumably
// the number of rays that passed the test and were moved to the front of
// `rays`; confirm against the `algorithm` module.
fn filter_rays<F>(ray_i_stack: &[usize],
rays: &mut [AccelRay],
unpopped: usize,
mut ray_test: F)
-> usize
where F: FnMut(&mut AccelRay, usize) -> bool
{
// If the ray count is the same at both ends of the unpopped span, every ray
// gets the same pop count and a plain partition suffices.
let part = if ray_i_stack[0] == ray_i_stack[unpopped] {
let pop_count = unpopped + 1;
partition(rays, |r| ray_test(r, pop_count))
} else {
// Otherwise the pop count depends on where each ray sits relative to the
// ray counts stored in `ray_i_stack`.
// NOTE(review): this relies on the order in which `partition_with_side`
// visits elements from the left (`side == false`) and from the right
// (`side == true`) — confirm against its implementation.
let mut part_n = [0, rays.len() - 1]; // Where we are in the partition
let mut part_pop = [unpopped, 0]; // Number of bits to pop on the left and right side
partition_with_side(rays, |r, side| {
let pop_count = if !side {
// Left side: step the pop count down while the current position is
// at or beyond the ray count stored at that index.
while part_n[0] >= ray_i_stack[part_pop[0]] {
part_pop[0] -= 1;
}
part_n[0] += 1;
part_pop[0]
} else {
// Right side: step the pop count up (capped at `unpopped`) while the
// current position is below the ray count stored at the next index.
while part_n[1] < ray_i_stack[part_pop[1] + 1] && part_pop[1] < unpopped {
part_pop[1] += 1;
}
part_n[1] -= 1;
part_pop[1]
};
return ray_test(r, pop_count + 1);
})
};
part
}

View File

@ -1,35 +0,0 @@
// Child visiting orders for BVH4 traversal.
//
// The first index is the 3-bit ray direction sign code (bit 0 = x, bit 1 = y,
// bit 2 = z; a set bit means the direction is negative along that axis).
//
// The second index is a node's traversal code:
//    0..=26  four children (27 split-axis combinations)
//   27..=35  three children, left split
//   36..=44  three children, right split
//   45..=47  two children
//
// Each entry packs the child indices in visiting order, two bits per child,
// with the first child to visit in the lowest two bits.
static TRAVERSAL_TABLE: [[u8; 48]; 8] = [
[228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228,
36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36,
4, 4, 4],
[27, 177, 177, 75, 180, 180, 75, 180, 180, 30, 225, 225, 78, 228, 228, 78, 228, 228, 30, 225, 225, 78, 228, 228, 78, 228, 228,
6, 33, 33, 18, 36, 36, 18, 36, 36,
6, 24, 24, 9, 36, 36, 9, 36, 36,
1, 4, 4],
[228, 78, 228, 225, 30, 225, 228, 78, 228, 180, 75, 180, 177, 27, 177, 180, 75, 180, 228, 78, 228, 225, 30, 225, 228, 78, 228,
36, 18, 36, 33, 6, 33, 36, 18, 36,
36, 9, 36, 24, 6, 24, 36, 9, 36,
4, 1, 4],
[27, 27, 177, 27, 27, 177, 75, 75, 180, 27, 27, 177, 27, 27, 177, 75, 75, 180, 30, 30, 225, 30, 30, 225, 78, 78, 228,
6, 6, 33, 6, 6, 33, 18, 18, 36,
6, 6, 24, 6, 6, 24, 9, 9, 36,
1, 1, 4],
[228, 228, 78, 228, 228, 78, 225, 225, 30, 228, 228, 78, 228, 228, 78, 225, 225, 30, 180, 180, 75, 180, 180, 75, 177, 177, 27,
36, 36, 18, 36, 36, 18, 33, 33, 6,
36, 36, 9, 36, 36, 9, 24, 24, 6,
4, 4, 1],
[27, 177, 27, 75, 180, 75, 27, 177, 27, 30, 225, 30, 78, 228, 78, 30, 225, 30, 27, 177, 27, 75, 180, 75, 27, 177, 27,
6, 33, 6, 18, 36, 18, 6, 33, 6,
6, 24, 6, 9, 36, 9, 6, 24, 6,
1, 4, 1],
[228, 78, 78, 225, 30, 30, 225, 30, 30, 180, 75, 75, 177, 27, 27, 177, 27, 27, 180, 75, 75, 177, 27, 27, 177, 27, 27,
36, 18, 18, 33, 6, 6, 33, 6, 6,
36, 9, 9, 24, 6, 6, 24, 6, 6,
4, 1, 1],
[27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6,
1, 1, 1],
];

View File

@ -1,58 +0,0 @@
#!/usr/bin/env python
"""Generator for the BVH4 traversal-order table.

Running this script prints the Rust source of ``TRAVERSAL_TABLE``: one row
per ray direction sign code (0-7), each row holding the packed child visiting
order for every traversal code (27 four-child codes, 9 + 9 three-child codes
and 3 two-child codes).
"""


def build_table_text():
    """Returns the Rust source text of the traversal table as a string.

    Each table entry packs a permutation of child indices, two bits per
    child, with the first child to visit in the lowest two bits.
    """
    text = "static TRAVERSAL_TABLE: [[u8; 48]; 8] = [\n"

    for raydir in range(0, 8):
        # ray[axis] is 1 when the corresponding bit of the sign code is set.
        ray = [raydir & 1, (raydir >> 1) & 1, (raydir >> 2) & 1]

        text += " ["
        # Four children: top split plus a left and a right split.
        for splits in [[s1, s2, s3] for s3 in range(0,3) for s2 in range(0,3) for s1 in range(0,3)]:
            perm = [0, 1, 2, 3]
            if ray[splits[1]] == 1:
                perm = [perm[1], perm[0]] + perm[2:4]
            if ray[splits[2]] == 1:
                perm = perm[0:2] + [perm[3], perm[2]]
            if ray[splits[0]] == 1:
                perm = perm[2:4] + perm[0:2]
            perm = perm[0] + (perm[1] << 2) + (perm[2] << 4) + (perm[3] << 6)
            text += "%d, " % perm
        # Drop the trailing space before breaking the line.
        text = text[:-1]

        text += "\n "
        # Three children: top split plus a left split.
        for splits in [[s1, s2] for s2 in range(0,3) for s1 in range(0,3)]:
            perm = [0, 1, 2]
            if ray[splits[1]] == 1:
                perm = [perm[1], perm[0], perm[2]]
            if ray[splits[0]] == 1:
                perm = [perm[2], perm[0], perm[1]]
            perm = perm[0] + (perm[1] << 2) + (perm[2] << 4)
            text += "%d, " % perm
        text = text[:-1]

        text += "\n "
        # Three children: top split plus a right split.
        for splits in [[s1, s2] for s2 in range(0,3) for s1 in range(0,3)]:
            perm = [0, 1, 2]
            if ray[splits[1]] == 1:
                perm = [perm[0], perm[2], perm[1]]
            if ray[splits[0]] == 1:
                perm = [perm[1], perm[2], perm[0]]
            perm = perm[0] + (perm[1] << 2) + (perm[2] << 4)
            text += "%d, " % perm
        text = text[:-1]

        text += "\n "
        # Two children: only the top split.
        for split in [s1 for s1 in range(0,3)]:
            perm = [0, 1]
            if ray[split] == 1:
                perm = [perm[1], perm[0]]
            perm = perm[0] + (perm[1] << 2)
            text += "%d, " % perm
        text = text[:-1]

        # Replace the trailing comma with the end of the row.
        text = text[:-1] + "],\n"

    text += "];\n"
    return text


if __name__ == "__main__":
    # The parenthesised form works as a statement in Python 2 and as a
    # function call in Python 3, producing the same output.
    print(build_table_text())

View File

@ -1,6 +1,5 @@
mod bvh_base; mod bvh_base;
mod bvh; mod bvh;
mod bvh4;
mod light_array; mod light_array;
mod light_tree; mod light_tree;
mod objects_split; mod objects_split;
@ -11,7 +10,6 @@ use math::{Vector, Point, Normal};
use shading::surface_closure::SurfaceClosure; use shading::surface_closure::SurfaceClosure;
pub use self::bvh::{BVH, BVHNode}; pub use self::bvh::{BVH, BVHNode};
pub use self::bvh4::{BVH4, BVH4Node};
pub use self::light_tree::LightTree; pub use self::light_tree::LightTree;
// Track BVH traversal time // Track BVH traversal time

View File

@ -1,116 +0,0 @@
#![allow(dead_code)]
use std;
use std::ops::{BitOr, BitOrAssign};
use bbox::BBox;
use float4::{Float4, Bool4, v_min, v_max};
use lerp::{lerp, Lerp};
use ray::AccelRay;
// Factor by which `intersect_accel_ray` scales the far slab distance before
// comparing it with the near distance.
// NOTE(review): presumably this widens the test by a couple of f32 ulps so
// that rounding error does not cause missed hits — confirm.
const BBOX_MAXT_ADJUST: f32 = 1.00000024;
/// A SIMD set of 4 3D axis-aligned bounding boxes.
///
/// The data is stored per axis: each `Float4` holds one coordinate of all
/// four boxes, with lane `i` belonging to the `i`th box passed to
/// `from_bboxes`.
#[derive(Debug, Copy, Clone)]
pub struct BBox4 {
pub x: (Float4, Float4), // (min, max)
pub y: (Float4, Float4), // (min, max)
pub z: (Float4, Float4), // (min, max)
}
impl BBox4 {
/// Creates a degenerate BBox with +infinity min and -infinity max.
pub fn new() -> BBox4 {
BBox4 {
x: (Float4::splat(std::f32::INFINITY), Float4::splat(std::f32::NEG_INFINITY)),
y: (Float4::splat(std::f32::INFINITY), Float4::splat(std::f32::NEG_INFINITY)),
z: (Float4::splat(std::f32::INFINITY), Float4::splat(std::f32::NEG_INFINITY)),
}
}
/// Creates a BBox with min as the minimum extent and max as the maximum
/// extent.
pub fn from_bboxes(b1: BBox, b2: BBox, b3: BBox, b4: BBox) -> BBox4 {
BBox4 {
x: (Float4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()),
Float4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x())),
y: (Float4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()),
Float4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y())),
z: (Float4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()),
Float4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z())),
}
}
// Returns whether the given ray intersects with the bboxes.
pub fn intersect_accel_ray(&self, ray: &AccelRay) -> Bool4 {
// Precalculate ray direction sign booleans.
// Doing it up here slightly speeds things up lower down.
let ray_pos = (ray.dir_inv.x() >= 0.0, ray.dir_inv.y() >= 0.0, ray.dir_inv.z() >= 0.0);
// Convert ray to SIMD form
let ray4_o =
(Float4::splat(ray.orig.x()), Float4::splat(ray.orig.y()), Float4::splat(ray.orig.z()));
let ray4_dinv = (Float4::splat(ray.dir_inv.x()),
Float4::splat(ray.dir_inv.y()),
Float4::splat(ray.dir_inv.z()));
// Calculate the plane intersections
let (xlos, xhis) = if ray_pos.0 {
((self.x.0 - ray4_o.0) * ray4_dinv.0, (self.x.1 - ray4_o.0) * ray4_dinv.0)
} else {
((self.x.1 - ray4_o.0) * ray4_dinv.0, (self.x.0 - ray4_o.0) * ray4_dinv.0)
};
let (ylos, yhis) = if ray_pos.1 {
((self.y.0 - ray4_o.1) * ray4_dinv.1, (self.y.1 - ray4_o.1) * ray4_dinv.1)
} else {
((self.y.1 - ray4_o.1) * ray4_dinv.1, (self.y.0 - ray4_o.1) * ray4_dinv.1)
};
let (zlos, zhis) = if ray_pos.2 {
((self.z.0 - ray4_o.2) * ray4_dinv.2, (self.z.1 - ray4_o.2) * ray4_dinv.2)
} else {
((self.z.1 - ray4_o.2) * ray4_dinv.2, (self.z.0 - ray4_o.2) * ray4_dinv.2)
};
// Get the minimum and maximum hits
let mins = v_max(v_max(xlos, ylos), v_max(zlos, Float4::splat(0.0)));
let maxs = v_max(v_min(v_min(xhis, yhis), zhis),
Float4::splat(std::f32::NEG_INFINITY)) *
Float4::splat(BBOX_MAXT_ADJUST);
// Check for hits
let hits = mins.lt(Float4::splat(ray.max_t)) & mins.lte(maxs);
return hits;
}
}
/// Lane-wise union of two sets of four bounding boxes.
impl BitOr for BBox4 {
    type Output = BBox4;

    fn bitor(self, rhs: BBox4) -> BBox4 {
        // On every axis the union takes the smaller minimum and the larger
        // maximum.
        let x = (self.x.0.v_min(rhs.x.0), self.x.1.v_max(rhs.x.1));
        let y = (self.y.0.v_min(rhs.y.0), self.y.1.v_max(rhs.y.1));
        let z = (self.z.0.v_min(rhs.z.0), self.z.1.v_max(rhs.z.1));

        BBox4 { x: x, y: y, z: z }
    }
}
/// In-place union: replaces `self` with `self | rhs`.
impl BitOrAssign for BBox4 {
    fn bitor_assign(&mut self, rhs: BBox4) {
        let merged = *self | rhs;
        *self = merged;
    }
}
/// Interpolates every min and max extent of the four boxes independently.
impl Lerp for BBox4 {
    fn lerp(self, other: BBox4, alpha: f32) -> BBox4 {
        let x_min = lerp(self.x.0, other.x.0, alpha);
        let x_max = lerp(self.x.1, other.x.1, alpha);
        let y_min = lerp(self.y.0, other.y.0, alpha);
        let y_max = lerp(self.y.1, other.y.1, alpha);
        let z_min = lerp(self.z.0, other.z.0, alpha);
        let z_max = lerp(self.z.1, other.z.1, alpha);

        BBox4 {
            x: (x_min, x_max),
            y: (y_min, y_max),
            z: (z_min, z_max),
        }
    }
}

View File

@ -1,303 +0,0 @@
#![allow(dead_code)]
use std::mem::size_of;
/// A fixed-capacity stack of 128 bits.
///
/// The top of the stack is the least significant bit of `data.0`; `data.1`
/// holds the 64 bits beneath the 64 bits stored in `data.0`.
#[derive(Copy, Clone, Debug)]
pub struct BitStack128 {
    data: (u64, u64),
}

impl BitStack128 {
    /// Creates a stack with all bits cleared.
    pub fn new() -> BitStack128 {
        BitStack128 { data: (0, 0) }
    }

    /// Creates a stack whose top bit is set and all other bits are cleared.
    pub fn new_with_1() -> BitStack128 {
        BitStack128 { data: (1, 0) }
    }

    /// Push a bit onto the top of the stack.
    pub fn push(&mut self, value: bool) {
        // Verify no stack overflow
        debug_assert!((self.data.1 >> ((size_of::<u64>() * 8) - 1)) == 0);

        // Shift both words up by one, carrying the top bit of the low word
        // into the high word.
        self.data.1 = (self.data.1 << 1) | (self.data.0 >> ((size_of::<u64>() * 8) - 1));
        self.data.0 <<= 1;
        self.data.0 |= value as u64;
    }

    /// Push n bits onto the top of the stack.  The input
    /// bits are passed as an integer, with the bit that
    /// will be on top in the least significant digit, and
    /// the rest following in order from there.
    ///
    /// `count` must be at most 8.  Pushing zero bits leaves the stack
    /// unchanged.
    ///
    /// Note that unless you are running a debug build, no
    /// effort is made to verify that only the first n
    /// bits of the passed value are used.  So if other
    /// bits are non-zero this will produce incorrect results.
    pub fn push_n(&mut self, value: u8, count: u8) {
        debug_assert!(count <= (size_of::<u8>() * 8) as u8);

        // Pushing nothing must be a no-op.  Without this guard the carry
        // below would shift by the full word width, which is an overflow.
        if count == 0 {
            debug_assert!(value == 0);
            return;
        }

        // Verify no bitstack overflow
        debug_assert!((self.data.1 >> ((size_of::<u64>() * 8) - count as usize)) == 0);
        // Verify no bits outside of the n-bit range
        debug_assert!(if count < (size_of::<u8>() * 8) as u8 {
            value & (!((1 << count) - 1)) == 0
        } else {
            true
        });

        self.data.1 = (self.data.1 << count as usize) |
                      (self.data.0 >> ((size_of::<u64>() * 8) - count as usize));
        self.data.0 <<= count as u64;
        self.data.0 |= value as u64;
    }

    /// Pop the top bit off the stack.
    pub fn pop(&mut self) -> bool {
        let b = (self.data.0 & 1) != 0;
        // Shift both words down by one, carrying the bottom bit of the high
        // word into the low word.
        self.data.0 = (self.data.0 >> 1) | (self.data.1 << ((size_of::<u64>() * 8) - 1));
        self.data.1 >>= 1;
        b
    }

    /// Pop the top n bits off the stack.  The bits are returned as
    /// an integer, with the top bit in the least significant digit,
    /// and the rest following in order from there.
    ///
    /// `n` must be less than 64.  Popping zero bits returns 0 and leaves the
    /// stack unchanged.
    pub fn pop_n(&mut self, n: usize) -> u64 {
        debug_assert!(n < (size_of::<BitStack128>() * 8)); // Can't pop more than we have
        debug_assert!(n < (size_of::<u64>() * 8)); // Can't pop more than the return type can hold

        // Popping nothing must be a no-op.  Without this guard the carry
        // below would shift by the full word width, which is an overflow.
        if n == 0 {
            return 0;
        }

        let b = self.data.0 & ((1 << n) - 1);
        self.data.0 = (self.data.0 >> n) | (self.data.1 << ((size_of::<u64>() * 8) - n));
        self.data.1 >>= n;
        b
    }

    /// Pop the top n bits off the stack, but return only the nth bit.
    ///
    /// `n` must be at least 1 and less than 64.
    pub fn pop_to_nth(&mut self, n: usize) -> bool {
        debug_assert!(n > 0);
        debug_assert!(n < (size_of::<BitStack128>() * 8)); // Can't pop more than we have
        debug_assert!(n < (size_of::<u64>() * 8)); // Can't pop more than the return type can hold
        let b = (self.data.0 & (1 << (n - 1))) != 0;
        self.data.0 = (self.data.0 >> n) | (self.data.1 << ((size_of::<u64>() * 8) - n));
        self.data.1 >>= n;
        b
    }

    /// Read the top bit of the stack without popping it.
    pub fn peek(&self) -> bool {
        (self.data.0 & 1) != 0
    }

    /// Read the top n bits of the stack without popping them.  The bits
    /// are returned as an integer, with the top bit in the least
    /// significant digit, and the rest following in order from there.
    ///
    /// `n` must be less than 64.
    pub fn peek_n(&self, n: usize) -> u64 {
        // Can't return more than we have
        debug_assert!(n < (size_of::<BitStack128>() * 8));
        // Can't return more than the return type can hold
        debug_assert!(n < (size_of::<u64>() * 8));
        self.data.0 & ((1 << n) - 1)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // The byte 0b10110111 repeated eight times, filling a whole 64-bit word.
    const REPEATED_BYTE_WORD: u64 = 0xB7B7_B7B7_B7B7_B7B7;

    // Bits of 0b10110111 from most to least significant: the order in which
    // they have to be pushed to leave that byte on top of the stack.
    const BITS_MSB_FIRST: [bool; 8] = [true, false, true, true, false, true, true, true];

    // The same bits from least to most significant: the order in which pops
    // and peeks yield them.
    const BITS_LSB_FIRST: [bool; 8] = [true, true, true, false, true, true, false, true];

    #[test]
    fn push() {
        let mut bs = BitStack128::new();
        for &bit in BITS_MSB_FIRST.iter() {
            bs.push(bit);
        }
        assert_eq!(bs.data.0, 0b10110111);
        assert_eq!(bs.data.1, 0);
    }

    #[test]
    fn push_overflow() {
        // Nine bytes' worth of bits spills over into the second word.
        let mut bs = BitStack128::new();
        for _ in 0..9 {
            for &bit in BITS_MSB_FIRST.iter() {
                bs.push(bit);
            }
        }
        assert_eq!(bs.data.0, REPEATED_BYTE_WORD);
        assert_eq!(bs.data.1, 0b10110111);
    }

    #[test]
    fn pop() {
        let mut bs = BitStack128::new();
        bs.data.0 = 0b10110111;
        for &expected in BITS_LSB_FIRST.iter() {
            assert_eq!(bs.pop(), expected);
        }
    }

    #[test]
    fn pop_overflow() {
        let mut bs = BitStack128::new();
        bs.data.0 = REPEATED_BYTE_WORD;
        bs.data.1 = 0b10110111;
        for _ in 0..9 {
            for &expected in BITS_LSB_FIRST.iter() {
                assert_eq!(bs.pop(), expected);
            }
        }
    }

    #[test]
    fn push_n() {
        let mut bs = BitStack128::new();
        bs.push_n(0b10110, 5);
        bs.push_n(0b10110111, 8);
        assert_eq!(bs.data.0, 0b1011010110111);
    }

    #[test]
    fn push_n_overflow() {
        let mut bs = BitStack128::new();
        for _ in 0..9 {
            bs.push_n(0b10110111, 8);
        }
        assert_eq!(bs.data.0, REPEATED_BYTE_WORD);
        assert_eq!(bs.data.1, 0b10110111);
    }

    #[test]
    fn pop_n() {
        let mut bs = BitStack128::new();
        bs.data.0 = 0b0010_1000_1100_1110_0101_0111;

        // (popped nibble, low word left behind)
        let steps: [(u64, u64); 6] = [(0b0111, 0b0010_1000_1100_1110_0101),
                                      (0b0101, 0b0010_1000_1100_1110),
                                      (0b1110, 0b0010_1000_1100),
                                      (0b1100, 0b0010_1000),
                                      (0b1000, 0b0010),
                                      (0b0010, 0)];
        for &(popped, remaining) in steps.iter() {
            assert_eq!(bs.pop_n(4), popped);
            assert_eq!(bs.data.0, remaining);
        }
    }

    #[test]
    fn pop_n_overflow() {
        let mut bs = BitStack128::new();
        bs.data.0 = REPEATED_BYTE_WORD;
        bs.data.1 = 0b10110111;
        for _ in 0..9 {
            assert_eq!(bs.pop_n(8), 0b10110111);
        }
    }

    #[test]
    fn pop_to_nth() {
        let mut bs = BitStack128::new();
        bs.data.0 = 0b0010_1000_1100_1110_0101_0111;

        // (fourth bit of the popped nibble, low word left behind)
        let steps: [(bool, u64); 6] = [(false, 0b0010_1000_1100_1110_0101),
                                       (false, 0b0010_1000_1100_1110),
                                       (true, 0b0010_1000_1100),
                                       (true, 0b0010_1000),
                                       (true, 0b0010),
                                       (false, 0)];
        for &(nth_bit, remaining) in steps.iter() {
            assert_eq!(bs.pop_to_nth(4), nth_bit);
            assert_eq!(bs.data.0, remaining);
        }
    }

    #[test]
    fn pop_to_nth_overflow() {
        // Bytes alternate between 0b10110111 (eighth bit set) and
        // 0b00110111 (eighth bit clear), across both words.
        let mut bs = BitStack128::new();
        bs.data.0 = 0x37B7_37B7_37B7_37B7;
        bs.data.1 = 0x37B7;
        for _ in 0..5 {
            assert_eq!(bs.pop_to_nth(8), true);
            assert_eq!(bs.pop_to_nth(8), false);
        }
    }

    #[test]
    fn peek() {
        let mut bs = BitStack128::new();
        bs.data.0 = 0b10110111;
        let last = BITS_LSB_FIRST.len() - 1;
        for (i, &expected) in BITS_LSB_FIRST.iter().enumerate() {
            assert_eq!(bs.peek(), expected);
            // Nothing is popped after the final peek.
            if i < last {
                bs.pop();
            }
        }
    }

    #[test]
    fn peek_n() {
        let mut bs = BitStack128::new();
        bs.data.0 = 0b10110111;
        for &expected in [0b0111u64, 0b1011u64].iter() {
            assert_eq!(bs.peek_n(4), expected);
            bs.pop_n(4);
        }
    }
}

View File

@ -23,8 +23,6 @@ extern crate lazy_static;
mod accel; mod accel;
mod algorithm; mod algorithm;
mod bbox; mod bbox;
mod bbox4;
mod bitstack;
mod boundable; mod boundable;
mod camera; mod camera;
mod color; mod color;
@ -61,9 +59,7 @@ use ray::{Ray, AccelRay};
use surface::SurfaceIntersection; use surface::SurfaceIntersection;
use renderer::LightPath; use renderer::LightPath;
use bbox::BBox; use bbox::BBox;
use bbox4::BBox4;
use accel::BVHNode; use accel::BVHNode;
use accel::BVH4Node;
use timer::Timer; use timer::Timer;
@ -130,9 +126,7 @@ fn main() {
mem::size_of::<SurfaceIntersection>()); mem::size_of::<SurfaceIntersection>());
println!("LightPath size: {} bytes", mem::size_of::<LightPath>()); println!("LightPath size: {} bytes", mem::size_of::<LightPath>());
println!("BBox size: {} bytes", mem::size_of::<BBox>()); println!("BBox size: {} bytes", mem::size_of::<BBox>());
println!("BBox4 size: {} bytes", mem::size_of::<BBox4>());
println!("BVHNode size: {} bytes", mem::size_of::<BVHNode>()); println!("BVHNode size: {} bytes", mem::size_of::<BVHNode>());
println!("BVH4Node size: {} bytes", mem::size_of::<BVH4Node>());
return; return;
} }

View File

@ -2,7 +2,6 @@
use std; use std;
use bitstack::BitStack128;
use float4::Float4; use float4::Float4;
use math::{Vector, Point, Matrix4x4}; use math::{Vector, Point, Matrix4x4};
@ -59,7 +58,6 @@ pub struct AccelRay {
pub time: f32, pub time: f32,
pub flags: u32, pub flags: u32,
pub id: u32, pub id: u32,
pub trav_stack: BitStack128,
} }
impl AccelRay { impl AccelRay {
@ -71,7 +69,6 @@ impl AccelRay {
time: ray.time, time: ray.time,
flags: ray.flags, flags: ray.flags,
id: id, id: id,
trav_stack: BitStack128::new_with_1(),
} }
} }