From 172e2f19ef717d314614811247210fa865c923de Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl <cessen@cessen.com>
Date: Fri, 12 May 2017 21:07:40 -0700
Subject: [PATCH] Removed BVH4 and the related bitstack from AccelRay.

I couldn't make the BVH4 faster than the BVH, and the bitstack
was bloating the AccelRay struct.  Removing the bitstack gives
a small but noticeable speedup in rendering.
---
 src/accel/bvh4.rs              | 487 ---------------------------------
 src/accel/bvh4_table.inc       |  35 ---
 src/accel/create_bvh4_table.py |  58 ----
 src/accel/mod.rs               |   2 -
 src/bbox4.rs                   | 116 --------
 src/bitstack.rs                | 303 --------------------
 src/main.rs                    |   6 -
 src/ray.rs                     |   3 -
 8 files changed, 1010 deletions(-)
 delete mode 100644 src/accel/bvh4.rs
 delete mode 100644 src/accel/bvh4_table.inc
 delete mode 100644 src/accel/create_bvh4_table.py
 delete mode 100644 src/bbox4.rs
 delete mode 100644 src/bitstack.rs
diff --git a/src/accel/bvh4.rs b/src/accel/bvh4.rs
deleted file mode 100644
index d9fe66a..0000000
--- a/src/accel/bvh4.rs
+++ /dev/null
@@ -1,487 +0,0 @@
-//! This BVH4 implementation pulls a lot of ideas from the paper
-//! "Efficient Ray Tracing Kernels for Modern CPU Architectures"
-//! by Fuetterling et al.
-//!
-//! Specifically, the table-based traversal order approach they
-//! propose is largely followed by this implementation.
-
-#![allow(dead_code)]
-
-use mem_arena::MemArena;
-
-use algorithm::{partition, partition_with_side};
-use bbox::BBox;
-use bbox4::BBox4;
-use boundable::Boundable;
-use lerp::lerp_slice;
-use ray::AccelRay;
-use timer::Timer;
-
-use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH};
-use super::ACCEL_TRAV_TIME;
-
-// TRAVERSAL_TABLE
-include!("bvh4_table.inc");
-
-#[derive(Copy, Clone, Debug)]
-pub struct BVH4<'a> {
-    root: Option<&'a BVH4Node<'a>>,
-    depth: usize,
-    node_count: usize,
-    _bounds: Option<&'a [BBox]>,
-}
-
-#[derive(Copy, Clone, Debug)]
-pub enum BVH4Node<'a> {
-    Internal {
-        bounds: &'a [BBox4],
-        children: &'a [BVH4Node<'a>],
-        traversal_code: u8,
-    },
-
-    Leaf { object_range: (usize, usize) },
-}
-
-impl<'a> BVH4<'a> {
-    pub fn from_objects<'b, T, F>(arena: &'a MemArena,
-                                  objects: &mut [T],
-                                  objects_per_leaf: usize,
-                                  bounder: F)
-                                  -> BVH4<'a>
-        where F: 'b + Fn(&T) -> &'b [BBox]
-    {
-        if objects.len() == 0 {
-            BVH4 {
-                root: None,
-                depth: 0,
-                node_count: 0,
-                _bounds: None,
-            }
-        } else {
-            let base = BVHBase::from_objects(objects, objects_per_leaf, bounder);
-
-            let mut fill_node = unsafe { arena.alloc_uninitialized_with_alignment::<BVH4Node>(32) };
-            let node_count = BVH4::construct_from_base(arena,
-                                                       &base,
-                                                       &base.nodes[base.root_node_index()],
-                                                       fill_node);
-
-            BVH4 {
-                root: Some(fill_node),
-                depth: (base.depth / 2) + 1,
-                node_count: node_count,
-                _bounds: {
-                    let range = base.nodes[base.root_node_index()].bounds_range();
-                    Some(arena.copy_slice(&base.bounds[range.0..range.1]))
-                },
-            }
-        }
-    }
-
-    pub fn tree_depth(&self) -> usize {
-        self.depth
-    }
-
-    pub fn traverse<T, F>(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F)
-        where F: FnMut(&T, &mut [AccelRay])
-    {
-        if self.root.is_none() {
-            return;
-        }
-
-        let mut trav_time: f64 = 0.0;
-        let mut timer = Timer::new();
-
-        // +2 of max depth for root and last child
-        let mut node_stack = [self.root; BVH_MAX_DEPTH + 2];
-        let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2];
-        let mut stack_ptr = 1;
-        let mut unpopped = 0;
-        let mut first_loop = true;
-
-        let ray_code = ((rays[0].dir_inv.x() < 0.0) as u8) |
-                       (((rays[0].dir_inv.y() < 0.0) as u8) << 1) |
-                       (((rays[0].dir_inv.z() < 0.0) as u8) << 2);
-
-        while stack_ptr > 0 {
-            match node_stack[stack_ptr] {
-                Some(&BVH4Node::Internal { bounds, children, traversal_code }) => {
-                    let node_order_code = {
-                        TRAVERSAL_TABLE[ray_code as usize][traversal_code as usize]
-                    };
-                    let noc1 = node_order_code & 3;
-                    let noc2 = (node_order_code >> 2) & 3;
-                    let noc3 = (node_order_code >> 4) & 3;
-                    let noc4 = (node_order_code >> 6) & 3;
-
-                    let mut all_hits = 0;
-
-                    // Ray testing
-                    let part;
-                    {
-                        // Common code for ray testing below
-                        let mut test_ray = |r: &mut AccelRay| {
-                            let hits = lerp_slice(bounds, r.time)
-                                .intersect_accel_ray(r)
-                                .to_bitmask();
-
-                            all_hits |= hits;
-
-                            if hits != 0 {
-                                // Push hit bits onto ray's traversal stack
-                                let shuffled_hits = match children.len() {
-                                    4 => {
-                                        ((hits >> noc1) & 1) | (((hits >> noc2) & 1) << 1) |
-                                        (((hits >> noc3) & 1) << 2) |
-                                        (((hits >> noc4) & 1) << 3)
-                                    }
-                                    3 => {
-                                        ((hits >> noc1) & 1) | (((hits >> noc2) & 1) << 1) |
-                                        (((hits >> noc3) & 1) << 2)
-                                    }
-                                    2 => ((hits >> noc1) & 1) | (((hits >> noc2) & 1) << 1),
-                                    _ => unreachable!(),
-                                };
-                                r.trav_stack.push_n(shuffled_hits, children.len() as u8);
-
-                                return true;
-                            }
-
-                            return false;
-                        };
-
-                        // Skip some tests if it's the first loop
-                        part = if first_loop {
-                            filter_rays(&ray_i_stack[stack_ptr..],
-                                        &mut rays[..ray_i_stack[stack_ptr]],
-                                        unpopped,
-                                        |r, _| {
-                                            if !r.is_done() {
-                                                return test_ray(r);
-                                            }
-                                            return false;
-                                        })
-                        } else {
-                            filter_rays(&ray_i_stack[stack_ptr..],
-                                        &mut rays[..ray_i_stack[stack_ptr]],
-                                        unpopped,
-                                        |r, pop_count| {
-                                if (!r.is_done()) && r.trav_stack.pop_to_nth(pop_count) {
-                                    return test_ray(r);
-                                }
-                                return false;
-                            })
-                        };
-                    }
-                    unpopped = 0;
-
-                    // Update stack based on ray testing results
-                    if part > 0 {
-                        for i in 0..children.len() {
-                            let inv_i = (children.len() - 1) - i;
-                            let child_i = ((node_order_code >> (inv_i * 2)) & 3) as usize;
-                            node_stack[stack_ptr + i] = if ((all_hits >> child_i) & 1) == 0 {
-                                None
-                            } else {
-                                Some(&children[child_i])
-                            };
-                            ray_i_stack[stack_ptr + i] = part;
-                        }
-
-                        stack_ptr += children.len() - 1;
-                    } else {
-                        stack_ptr -= 1;
-                    }
-                }
-
-                Some(&BVH4Node::Leaf { object_range }) => {
-                    let part = if !first_loop {
-                        filter_rays(&ray_i_stack[stack_ptr..],
-                                    &mut rays[..ray_i_stack[stack_ptr]],
-                                    unpopped,
-                                    |r, pop_count| {
-                                        (!r.is_done()) && r.trav_stack.pop_to_nth(pop_count)
-                                    })
-                    } else {
-                        ray_i_stack[stack_ptr]
-                    };
-
-                    unpopped = 0;
-
-                    trav_time += timer.tick() as f64;
-
-                    for obj in &objects[object_range.0..object_range.1] {
-                        obj_ray_test(obj, &mut rays[..part]);
-                    }
-
-                    timer.tick();
-
-                    stack_ptr -= 1;
-                }
-
-                None => {
-                    if !first_loop {
-                        unpopped += 1;
-
-                    }
-                    stack_ptr -= 1;
-                }
-            }
-
-            first_loop = false;
-        }
-
-        // Pop any unpopped bits of the ray traversal stacks
-        if unpopped > 0 {
-            filter_rays(&ray_i_stack[1..],
-                        &mut rays[..ray_i_stack[1]],
-                        unpopped - 1,
-                        |r, pop_count| r.trav_stack.pop_to_nth(pop_count));
-        }
-
-        trav_time += timer.tick() as f64;
-        ACCEL_TRAV_TIME.with(|att| {
-            let v = att.get();
-            att.set(v + trav_time);
-        });
-    }
-
-    fn construct_from_base(arena: &'a MemArena,
-                           base: &BVHBase,
-                           node: &BVHBaseNode,
-                           fill_node: &mut BVH4Node<'a>)
-                           -> usize {
-        let mut node_count = 0;
-
-        match node {
-            // Create internal node
-            &BVHBaseNode::Internal { bounds_range: _, children_indices, split_axis } => {
-                let child_l = &base.nodes[children_indices.0];
-                let child_r = &base.nodes[children_indices.1];
-
-                // Prepare convenient access to the stuff we need.
-                let child_count;
-                let children; // [Optional, Optional, Optional, Optional]
-                let split_axis_l; // Optional
-                let split_axis_r; // Optional
-                match child_l {
-                    &BVHBaseNode::Internal { children_indices: i_l, split_axis: s_l, .. } => {
-                        match child_r {
-                            &BVHBaseNode::Internal { children_indices: i_r,
-                                                     split_axis: s_r,
-                                                     .. } => {
-                                // Four nodes
-                                child_count = 4;
-                                children = [Some(&base.nodes[i_l.0]),
-                                            Some(&base.nodes[i_l.1]),
-                                            Some(&base.nodes[i_r.0]),
-                                            Some(&base.nodes[i_r.1])];
-                                split_axis_l = Some(s_l);
-                                split_axis_r = Some(s_r);
-                            }
-                            &BVHBaseNode::Leaf { .. } => {
-                                // Three nodes with left split
-                                child_count = 3;
-                                children = [Some(&base.nodes[i_l.0]),
-                                            Some(&base.nodes[i_l.1]),
-                                            Some(child_r),
-                                            None];
-                                split_axis_l = Some(s_l);
-                                split_axis_r = None;
-                            }
-                        }
-                    }
-                    &BVHBaseNode::Leaf { .. } => {
-                        match child_r {
-                            &BVHBaseNode::Internal { children_indices: i_r,
-                                                     split_axis: s_r,
-                                                     .. } => {
-                                // Three nodes with right split
-                                child_count = 3;
-                                children = [Some(child_l),
-                                            Some(&base.nodes[i_r.0]),
-                                            Some(&base.nodes[i_r.1]),
-                                            None];
-                                split_axis_l = None;
-                                split_axis_r = Some(s_r);
-                            }
-                            &BVHBaseNode::Leaf { .. } => {
-                                // Two nodes
-                                child_count = 2;
-                                children = [Some(child_l), Some(child_r), None, None];
-                                split_axis_l = None;
-                                split_axis_r = None;
-                            }
-                        }
-                    }
-                }
-
-                node_count += child_count;
-
-                // Construct bounds
-                let bounds = {
-                    let bounds_len = children.iter()
-                        .map(|c| if let &Some(n) = c {
-                            let len = n.bounds_range().1 - n.bounds_range().0;
-                            debug_assert!(len >= 1);
-                            len
-                        } else {
-                            0
-                        })
-                        .max()
-                        .unwrap();
-                    debug_assert!(bounds_len >= 1);
-                    let mut bounds =
-                        unsafe { arena.alloc_array_uninitialized_with_alignment(bounds_len, 32) };
-                    if bounds_len < 2 {
-                        let b1 = children[0]
-                            .map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
-                        let b2 = children[1]
-                            .map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
-                        let b3 = children[2]
-                            .map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
-                        let b4 = children[3]
-                            .map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
-                        bounds[0] = BBox4::from_bboxes(b1, b2, b3, b4);
-                    } else {
-                        for (i, b) in bounds.iter_mut().enumerate() {
-                            let time = i as f32 / (bounds_len - 1) as f32;
-
-                            let b1 = children[0].map_or(BBox::new(), |c| {
-                                let (x, y) = c.bounds_range();
-                                lerp_slice(&base.bounds[x..y], time)
-                            });
-                            let b2 = children[1].map_or(BBox::new(), |c| {
-                                let (x, y) = c.bounds_range();
-                                lerp_slice(&base.bounds[x..y], time)
-                            });
-                            let b3 = children[2].map_or(BBox::new(), |c| {
-                                let (x, y) = c.bounds_range();
-                                lerp_slice(&base.bounds[x..y], time)
-                            });
-                            let b4 = children[3].map_or(BBox::new(), |c| {
-                                let (x, y) = c.bounds_range();
-                                lerp_slice(&base.bounds[x..y], time)
-                            });
-                            *b = BBox4::from_bboxes(b1, b2, b3, b4);
-                        }
-                    }
-                    bounds
-                };
-
-                // Construct child nodes
-                let mut child_nodes =
-                    unsafe {
-                        arena.alloc_array_uninitialized_with_alignment::<BVH4Node>(child_count, 32)
-                    };
-                for (i, c) in children[0..child_count].iter().enumerate() {
-                    node_count +=
-                        BVH4::construct_from_base(arena, base, c.unwrap(), &mut child_nodes[i]);
-                }
-
-                // Build this node
-                let traversal_code = {
-                    let topology_code = if child_count == 4 {
-                        0
-                    } else if child_count == 2 {
-                        3
-                    } else if split_axis_l.is_some() {
-                        1
-                    } else {
-                        2
-                    };
-                    calc_traversal_code(split_axis,
-                                        split_axis_l.unwrap_or(split_axis_r.unwrap_or(0)),
-                                        if child_count == 4 {
-                                            split_axis_r.unwrap()
-                                        } else {
-                                            0
-                                        },
-                                        topology_code)
-                };
-                *fill_node = BVH4Node::Internal {
-                    bounds: bounds,
-                    children: child_nodes,
-                    traversal_code: traversal_code,
-                };
-            }
-
-            // Create internal node
-            &BVHBaseNode::Leaf { object_range, .. } => {
-                *fill_node = BVH4Node::Leaf { object_range: object_range };
-                node_count += 1;
-            }
-        }
-
-        return node_count;
-    }
-}
-
-
-impl<'a> Boundable for BVH4<'a> {
-    fn bounds<'b>(&'b self) -> &'b [BBox] {
-        self._bounds.unwrap_or(&[])
-    }
-}
-
-
-// Calculates the traversal code for a BVH4 node based on the splits and topology
-// of its children.
-//
-// split_1 is the top split.
-//
-// split_2 is either the left or right split depending on topology, and is only
-// relevant for topologies 0-2.  For topology 3 it should be 0.
-//
-// split_3 is always the right split, and is only relevant for topology 0. For
-// topologies 1-3 it should be 0.
-//
-// topology can be 0-3:
-//     0: All three splits exist, representing 4 BVH4 children.
-//     1: Two splits exist: top split and left split, representing 3 BVH4 children.
-//     2: Two splits exist: top split and right split, representing 3 BVH4 children.
-//     3: Only the top split exists, representing 2 BVH4 children.
-fn calc_traversal_code(split_1: u8, split_2: u8, split_3: u8, topology: u8) -> u8 {
-    debug_assert!(!(topology > 0 && split_3 > 0));
-    debug_assert!(!(topology > 2 && split_2 > 0));
-
-    static T_TABLE: [u8; 4] = [0, 27, 27 + 9, 27 + 9 + 9];
-    split_1 + (split_2 * 3) + (split_3 * 9) + T_TABLE[topology as usize]
-}
-
-
-fn filter_rays<F>(ray_i_stack: &[usize],
-                  rays: &mut [AccelRay],
-                  unpopped: usize,
-                  mut ray_test: F)
-                  -> usize
-    where F: FnMut(&mut AccelRay, usize) -> bool
-{
-    let part = if ray_i_stack[0] == ray_i_stack[unpopped] {
-        let pop_count = unpopped + 1;
-        partition(rays, |r| ray_test(r, pop_count))
-    } else {
-        let mut part_n = [0, rays.len() - 1]; // Where we are in the partition
-        let mut part_pop = [unpopped, 0]; // Number of bits to pop on the left and right side
-
-        partition_with_side(rays, |r, side| {
-            let pop_count = if !side {
-                while part_n[0] >= ray_i_stack[part_pop[0]] {
-                    part_pop[0] -= 1;
-                }
-                part_n[0] += 1;
-                part_pop[0]
-            } else {
-                while part_n[1] < ray_i_stack[part_pop[1] + 1] && part_pop[1] < unpopped {
-                    part_pop[1] += 1;
-                }
-                part_n[1] -= 1;
-                part_pop[1]
-            };
-
-            return ray_test(r, pop_count + 1);
-        })
-    };
-
-    part
-}
diff --git a/src/accel/bvh4_table.inc b/src/accel/bvh4_table.inc
deleted file mode 100644
index fef7769..0000000
--- a/src/accel/bvh4_table.inc
+++ /dev/null
@@ -1,35 +0,0 @@
-static TRAVERSAL_TABLE: [[u8; 48]; 8] = [
-    [228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228, 228,
-     36, 36, 36, 36, 36, 36, 36, 36, 36,
-     36, 36, 36, 36, 36, 36, 36, 36, 36,
-     4, 4, 4],
-    [27, 177, 177, 75, 180, 180, 75, 180, 180, 30, 225, 225, 78, 228, 228, 78, 228, 228, 30, 225, 225, 78, 228, 228, 78, 228, 228,
-     6, 33, 33, 18, 36, 36, 18, 36, 36,
-     6, 24, 24, 9, 36, 36, 9, 36, 36,
-     1, 4, 4],
-    [228, 78, 228, 225, 30, 225, 228, 78, 228, 180, 75, 180, 177, 27, 177, 180, 75, 180, 228, 78, 228, 225, 30, 225, 228, 78, 228,
-     36, 18, 36, 33, 6, 33, 36, 18, 36,
-     36, 9, 36, 24, 6, 24, 36, 9, 36,
-     4, 1, 4],
-    [27, 27, 177, 27, 27, 177, 75, 75, 180, 27, 27, 177, 27, 27, 177, 75, 75, 180, 30, 30, 225, 30, 30, 225, 78, 78, 228,
-     6, 6, 33, 6, 6, 33, 18, 18, 36,
-     6, 6, 24, 6, 6, 24, 9, 9, 36,
-     1, 1, 4],
-    [228, 228, 78, 228, 228, 78, 225, 225, 30, 228, 228, 78, 228, 228, 78, 225, 225, 30, 180, 180, 75, 180, 180, 75, 177, 177, 27,
-     36, 36, 18, 36, 36, 18, 33, 33, 6,
-     36, 36, 9, 36, 36, 9, 24, 24, 6,
-     4, 4, 1],
-    [27, 177, 27, 75, 180, 75, 27, 177, 27, 30, 225, 30, 78, 228, 78, 30, 225, 30, 27, 177, 27, 75, 180, 75, 27, 177, 27,
-     6, 33, 6, 18, 36, 18, 6, 33, 6,
-     6, 24, 6, 9, 36, 9, 6, 24, 6,
-     1, 4, 1],
-    [228, 78, 78, 225, 30, 30, 225, 30, 30, 180, 75, 75, 177, 27, 27, 177, 27, 27, 180, 75, 75, 177, 27, 27, 177, 27, 27,
-     36, 18, 18, 33, 6, 6, 33, 6, 6,
-     36, 9, 9, 24, 6, 6, 24, 6, 6,
-     4, 1, 1],
-    [27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
-     6, 6, 6, 6, 6, 6, 6, 6, 6,
-     6, 6, 6, 6, 6, 6, 6, 6, 6,
-     1, 1, 1],
-];
-
diff --git a/src/accel/create_bvh4_table.py b/src/accel/create_bvh4_table.py
deleted file mode 100644
index d9b9410..0000000
--- a/src/accel/create_bvh4_table.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-
-if __name__ == "__main__":
-    text = "static TRAVERSAL_TABLE: [[u8; 48]; 8] = [\n"
-
-    for raydir in range(0, 8):
-        ray = [raydir & 1, (raydir >> 1) & 1, (raydir >> 2) & 1]
-        
-        text += "    ["
-        for splits in [[s1, s2, s3] for s3 in range(0,3) for s2 in range(0,3) for s1 in range(0,3)]:
-            perm = [0, 1, 2, 3]
-            if ray[splits[1]] == 1:
-                perm = [perm[1], perm[0]] + perm[2:4]
-            if ray[splits[2]] == 1:
-                perm = perm[0:2] + [perm[3], perm[2]]
-            if ray[splits[0]] == 1:
-                perm = perm[2:4] + perm[0:2]
-            perm = perm[0] + (perm[1] << 2) + (perm[2] << 4) + (perm[3] << 6)
-            text += "%d, " % perm
-        text = text[:-1]
-
-        text += "\n     "
-        for splits in [[s1, s2] for s2 in range(0,3) for s1 in range(0,3)]:
-            perm = [0, 1, 2]
-            if ray[splits[1]] == 1:
-                perm = [perm[1], perm[0], perm[2]]
-            if ray[splits[0]] == 1:
-                perm = [perm[2], perm[0], perm[1]]
-            perm = perm[0] + (perm[1] << 2) + (perm[2] << 4)
-            text += "%d, " % perm
-        text = text[:-1]
-
-        text += "\n     "
-        for splits in [[s1, s2] for s2 in range(0,3) for s1 in range(0,3)]:
-            perm = [0, 1, 2]
-            if ray[splits[1]] == 1:
-                perm = [perm[0], perm[2], perm[1]]
-            if ray[splits[0]] == 1:
-                perm = [perm[1], perm[2], perm[0]]
-            perm = perm[0] + (perm[1] << 2) + (perm[2] << 4)
-            text += "%d, " % perm
-        text = text[:-1]
-        
-        text += "\n     "
-        for split in [s1 for s1 in range(0,3)]:
-            perm = [0, 1]
-            if ray[split] == 1:
-                perm = [perm[1], perm[0]]
-            perm = perm[0] + (perm[1] << 2)
-            text += "%d, " % perm
-        text = text[:-1]
-        
-        text = text[:-1] + "],\n"
-    
-    text += "];\n"
-
-    print text
-
diff --git a/src/accel/mod.rs b/src/accel/mod.rs
index 0000196..f538668 100644
--- a/src/accel/mod.rs
+++ b/src/accel/mod.rs
@@ -1,6 +1,5 @@
 mod bvh_base;
 mod bvh;
-mod bvh4;
 mod light_array;
 mod light_tree;
 mod objects_split;
@@ -11,7 +10,6 @@ use math::{Vector, Point, Normal};
 use shading::surface_closure::SurfaceClosure;
 
 pub use self::bvh::{BVH, BVHNode};
-pub use self::bvh4::{BVH4, BVH4Node};
 pub use self::light_tree::LightTree;
 
 // Track BVH traversal time
diff --git a/src/bbox4.rs b/src/bbox4.rs
deleted file mode 100644
index 15fa59d..0000000
--- a/src/bbox4.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-#![allow(dead_code)]
-
-use std;
-use std::ops::{BitOr, BitOrAssign};
-
-use bbox::BBox;
-use float4::{Float4, Bool4, v_min, v_max};
-use lerp::{lerp, Lerp};
-use ray::AccelRay;
-
-
-const BBOX_MAXT_ADJUST: f32 = 1.00000024;
-
-/// A SIMD set of 4 3D axis-aligned bounding boxes.
-#[derive(Debug, Copy, Clone)]
-pub struct BBox4 {
-    pub x: (Float4, Float4), // (min, max)
-    pub y: (Float4, Float4), // (min, max)
-    pub z: (Float4, Float4), // (min, max)
-}
-
-impl BBox4 {
-    /// Creates a degenerate BBox with +infinity min and -infinity max.
-    pub fn new() -> BBox4 {
-        BBox4 {
-            x: (Float4::splat(std::f32::INFINITY), Float4::splat(std::f32::NEG_INFINITY)),
-            y: (Float4::splat(std::f32::INFINITY), Float4::splat(std::f32::NEG_INFINITY)),
-            z: (Float4::splat(std::f32::INFINITY), Float4::splat(std::f32::NEG_INFINITY)),
-        }
-    }
-
-    /// Creates a BBox with min as the minimum extent and max as the maximum
-    /// extent.
-    pub fn from_bboxes(b1: BBox, b2: BBox, b3: BBox, b4: BBox) -> BBox4 {
-        BBox4 {
-            x: (Float4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()),
-                Float4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x())),
-            y: (Float4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()),
-                Float4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y())),
-            z: (Float4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()),
-                Float4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z())),
-        }
-    }
-
-    // Returns whether the given ray intersects with the bboxes.
-    pub fn intersect_accel_ray(&self, ray: &AccelRay) -> Bool4 {
-        // Precalculate ray direction sign booleans.
-        // Doing it up here slightly speeds things up lower down.
-        let ray_pos = (ray.dir_inv.x() >= 0.0, ray.dir_inv.y() >= 0.0, ray.dir_inv.z() >= 0.0);
-
-        // Convert ray to SIMD form
-        let ray4_o =
-            (Float4::splat(ray.orig.x()), Float4::splat(ray.orig.y()), Float4::splat(ray.orig.z()));
-        let ray4_dinv = (Float4::splat(ray.dir_inv.x()),
-                         Float4::splat(ray.dir_inv.y()),
-                         Float4::splat(ray.dir_inv.z()));
-
-        // Calculate the plane intersections
-        let (xlos, xhis) = if ray_pos.0 {
-            ((self.x.0 - ray4_o.0) * ray4_dinv.0, (self.x.1 - ray4_o.0) * ray4_dinv.0)
-        } else {
-            ((self.x.1 - ray4_o.0) * ray4_dinv.0, (self.x.0 - ray4_o.0) * ray4_dinv.0)
-        };
-        let (ylos, yhis) = if ray_pos.1 {
-            ((self.y.0 - ray4_o.1) * ray4_dinv.1, (self.y.1 - ray4_o.1) * ray4_dinv.1)
-        } else {
-            ((self.y.1 - ray4_o.1) * ray4_dinv.1, (self.y.0 - ray4_o.1) * ray4_dinv.1)
-        };
-        let (zlos, zhis) = if ray_pos.2 {
-            ((self.z.0 - ray4_o.2) * ray4_dinv.2, (self.z.1 - ray4_o.2) * ray4_dinv.2)
-        } else {
-            ((self.z.1 - ray4_o.2) * ray4_dinv.2, (self.z.0 - ray4_o.2) * ray4_dinv.2)
-        };
-
-        // Get the minimum and maximum hits
-        let mins = v_max(v_max(xlos, ylos), v_max(zlos, Float4::splat(0.0)));
-        let maxs = v_max(v_min(v_min(xhis, yhis), zhis),
-                         Float4::splat(std::f32::NEG_INFINITY)) *
-                   Float4::splat(BBOX_MAXT_ADJUST);
-
-        // Check for hits
-        let hits = mins.lt(Float4::splat(ray.max_t)) & mins.lte(maxs);
-
-        return hits;
-    }
-}
-
-
-/// Union of two BBoxes.
-impl BitOr for BBox4 {
-    type Output = BBox4;
-
-    fn bitor(self, rhs: BBox4) -> BBox4 {
-        BBox4 {
-            x: (self.x.0.v_min(rhs.x.0), self.x.1.v_max(rhs.x.1)),
-            y: (self.y.0.v_min(rhs.y.0), self.y.1.v_max(rhs.y.1)),
-            z: (self.z.0.v_min(rhs.z.0), self.z.1.v_max(rhs.z.1)),
-        }
-    }
-}
-
-impl BitOrAssign for BBox4 {
-    fn bitor_assign(&mut self, rhs: BBox4) {
-        *self = *self | rhs;
-    }
-}
-
-impl Lerp for BBox4 {
-    fn lerp(self, other: BBox4, alpha: f32) -> BBox4 {
-        BBox4 {
-            x: (lerp(self.x.0, other.x.0, alpha), lerp(self.x.1, other.x.1, alpha)),
-            y: (lerp(self.y.0, other.y.0, alpha), lerp(self.y.1, other.y.1, alpha)),
-            z: (lerp(self.z.0, other.z.0, alpha), lerp(self.z.1, other.z.1, alpha)),
-        }
-    }
-}
diff --git a/src/bitstack.rs b/src/bitstack.rs
deleted file mode 100644
index af46ace..0000000
--- a/src/bitstack.rs
+++ /dev/null
@@ -1,303 +0,0 @@
-#![allow(dead_code)]
-
-use std::mem::size_of;
-
-#[derive(Copy, Clone, Debug)]
-pub struct BitStack128 {
-    data: (u64, u64),
-}
-
-impl BitStack128 {
-    pub fn new() -> BitStack128 {
-        BitStack128 { data: (0, 0) }
-    }
-
-    pub fn new_with_1() -> BitStack128 {
-        BitStack128 { data: (1, 0) }
-    }
-
-    /// Push a bit onto the top of the stack.
-    pub fn push(&mut self, value: bool) {
-        // Verify no stack overflow
-        debug_assert!((self.data.1 >> ((size_of::<u64>() * 8) - 1)) == 0);
-
-        self.data.1 = (self.data.1 << 1) | (self.data.0 >> ((size_of::<u64>() * 8) - 1));
-        self.data.0 <<= 1;
-        self.data.0 |= value as u64;
-    }
-
-    /// Push n bits onto the top of the stack.  The input
-    /// bits are passed as an integer, with the bit that
-    /// will be on top in the least significant digit, and
-    /// the rest following in order from there.
-    ///
-    /// Note that unless you are running a debug build, no
-    /// effort is made to verify that only the first n
-    /// bits of the passed value are used.  So if other
-    /// bits are non-zero this will produce incorrect results.
-    pub fn push_n(&mut self, value: u8, count: u8) {
-        // Verify no bitstack overflow
-        debug_assert!((self.data.1 >> ((size_of::<u64>() * 8) - count as usize)) == 0);
-        // Verify no bits outside of the n-bit range
-        debug_assert!(if count < (size_of::<u8>() * 8) as u8 {
-            value & (!((1 << count) - 1)) == 0
-        } else {
-            true
-        });
-        debug_assert!(count <= (size_of::<u8>() * 8) as u8);
-
-        self.data.1 = (self.data.1 << count as usize) |
-                      (self.data.0 >> ((size_of::<u64>() * 8) - count as usize));
-        self.data.0 <<= count as u64;
-        self.data.0 |= value as u64;
-    }
-
-    /// Pop the top bit off the stack.
-    pub fn pop(&mut self) -> bool {
-        let b = (self.data.0 & 1) != 0;
-        self.data.0 = (self.data.0 >> 1) | (self.data.1 << ((size_of::<u64>() * 8) - 1));
-        self.data.1 >>= 1;
-        return b;
-    }
-
-    /// Pop the top n bits off the stack.  The bits are returned as
-    /// an integer, with the top bit in the least significant digit,
-    /// and the rest following in order from there.
-    pub fn pop_n(&mut self, n: usize) -> u64 {
-        debug_assert!(n < (size_of::<BitStack128>() * 8)); // Can't pop more than we have
-        debug_assert!(n < (size_of::<u64>() * 8)); // Can't pop more than the return type can hold
-        let b = self.data.0 & ((1 << n) - 1);
-        self.data.0 = (self.data.0 >> n) | (self.data.1 << ((size_of::<u64>() * 8) - n));
-        self.data.1 >>= n;
-        return b;
-    }
-
-    /// Pop the top n bits off the stack, but return only the nth bit.
-    pub fn pop_to_nth(&mut self, n: usize) -> bool {
-        debug_assert!(n > 0);
-        debug_assert!(n < (size_of::<BitStack128>() * 8)); // Can't pop more than we have
-        debug_assert!(n < (size_of::<u64>() * 8)); // Can't pop more than the return type can hold
-        let b = (self.data.0 & (1 << (n - 1))) != 0;
-        self.data.0 = (self.data.0 >> n) | (self.data.1 << ((size_of::<u64>() * 8) - n));
-        self.data.1 >>= n;
-        return b;
-    }
-
-    /// Read the top bit of the stack without popping it.
-    pub fn peek(&self) -> bool {
-        (self.data.0 & 1) != 0
-    }
-
-    /// Read the top n bits of the stack without popping them.  The bits
-    /// are returned as an integer, with the top bit in the least
-    /// significant digit, and the rest following in order from there.
-    pub fn peek_n(&self, n: usize) -> u64 {
-        // Can't return more than we have
-        debug_assert!(n < (size_of::<BitStack128>() * 8));
-        // Can't return more than the return type can hold
-        debug_assert!(n < (size_of::<u64>() * 8));
-
-        self.data.0 & ((1 << n) - 1)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn push() {
-        let mut bs = BitStack128::new();
-        bs.push(true);
-        bs.push(false);
-        bs.push(true);
-        bs.push(true);
-        bs.push(false);
-        bs.push(true);
-        bs.push(true);
-        bs.push(true);
-
-        assert!(bs.data.0 == 0b10110111);
-        assert!(bs.data.1 == 0);
-    }
-
-    #[test]
-    fn push_overflow() {
-        let mut bs = BitStack128::new();
-        for _ in 0..9 {
-            bs.push(true);
-            bs.push(false);
-            bs.push(true);
-            bs.push(true);
-            bs.push(false);
-            bs.push(true);
-            bs.push(true);
-            bs.push(true);
-        }
-
-        assert!(bs.data.0 == 0b1011011110110111101101111011011110110111101101111011011110110111);
-        assert!(bs.data.1 == 0b10110111);
-    }
-
-    #[test]
-    fn pop() {
-        let mut bs = BitStack128::new();
-        bs.data.0 = 0b10110111;
-
-        assert!(bs.pop() == true);
-        assert!(bs.pop() == true);
-        assert!(bs.pop() == true);
-        assert!(bs.pop() == false);
-        assert!(bs.pop() == true);
-        assert!(bs.pop() == true);
-        assert!(bs.pop() == false);
-        assert!(bs.pop() == true);
-    }
-
-    #[test]
-    fn pop_overflow() {
-        let mut bs = BitStack128::new();
-        bs.data.0 = 0b1011011110110111101101111011011110110111101101111011011110110111;
-        bs.data.1 = 0b10110111;
-        for _ in 0..9 {
-            assert!(bs.pop() == true);
-            assert!(bs.pop() == true);
-            assert!(bs.pop() == true);
-            assert!(bs.pop() == false);
-            assert!(bs.pop() == true);
-            assert!(bs.pop() == true);
-            assert!(bs.pop() == false);
-            assert!(bs.pop() == true);
-        }
-    }
-
-    #[test]
-    fn push_n() {
-        let mut bs = BitStack128::new();
-        bs.push_n(0b10110, 5);
-        bs.push_n(0b10110111, 8);
-
-        assert!(bs.data.0 == 0b1011010110111);
-    }
-
-    #[test]
-    fn push_n_overflow() {
-        let mut bs = BitStack128::new();
-        for _ in 0..9 {
-            bs.push_n(0b10110111, 8);
-        }
-
-        assert!(bs.data.0 == 0b1011011110110111101101111011011110110111101101111011011110110111);
-        assert!(bs.data.1 == 0b10110111);
-    }
-
-    #[test]
-    fn pop_n() {
-        let mut bs = BitStack128::new();
-        bs.data.0 = 0b0010_1000_1100_1110_0101_0111;
-
-        assert!(bs.pop_n(4) == 0b0111);
-        assert!(bs.data.0 == 0b0010_1000_1100_1110_0101);
-
-        assert!(bs.pop_n(4) == 0b0101);
-        assert!(bs.data.0 == 0b0010_1000_1100_1110);
-
-        assert!(bs.pop_n(4) == 0b1110);
-        assert!(bs.data.0 == 0b0010_1000_1100);
-
-        assert!(bs.pop_n(4) == 0b1100);
-        assert!(bs.data.0 == 0b0010_1000);
-
-        assert!(bs.pop_n(4) == 0b1000);
-        assert!(bs.data.0 == 0b0010);
-
-        assert!(bs.pop_n(4) == 0b0010);
-        assert!(bs.data.0 == 0);
-    }
-
-    #[test]
-    fn pop_n_overflow() {
-        let mut bs = BitStack128::new();
-        bs.data.0 = 0b1011011110110111101101111011011110110111101101111011011110110111;
-        bs.data.1 = 0b10110111;
-        for _ in 0..9 {
-            assert!(bs.pop_n(8) == 0b10110111);
-        }
-    }
-
-    #[test]
-    fn pop_to_nth() {
-        let mut bs = BitStack128::new();
-        bs.data.0 = 0b0010_1000_1100_1110_0101_0111;
-
-        assert!(bs.pop_to_nth(4) == false);
-        assert!(bs.data.0 == 0b0010_1000_1100_1110_0101);
-
-        assert!(bs.pop_to_nth(4) == false);
-        assert!(bs.data.0 == 0b0010_1000_1100_1110);
-
-        assert!(bs.pop_to_nth(4) == true);
-        assert!(bs.data.0 == 0b0010_1000_1100);
-
-        assert!(bs.pop_to_nth(4) == true);
-        assert!(bs.data.0 == 0b0010_1000);
-
-        assert!(bs.pop_to_nth(4) == true);
-        assert!(bs.data.0 == 0b0010);
-
-        assert!(bs.pop_to_nth(4) == false);
-        assert!(bs.data.0 == 0);
-    }
-
-    #[test]
-    fn pop_to_nth_overflow() {
-        let mut bs = BitStack128::new();
-        bs.data.0 = 0b00110111_10110111_00110111_10110111_00110111_10110111_00110111_10110111;
-        bs.data.1 = 0b00110111_10110111;
-        for _ in 0..5 {
-            assert!(bs.pop_to_nth(8) == true);
-            assert!(bs.pop_to_nth(8) == false);
-        }
-    }
-
-    #[test]
-    fn peek() {
-        let mut bs = BitStack128::new();
-        bs.data.0 = 0b10110111;
-
-        assert!(bs.peek() == true);
-        bs.pop();
-
-        assert!(bs.peek() == true);
-        bs.pop();
-
-        assert!(bs.peek() == true);
-        bs.pop();
-
-        assert!(bs.peek() == false);
-        bs.pop();
-
-        assert!(bs.peek() == true);
-        bs.pop();
-
-        assert!(bs.peek() == true);
-        bs.pop();
-
-        assert!(bs.peek() == false);
-        bs.pop();
-
-        assert!(bs.peek() == true);
-    }
-
-    #[test]
-    fn peek_n() {
-        let mut bs = BitStack128::new();
-        bs.data.0 = 0b10110111;
-
-        assert!(bs.peek_n(4) == 0b0111);
-        bs.pop_n(4);
-
-        assert!(bs.peek_n(4) == 0b1011);
-        bs.pop_n(4);
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index 24d3377..af81b6b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -23,8 +23,6 @@ extern crate lazy_static;
 mod accel;
 mod algorithm;
 mod bbox;
-mod bbox4;
-mod bitstack;
 mod boundable;
 mod camera;
 mod color;
@@ -61,9 +59,7 @@ use ray::{Ray, AccelRay};
 use surface::SurfaceIntersection;
 use renderer::LightPath;
 use bbox::BBox;
-use bbox4::BBox4;
 use accel::BVHNode;
-use accel::BVH4Node;
 use timer::Timer;
 
 
@@ -130,9 +126,7 @@ fn main() {
                  mem::size_of::<SurfaceIntersection>());
         println!("LightPath size: {} bytes", mem::size_of::<LightPath>());
         println!("BBox size: {} bytes", mem::size_of::<BBox>());
-        println!("BBox4 size: {} bytes", mem::size_of::<BBox4>());
         println!("BVHNode size: {} bytes", mem::size_of::<BVHNode>());
-        println!("BVH4Node size: {} bytes", mem::size_of::<BVH4Node>());
         return;
     }
 
diff --git a/src/ray.rs b/src/ray.rs
index 3980a78..d75e1d6 100644
--- a/src/ray.rs
+++ b/src/ray.rs
@@ -2,7 +2,6 @@
 
 use std;
 
-use bitstack::BitStack128;
 use float4::Float4;
 use math::{Vector, Point, Matrix4x4};
 
@@ -59,7 +58,6 @@ pub struct AccelRay {
     pub time: f32,
     pub flags: u32,
     pub id: u32,
-    pub trav_stack: BitStack128,
 }
 
 impl AccelRay {
@@ -71,7 +69,6 @@ impl AccelRay {
             time: ray.time,
             flags: ray.flags,
             id: id,
-            trav_stack: BitStack128::new_with_1(),
         }
     }