psychopath/src/accel/bvh4_simd.rs
2019-06-28 22:56:51 +09:00

387 lines
14 KiB
Rust

//! This BVH4 implementation pulls a lot of ideas from the paper
//! "Efficient Ray Tracing Kernels for Modern CPU Architectures"
//! by Fuetterling et al.
//!
//! Specifically, the table-based traversal order approach they
//! propose is largely followed by this implementation.
#![allow(dead_code)]
use mem_arena::MemArena;
use crate::{
bbox::BBox,
bbox4::BBox4,
boundable::Boundable,
lerp::lerp_slice,
math::Vector,
ray::{RayBatch, RayStack},
timer::Timer,
};
use super::{
bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH},
ACCEL_NODE_RAY_TESTS, ACCEL_TRAV_TIME,
};
use bvh_order::{calc_traversal_code, SplitAxes, TRAVERSAL_TABLE};
use float4::Bool4;
pub fn ray_code(dir: Vector) -> usize {
let ray_sign_is_neg = [dir.x() < 0.0, dir.y() < 0.0, dir.z() < 0.0];
ray_sign_is_neg[0] as usize
+ ((ray_sign_is_neg[1] as usize) << 1)
+ ((ray_sign_is_neg[2] as usize) << 2)
}
#[derive(Copy, Clone, Debug)]
pub struct BVH4<'a> {
root: Option<&'a BVH4Node<'a>>,
depth: usize,
node_count: usize,
_bounds: Option<&'a [BBox]>,
}
#[derive(Copy, Clone, Debug)]
pub enum BVH4Node<'a> {
Internal {
bounds: &'a [BBox4],
children: &'a [BVH4Node<'a>],
traversal_code: u8,
},
Leaf {
object_range: (usize, usize),
},
}
impl<'a> BVH4<'a> {
pub fn from_objects<'b, T, F>(
arena: &'a MemArena,
objects: &mut [T],
objects_per_leaf: usize,
bounder: F,
) -> BVH4<'a>
where
F: 'b + Fn(&T) -> &'b [BBox],
{
if objects.len() == 0 {
BVH4 {
root: None,
depth: 0,
node_count: 0,
_bounds: None,
}
} else {
let base = BVHBase::from_objects(objects, objects_per_leaf, bounder);
let fill_node = unsafe { arena.alloc_uninitialized_with_alignment::<BVH4Node>(32) };
let node_count = BVH4::construct_from_base(
arena,
&base,
&base.nodes[base.root_node_index()],
fill_node,
);
BVH4 {
root: Some(fill_node),
depth: (base.depth / 2) + 1,
node_count: node_count,
_bounds: {
let range = base.nodes[base.root_node_index()].bounds_range();
Some(arena.copy_slice(&base.bounds[range.0..range.1]))
},
}
}
}
pub fn tree_depth(&self) -> usize {
self.depth
}
pub fn traverse<T, F>(
&self,
rays: &mut RayBatch,
ray_stack: &mut RayStack,
objects: &[T],
mut obj_ray_test: F,
) where
F: FnMut(&T, &mut RayBatch, &mut RayStack),
{
if self.root.is_none() {
return;
}
let mut trav_time: f64 = 0.0;
let mut timer = Timer::new();
let traversal_table =
&TRAVERSAL_TABLE[ray_code(rays.dir_inv_local(ray_stack.next_task_ray_idx(0)))];
// +2 of max depth for root and last child
let mut node_stack = [self.root.unwrap(); (BVH_MAX_DEPTH * 3) + 2];
let mut stack_ptr = 1;
while stack_ptr > 0 {
match node_stack[stack_ptr] {
&BVH4Node::Internal {
bounds,
children,
traversal_code,
} => {
let mut all_hits = Bool4::new();
// Ray testing
ray_stack.pop_do_next_task(children.len(), |ray_idx| {
if rays.is_done(ray_idx) {
([0; 4], 0)
} else {
let hits = lerp_slice(bounds, rays.time(ray_idx)).intersect_ray(
rays.orig_local(ray_idx),
rays.dir_inv_local(ray_idx),
rays.max_t(ray_idx),
);
if !hits.all_false() {
all_hits = all_hits | hits;
let mut lanes = [0u8; 4];
let mut lane_count = 0;
for i in 0..children.len() {
if hits.get_n(i) {
lanes[lane_count] = i as u8;
lane_count += 1;
}
}
(lanes, lane_count)
} else {
([0; 4], 0)
}
}
});
// If there were any intersections, create tasks.
if !all_hits.all_false() {
let order_code = traversal_table[traversal_code as usize];
let mut lanes = [0usize; 4];
let mut lane_count = 0;
for i in 0..children.len() {
let inv_i = (children.len() - 1) - i;
let child_i = ((order_code >> (inv_i * 2)) & 3) as usize;
if all_hits.get_n(child_i) {
node_stack[stack_ptr + lane_count] = &children[child_i];
lanes[lane_count] = child_i;
lane_count += 1;
}
}
ray_stack.push_lanes_to_tasks(&lanes[..lane_count]);
stack_ptr += lane_count - 1;
} else {
stack_ptr -= 1;
}
}
&BVH4Node::Leaf { object_range } => {
trav_time += timer.tick() as f64;
// Set up the tasks for each object.
let obj_count = object_range.1 - object_range.0;
for _ in 0..(obj_count - 1) {
ray_stack.duplicate_next_task();
}
// Do the ray tests.
for obj in &objects[object_range.0..object_range.1] {
obj_ray_test(obj, rays, ray_stack);
}
timer.tick();
stack_ptr -= 1;
}
}
}
trav_time += timer.tick() as f64;
ACCEL_TRAV_TIME.with(|att| {
let v = att.get();
att.set(v + trav_time);
});
}
fn construct_from_base(
arena: &'a MemArena,
base: &BVHBase,
node: &BVHBaseNode,
fill_node: &mut BVH4Node<'a>,
) -> usize {
let mut node_count = 0;
match node {
// Create internal node
&BVHBaseNode::Internal {
bounds_range: _,
children_indices,
split_axis,
} => {
let child_l = &base.nodes[children_indices.0];
let child_r = &base.nodes[children_indices.1];
// Prepare convenient access to the stuff we need.
let child_count: usize;
let children; // [Optional, Optional, Optional, Optional]
let split_info: SplitAxes;
match *child_l {
BVHBaseNode::Internal {
children_indices: i_l,
split_axis: s_l,
..
} => {
match *child_r {
BVHBaseNode::Internal {
children_indices: i_r,
split_axis: s_r,
..
} => {
// Four nodes
child_count = 4;
children = [
Some(&base.nodes[i_l.0]),
Some(&base.nodes[i_l.1]),
Some(&base.nodes[i_r.0]),
Some(&base.nodes[i_r.1]),
];
split_info = SplitAxes::Full((split_axis, s_l, s_r));
}
BVHBaseNode::Leaf { .. } => {
// Three nodes with left split
child_count = 3;
children = [
Some(&base.nodes[i_l.0]),
Some(&base.nodes[i_l.1]),
Some(child_r),
None,
];
split_info = SplitAxes::Left((split_axis, s_l));
}
}
}
BVHBaseNode::Leaf { .. } => {
match *child_r {
BVHBaseNode::Internal {
children_indices: i_r,
split_axis: s_r,
..
} => {
// Three nodes with right split
child_count = 3;
children = [
Some(child_l),
Some(&base.nodes[i_r.0]),
Some(&base.nodes[i_r.1]),
None,
];
split_info = SplitAxes::Right((split_axis, s_r));
}
BVHBaseNode::Leaf { .. } => {
// Two nodes
child_count = 2;
children = [Some(child_l), Some(child_r), None, None];
split_info = SplitAxes::TopOnly(split_axis);
}
}
}
}
node_count += child_count;
// Construct bounds
let bounds = {
let bounds_len = children
.iter()
.map(|c| {
if let &Some(n) = c {
let len = n.bounds_range().1 - n.bounds_range().0;
debug_assert!(len >= 1);
len
} else {
0
}
})
.max()
.unwrap();
debug_assert!(bounds_len >= 1);
let bounds =
unsafe { arena.alloc_array_uninitialized_with_alignment(bounds_len, 32) };
if bounds_len < 2 {
let b1 =
children[0].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
let b2 =
children[1].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
let b3 =
children[2].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
let b4 =
children[3].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
bounds[0] = BBox4::from_bboxes(b1, b2, b3, b4);
} else {
for (i, b) in bounds.iter_mut().enumerate() {
let time = i as f32 / (bounds_len - 1) as f32;
let b1 = children[0].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
let b2 = children[1].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
let b3 = children[2].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
let b4 = children[3].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
*b = BBox4::from_bboxes(b1, b2, b3, b4);
}
}
bounds
};
// Construct child nodes
let child_nodes = unsafe {
arena.alloc_array_uninitialized_with_alignment::<BVH4Node>(child_count, 32)
};
for (i, c) in children[0..child_count].iter().enumerate() {
node_count +=
BVH4::construct_from_base(arena, base, c.unwrap(), &mut child_nodes[i]);
}
// Build this node
*fill_node = BVH4Node::Internal {
bounds: bounds,
children: child_nodes,
traversal_code: calc_traversal_code(split_info),
};
}
// Create internal node
&BVHBaseNode::Leaf { object_range, .. } => {
*fill_node = BVH4Node::Leaf {
object_range: object_range,
};
node_count += 1;
}
}
return node_count;
}
}
impl<'a> Boundable for BVH4<'a> {
fn bounds<'b>(&'b self) -> &'b [BBox] {
self._bounds.unwrap_or(&[])
}
}