Remove non-SIMD BVH4, and keep more bool calculations in SIMD format.
This commit is contained in:
parent
c5d23592b9
commit
b09f9684d1
|
@ -1,13 +1,17 @@
|
||||||
|
//! This BVH4 implementation is based on the ideas from the paper
|
||||||
|
//! "Efficient Ray Tracing Kernels for Modern CPU Architectures"
|
||||||
|
//! by Fuetterling et al.
|
||||||
|
|
||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
|
|
||||||
use bvh_order::{calc_traversal_code, SplitAxes, TRAVERSAL_TABLE};
|
|
||||||
use math3d::Vector;
|
|
||||||
use mem_arena::MemArena;
|
use mem_arena::MemArena;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
bbox::BBox,
|
bbox::BBox,
|
||||||
|
bbox4::BBox4,
|
||||||
boundable::Boundable,
|
boundable::Boundable,
|
||||||
lerp::lerp_slice,
|
lerp::lerp_slice,
|
||||||
|
math::Vector,
|
||||||
ray::{RayBatch, RayStack},
|
ray::{RayBatch, RayStack},
|
||||||
timer::Timer,
|
timer::Timer,
|
||||||
};
|
};
|
||||||
|
@ -17,6 +21,9 @@ use super::{
|
||||||
ACCEL_NODE_RAY_TESTS, ACCEL_TRAV_TIME,
|
ACCEL_NODE_RAY_TESTS, ACCEL_TRAV_TIME,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use bvh_order::{calc_traversal_code, SplitAxes, TRAVERSAL_TABLE};
|
||||||
|
use float4::Bool4;
|
||||||
|
|
||||||
pub fn ray_code(dir: Vector) -> usize {
|
pub fn ray_code(dir: Vector) -> usize {
|
||||||
let ray_sign_is_neg = [dir.x() < 0.0, dir.y() < 0.0, dir.z() < 0.0];
|
let ray_sign_is_neg = [dir.x() < 0.0, dir.y() < 0.0, dir.z() < 0.0];
|
||||||
ray_sign_is_neg[0] as usize
|
ray_sign_is_neg[0] as usize
|
||||||
|
@ -28,20 +35,19 @@ pub fn ray_code(dir: Vector) -> usize {
|
||||||
pub struct BVH4<'a> {
|
pub struct BVH4<'a> {
|
||||||
root: Option<&'a BVH4Node<'a>>,
|
root: Option<&'a BVH4Node<'a>>,
|
||||||
depth: usize,
|
depth: usize,
|
||||||
|
node_count: usize,
|
||||||
|
_bounds: Option<&'a [BBox]>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
#[derive(Copy, Clone, Debug)]
|
||||||
pub enum BVH4Node<'a> {
|
pub enum BVH4Node<'a> {
|
||||||
Inner {
|
Internal {
|
||||||
traversal_code: u8,
|
bounds: &'a [BBox4],
|
||||||
bounds_start: &'a BBox,
|
|
||||||
bounds_len: u16,
|
|
||||||
children: &'a [BVH4Node<'a>],
|
children: &'a [BVH4Node<'a>],
|
||||||
|
traversal_code: u8,
|
||||||
},
|
},
|
||||||
|
|
||||||
Leaf {
|
Leaf {
|
||||||
bounds_start: &'a BBox,
|
|
||||||
bounds_len: u16,
|
|
||||||
object_range: (usize, usize),
|
object_range: (usize, usize),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -56,19 +62,32 @@ impl<'a> BVH4<'a> {
|
||||||
where
|
where
|
||||||
F: 'b + Fn(&T) -> &'b [BBox],
|
F: 'b + Fn(&T) -> &'b [BBox],
|
||||||
{
|
{
|
||||||
if objects.is_empty() {
|
if objects.len() == 0 {
|
||||||
BVH4 {
|
BVH4 {
|
||||||
root: None,
|
root: None,
|
||||||
depth: 0,
|
depth: 0,
|
||||||
|
node_count: 0,
|
||||||
|
_bounds: None,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let base = BVHBase::from_objects(objects, objects_per_leaf, bounder);
|
let base = BVHBase::from_objects(objects, objects_per_leaf, bounder);
|
||||||
|
|
||||||
let root = unsafe { arena.alloc_uninitialized::<BVH4Node>() };
|
let fill_node = unsafe { arena.alloc_uninitialized_with_alignment::<BVH4Node>(32) };
|
||||||
BVH4::construct_from_base(arena, &base, base.root_node_index(), root);
|
let node_count = BVH4::construct_from_base(
|
||||||
|
arena,
|
||||||
|
&base,
|
||||||
|
&base.nodes[base.root_node_index()],
|
||||||
|
fill_node,
|
||||||
|
);
|
||||||
|
|
||||||
BVH4 {
|
BVH4 {
|
||||||
root: Some(root),
|
root: Some(fill_node),
|
||||||
depth: base.depth,
|
depth: (base.depth / 2) + 1,
|
||||||
|
node_count: node_count,
|
||||||
|
_bounds: {
|
||||||
|
let range = base.nodes[base.root_node_index()].bounds_range();
|
||||||
|
Some(arena.copy_slice(&base.bounds[range.0..range.1]))
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -103,118 +122,64 @@ impl<'a> BVH4<'a> {
|
||||||
|
|
||||||
while stack_ptr > 0 {
|
while stack_ptr > 0 {
|
||||||
node_tests += ray_stack.ray_count_in_next_task() as u64;
|
node_tests += ray_stack.ray_count_in_next_task() as u64;
|
||||||
match *node_stack[stack_ptr] {
|
match node_stack[stack_ptr] {
|
||||||
BVH4Node::Inner {
|
&BVH4Node::Internal {
|
||||||
traversal_code,
|
bounds,
|
||||||
bounds_start,
|
|
||||||
bounds_len,
|
|
||||||
children,
|
children,
|
||||||
|
traversal_code,
|
||||||
} => {
|
} => {
|
||||||
// Test rays against bbox.
|
let mut all_hits = Bool4::new_false();
|
||||||
let bounds =
|
|
||||||
unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) };
|
|
||||||
|
|
||||||
let mut hit_count = 0;
|
// Ray testing
|
||||||
ray_stack.pop_do_next_task(children.len(), |ray_idx| {
|
ray_stack.pop_do_next_task_and_push_rays(children.len(), |ray_idx| {
|
||||||
let hit = (!rays.is_done(ray_idx))
|
if rays.is_done(ray_idx) {
|
||||||
&& lerp_slice(bounds, rays.time(ray_idx)).intersect_ray(
|
(Bool4::new_false(), 0)
|
||||||
|
} else {
|
||||||
|
let hits = lerp_slice(bounds, rays.time(ray_idx)).intersect_ray(
|
||||||
rays.orig_local(ray_idx),
|
rays.orig_local(ray_idx),
|
||||||
rays.dir_inv_local(ray_idx),
|
rays.dir_inv_local(ray_idx),
|
||||||
rays.max_t(ray_idx),
|
rays.max_t(ray_idx),
|
||||||
);
|
);
|
||||||
|
all_hits = all_hits | hits;
|
||||||
if hit {
|
(hits, children.len())
|
||||||
hit_count += 1;
|
|
||||||
([0, 1, 2, 3], children.len())
|
|
||||||
} else {
|
|
||||||
([0; 4], 0)
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// If there were any intersections, create tasks.
|
// If there were any intersections, create tasks.
|
||||||
if hit_count > 0 {
|
if !all_hits.is_all_false() {
|
||||||
let order_code = traversal_table[traversal_code as usize];
|
let order_code = traversal_table[traversal_code as usize];
|
||||||
match children.len() {
|
let mut lanes = [0usize; 4];
|
||||||
4 => {
|
let mut lane_count = 0;
|
||||||
let i4 = ((order_code >> 6) & 0b11) as usize;
|
for i in 0..children.len() {
|
||||||
let i3 = ((order_code >> 4) & 0b11) as usize;
|
let inv_i = (children.len() - 1) - i;
|
||||||
let i2 = ((order_code >> 2) & 0b11) as usize;
|
let child_i = ((order_code >> (inv_i * 2)) & 3) as usize;
|
||||||
let i1 = (order_code & 0b11) as usize;
|
if all_hits.get_n(child_i) {
|
||||||
|
node_stack[stack_ptr + lane_count] = &children[child_i];
|
||||||
ray_stack.push_lanes_to_tasks(&[i4, i3, i2, i1]);
|
lanes[lane_count] = child_i;
|
||||||
|
lane_count += 1;
|
||||||
node_stack[stack_ptr] = &children[i4];
|
|
||||||
node_stack[stack_ptr + 1] = &children[i3];
|
|
||||||
node_stack[stack_ptr + 2] = &children[i2];
|
|
||||||
node_stack[stack_ptr + 3] = &children[i1];
|
|
||||||
|
|
||||||
stack_ptr += 3;
|
|
||||||
}
|
}
|
||||||
3 => {
|
|
||||||
let i3 = ((order_code >> 4) & 0b11) as usize;
|
|
||||||
let i2 = ((order_code >> 2) & 0b11) as usize;
|
|
||||||
let i1 = (order_code & 0b11) as usize;
|
|
||||||
|
|
||||||
ray_stack.push_lanes_to_tasks(&[i3, i2, i1]);
|
|
||||||
|
|
||||||
node_stack[stack_ptr] = &children[i3];
|
|
||||||
node_stack[stack_ptr + 1] = &children[i2];
|
|
||||||
node_stack[stack_ptr + 2] = &children[i1];
|
|
||||||
|
|
||||||
stack_ptr += 2;
|
|
||||||
}
|
}
|
||||||
2 => {
|
|
||||||
let i2 = ((order_code >> 2) & 0b11) as usize;
|
|
||||||
let i1 = (order_code & 0b11) as usize;
|
|
||||||
|
|
||||||
ray_stack.push_lanes_to_tasks(&[i2, i1]);
|
ray_stack.push_lanes_to_tasks(&lanes[..lane_count]);
|
||||||
|
stack_ptr += lane_count - 1;
|
||||||
node_stack[stack_ptr] = &children[i2];
|
|
||||||
node_stack[stack_ptr + 1] = &children[i1];
|
|
||||||
|
|
||||||
stack_ptr += 1;
|
|
||||||
}
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
stack_ptr -= 1;
|
stack_ptr -= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BVH4Node::Leaf {
|
&BVH4Node::Leaf { object_range } => {
|
||||||
object_range,
|
|
||||||
bounds_start,
|
|
||||||
bounds_len,
|
|
||||||
} => {
|
|
||||||
// Test rays against bounds.
|
|
||||||
let bounds =
|
|
||||||
unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) };
|
|
||||||
let object_count = object_range.1 - object_range.0;
|
|
||||||
let mut hit_count = 0;
|
|
||||||
|
|
||||||
ray_stack.pop_do_next_task(object_count, |ray_idx| {
|
|
||||||
let hit = (!rays.is_done(ray_idx))
|
|
||||||
&& lerp_slice(bounds, rays.time(ray_idx)).intersect_ray(
|
|
||||||
rays.orig_local(ray_idx),
|
|
||||||
rays.dir_inv_local(ray_idx),
|
|
||||||
rays.max_t(ray_idx),
|
|
||||||
);
|
|
||||||
if hit {
|
|
||||||
hit_count += 1;
|
|
||||||
([0, 1, 2, 3], object_count)
|
|
||||||
} else {
|
|
||||||
([0; 4], 0)
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
trav_time += timer.tick() as f64;
|
trav_time += timer.tick() as f64;
|
||||||
|
|
||||||
if hit_count > 0 {
|
// Set up the tasks for each object.
|
||||||
ray_stack.push_lanes_to_tasks(&[0, 1, 2, 3, 4, 5, 6, 7][..object_count]);
|
let obj_count = object_range.1 - object_range.0;
|
||||||
|
for _ in 0..(obj_count - 1) {
|
||||||
|
ray_stack.duplicate_next_task();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do the ray tests.
|
||||||
for obj in &objects[object_range.0..object_range.1] {
|
for obj in &objects[object_range.0..object_range.1] {
|
||||||
obj_ray_test(obj, rays, ray_stack);
|
obj_ray_test(obj, rays, ray_stack);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
timer.tick();
|
timer.tick();
|
||||||
|
|
||||||
|
@ -237,12 +202,15 @@ impl<'a> BVH4<'a> {
|
||||||
fn construct_from_base(
|
fn construct_from_base(
|
||||||
arena: &'a MemArena,
|
arena: &'a MemArena,
|
||||||
base: &BVHBase,
|
base: &BVHBase,
|
||||||
node_index: usize,
|
node: &BVHBaseNode,
|
||||||
node_mem: &mut BVH4Node<'a>,
|
fill_node: &mut BVH4Node<'a>,
|
||||||
) {
|
) -> usize {
|
||||||
match base.nodes[node_index] {
|
let mut node_count = 0;
|
||||||
BVHBaseNode::Internal {
|
|
||||||
bounds_range,
|
match node {
|
||||||
|
// Create internal node
|
||||||
|
&BVHBaseNode::Internal {
|
||||||
|
bounds_range: _,
|
||||||
children_indices,
|
children_indices,
|
||||||
split_axis,
|
split_axis,
|
||||||
} => {
|
} => {
|
||||||
|
@ -251,7 +219,7 @@ impl<'a> BVH4<'a> {
|
||||||
|
|
||||||
// Prepare convenient access to the stuff we need.
|
// Prepare convenient access to the stuff we need.
|
||||||
let child_count: usize;
|
let child_count: usize;
|
||||||
let child_indices: [usize; 4];
|
let children; // [Optional, Optional, Optional, Optional]
|
||||||
let split_info: SplitAxes;
|
let split_info: SplitAxes;
|
||||||
match *child_l {
|
match *child_l {
|
||||||
BVHBaseNode::Internal {
|
BVHBaseNode::Internal {
|
||||||
|
@ -267,13 +235,23 @@ impl<'a> BVH4<'a> {
|
||||||
} => {
|
} => {
|
||||||
// Four nodes
|
// Four nodes
|
||||||
child_count = 4;
|
child_count = 4;
|
||||||
child_indices = [i_l.0, i_l.1, i_r.0, i_r.1];
|
children = [
|
||||||
|
Some(&base.nodes[i_l.0]),
|
||||||
|
Some(&base.nodes[i_l.1]),
|
||||||
|
Some(&base.nodes[i_r.0]),
|
||||||
|
Some(&base.nodes[i_r.1]),
|
||||||
|
];
|
||||||
split_info = SplitAxes::Full((split_axis, s_l, s_r));
|
split_info = SplitAxes::Full((split_axis, s_l, s_r));
|
||||||
}
|
}
|
||||||
BVHBaseNode::Leaf { .. } => {
|
BVHBaseNode::Leaf { .. } => {
|
||||||
// Three nodes with left split
|
// Three nodes with left split
|
||||||
child_count = 3;
|
child_count = 3;
|
||||||
child_indices = [i_l.0, i_l.1, children_indices.1, 0];
|
children = [
|
||||||
|
Some(&base.nodes[i_l.0]),
|
||||||
|
Some(&base.nodes[i_l.1]),
|
||||||
|
Some(child_r),
|
||||||
|
None,
|
||||||
|
];
|
||||||
split_info = SplitAxes::Left((split_axis, s_l));
|
split_info = SplitAxes::Left((split_axis, s_l));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -287,76 +265,112 @@ impl<'a> BVH4<'a> {
|
||||||
} => {
|
} => {
|
||||||
// Three nodes with right split
|
// Three nodes with right split
|
||||||
child_count = 3;
|
child_count = 3;
|
||||||
child_indices = [children_indices.0, i_r.0, i_r.1, 0];
|
children = [
|
||||||
|
Some(child_l),
|
||||||
|
Some(&base.nodes[i_r.0]),
|
||||||
|
Some(&base.nodes[i_r.1]),
|
||||||
|
None,
|
||||||
|
];
|
||||||
split_info = SplitAxes::Right((split_axis, s_r));
|
split_info = SplitAxes::Right((split_axis, s_r));
|
||||||
}
|
}
|
||||||
BVHBaseNode::Leaf { .. } => {
|
BVHBaseNode::Leaf { .. } => {
|
||||||
// Two nodes
|
// Two nodes
|
||||||
child_count = 2;
|
child_count = 2;
|
||||||
child_indices = [children_indices.0, children_indices.1, 0, 0];
|
children = [Some(child_l), Some(child_r), None, None];
|
||||||
split_info = SplitAxes::TopOnly(split_axis);
|
split_info = SplitAxes::TopOnly(split_axis);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy bounds
|
node_count += child_count;
|
||||||
let bounds = arena
|
|
||||||
.copy_slice_with_alignment(&base.bounds[bounds_range.0..bounds_range.1], 32);
|
|
||||||
|
|
||||||
// Build children
|
// Construct bounds
|
||||||
let children_mem = unsafe {
|
let bounds = {
|
||||||
|
let bounds_len = children
|
||||||
|
.iter()
|
||||||
|
.map(|c| {
|
||||||
|
if let &Some(n) = c {
|
||||||
|
let len = n.bounds_range().1 - n.bounds_range().0;
|
||||||
|
debug_assert!(len >= 1);
|
||||||
|
len
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.max()
|
||||||
|
.unwrap();
|
||||||
|
debug_assert!(bounds_len >= 1);
|
||||||
|
let bounds =
|
||||||
|
unsafe { arena.alloc_array_uninitialized_with_alignment(bounds_len, 32) };
|
||||||
|
if bounds_len < 2 {
|
||||||
|
let b1 =
|
||||||
|
children[0].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
|
||||||
|
let b2 =
|
||||||
|
children[1].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
|
||||||
|
let b3 =
|
||||||
|
children[2].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
|
||||||
|
let b4 =
|
||||||
|
children[3].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
|
||||||
|
bounds[0] = BBox4::from_bboxes(b1, b2, b3, b4);
|
||||||
|
} else {
|
||||||
|
for (i, b) in bounds.iter_mut().enumerate() {
|
||||||
|
let time = i as f32 / (bounds_len - 1) as f32;
|
||||||
|
|
||||||
|
let b1 = children[0].map_or(BBox::new(), |c| {
|
||||||
|
let (x, y) = c.bounds_range();
|
||||||
|
lerp_slice(&base.bounds[x..y], time)
|
||||||
|
});
|
||||||
|
let b2 = children[1].map_or(BBox::new(), |c| {
|
||||||
|
let (x, y) = c.bounds_range();
|
||||||
|
lerp_slice(&base.bounds[x..y], time)
|
||||||
|
});
|
||||||
|
let b3 = children[2].map_or(BBox::new(), |c| {
|
||||||
|
let (x, y) = c.bounds_range();
|
||||||
|
lerp_slice(&base.bounds[x..y], time)
|
||||||
|
});
|
||||||
|
let b4 = children[3].map_or(BBox::new(), |c| {
|
||||||
|
let (x, y) = c.bounds_range();
|
||||||
|
lerp_slice(&base.bounds[x..y], time)
|
||||||
|
});
|
||||||
|
*b = BBox4::from_bboxes(b1, b2, b3, b4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bounds
|
||||||
|
};
|
||||||
|
|
||||||
|
// Construct child nodes
|
||||||
|
let child_nodes = unsafe {
|
||||||
arena.alloc_array_uninitialized_with_alignment::<BVH4Node>(child_count, 32)
|
arena.alloc_array_uninitialized_with_alignment::<BVH4Node>(child_count, 32)
|
||||||
};
|
};
|
||||||
for i in 0..child_count {
|
for (i, c) in children[0..child_count].iter().enumerate() {
|
||||||
BVH4::construct_from_base(arena, base, child_indices[i], &mut children_mem[i]);
|
node_count +=
|
||||||
|
BVH4::construct_from_base(arena, base, c.unwrap(), &mut child_nodes[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fill in node
|
// Build this node
|
||||||
*node_mem = BVH4Node::Inner {
|
*fill_node = BVH4Node::Internal {
|
||||||
|
bounds: bounds,
|
||||||
|
children: child_nodes,
|
||||||
traversal_code: calc_traversal_code(split_info),
|
traversal_code: calc_traversal_code(split_info),
|
||||||
bounds_start: &bounds[0],
|
|
||||||
bounds_len: bounds.len() as u16,
|
|
||||||
children: children_mem,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
BVHBaseNode::Leaf {
|
// Create leaf node
|
||||||
bounds_range,
|
&BVHBaseNode::Leaf { object_range, .. } => {
|
||||||
object_range,
|
*fill_node = BVH4Node::Leaf {
|
||||||
} => {
|
|
||||||
let bounds = arena.copy_slice(&base.bounds[bounds_range.0..bounds_range.1]);
|
|
||||||
|
|
||||||
*node_mem = BVH4Node::Leaf {
|
|
||||||
bounds_start: &bounds[0],
|
|
||||||
bounds_len: bounds.len() as u16,
|
|
||||||
object_range: object_range,
|
object_range: object_range,
|
||||||
};
|
};
|
||||||
}
|
node_count += 1;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lazy_static! {
|
return node_count;
|
||||||
static ref DEGENERATE_BOUNDS: [BBox; 1] = [BBox::new()];
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Boundable for BVH4<'a> {
|
impl<'a> Boundable for BVH4<'a> {
|
||||||
fn bounds(&self) -> &[BBox] {
|
fn bounds<'b>(&'b self) -> &'b [BBox] {
|
||||||
match self.root {
|
self._bounds.unwrap_or(&[])
|
||||||
None => &DEGENERATE_BOUNDS[..],
|
|
||||||
Some(root) => match *root {
|
|
||||||
BVH4Node::Inner {
|
|
||||||
bounds_start,
|
|
||||||
bounds_len,
|
|
||||||
..
|
|
||||||
}
|
|
||||||
| BVH4Node::Leaf {
|
|
||||||
bounds_start,
|
|
||||||
bounds_len,
|
|
||||||
..
|
|
||||||
} => unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) },
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,386 +0,0 @@
|
||||||
//! This BVH4 implementation pulls a lot of ideas from the paper
|
|
||||||
//! "Efficient Ray Tracing Kernels for Modern CPU Architectures"
|
|
||||||
//! by Fuetterling et al.
|
|
||||||
//!
|
|
||||||
//! Specifically, the table-based traversal order approach they
|
|
||||||
//! propose is largely followed by this implementation.
|
|
||||||
|
|
||||||
#![allow(dead_code)]
|
|
||||||
|
|
||||||
use mem_arena::MemArena;
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
bbox::BBox,
|
|
||||||
bbox4::BBox4,
|
|
||||||
boundable::Boundable,
|
|
||||||
lerp::lerp_slice,
|
|
||||||
math::Vector,
|
|
||||||
ray::{RayBatch, RayStack},
|
|
||||||
timer::Timer,
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::{
|
|
||||||
bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH},
|
|
||||||
ACCEL_NODE_RAY_TESTS, ACCEL_TRAV_TIME,
|
|
||||||
};
|
|
||||||
|
|
||||||
use bvh_order::{calc_traversal_code, SplitAxes, TRAVERSAL_TABLE};
|
|
||||||
use float4::Bool4;
|
|
||||||
|
|
||||||
pub fn ray_code(dir: Vector) -> usize {
|
|
||||||
let ray_sign_is_neg = [dir.x() < 0.0, dir.y() < 0.0, dir.z() < 0.0];
|
|
||||||
ray_sign_is_neg[0] as usize
|
|
||||||
+ ((ray_sign_is_neg[1] as usize) << 1)
|
|
||||||
+ ((ray_sign_is_neg[2] as usize) << 2)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
|
||||||
pub struct BVH4<'a> {
|
|
||||||
root: Option<&'a BVH4Node<'a>>,
|
|
||||||
depth: usize,
|
|
||||||
node_count: usize,
|
|
||||||
_bounds: Option<&'a [BBox]>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
|
||||||
pub enum BVH4Node<'a> {
|
|
||||||
Internal {
|
|
||||||
bounds: &'a [BBox4],
|
|
||||||
children: &'a [BVH4Node<'a>],
|
|
||||||
traversal_code: u8,
|
|
||||||
},
|
|
||||||
|
|
||||||
Leaf {
|
|
||||||
object_range: (usize, usize),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> BVH4<'a> {
|
|
||||||
pub fn from_objects<'b, T, F>(
|
|
||||||
arena: &'a MemArena,
|
|
||||||
objects: &mut [T],
|
|
||||||
objects_per_leaf: usize,
|
|
||||||
bounder: F,
|
|
||||||
) -> BVH4<'a>
|
|
||||||
where
|
|
||||||
F: 'b + Fn(&T) -> &'b [BBox],
|
|
||||||
{
|
|
||||||
if objects.len() == 0 {
|
|
||||||
BVH4 {
|
|
||||||
root: None,
|
|
||||||
depth: 0,
|
|
||||||
node_count: 0,
|
|
||||||
_bounds: None,
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let base = BVHBase::from_objects(objects, objects_per_leaf, bounder);
|
|
||||||
|
|
||||||
let fill_node = unsafe { arena.alloc_uninitialized_with_alignment::<BVH4Node>(32) };
|
|
||||||
let node_count = BVH4::construct_from_base(
|
|
||||||
arena,
|
|
||||||
&base,
|
|
||||||
&base.nodes[base.root_node_index()],
|
|
||||||
fill_node,
|
|
||||||
);
|
|
||||||
|
|
||||||
BVH4 {
|
|
||||||
root: Some(fill_node),
|
|
||||||
depth: (base.depth / 2) + 1,
|
|
||||||
node_count: node_count,
|
|
||||||
_bounds: {
|
|
||||||
let range = base.nodes[base.root_node_index()].bounds_range();
|
|
||||||
Some(arena.copy_slice(&base.bounds[range.0..range.1]))
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn tree_depth(&self) -> usize {
|
|
||||||
self.depth
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn traverse<T, F>(
|
|
||||||
&self,
|
|
||||||
rays: &mut RayBatch,
|
|
||||||
ray_stack: &mut RayStack,
|
|
||||||
objects: &[T],
|
|
||||||
mut obj_ray_test: F,
|
|
||||||
) where
|
|
||||||
F: FnMut(&T, &mut RayBatch, &mut RayStack),
|
|
||||||
{
|
|
||||||
if self.root.is_none() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut trav_time: f64 = 0.0;
|
|
||||||
let mut timer = Timer::new();
|
|
||||||
|
|
||||||
let traversal_table =
|
|
||||||
&TRAVERSAL_TABLE[ray_code(rays.dir_inv_local(ray_stack.next_task_ray_idx(0)))];
|
|
||||||
|
|
||||||
// +2 of max depth for root and last child
|
|
||||||
let mut node_stack = [self.root.unwrap(); (BVH_MAX_DEPTH * 3) + 2];
|
|
||||||
let mut stack_ptr = 1;
|
|
||||||
|
|
||||||
while stack_ptr > 0 {
|
|
||||||
match node_stack[stack_ptr] {
|
|
||||||
&BVH4Node::Internal {
|
|
||||||
bounds,
|
|
||||||
children,
|
|
||||||
traversal_code,
|
|
||||||
} => {
|
|
||||||
let mut all_hits = Bool4::new();
|
|
||||||
|
|
||||||
// Ray testing
|
|
||||||
ray_stack.pop_do_next_task(children.len(), |ray_idx| {
|
|
||||||
if rays.is_done(ray_idx) {
|
|
||||||
([0; 4], 0)
|
|
||||||
} else {
|
|
||||||
let hits = lerp_slice(bounds, rays.time(ray_idx)).intersect_ray(
|
|
||||||
rays.orig_local(ray_idx),
|
|
||||||
rays.dir_inv_local(ray_idx),
|
|
||||||
rays.max_t(ray_idx),
|
|
||||||
);
|
|
||||||
|
|
||||||
if !hits.all_false() {
|
|
||||||
all_hits = all_hits | hits;
|
|
||||||
let mut lanes = [0u8; 4];
|
|
||||||
let mut lane_count = 0;
|
|
||||||
for i in 0..children.len() {
|
|
||||||
if hits.get_n(i) {
|
|
||||||
lanes[lane_count] = i as u8;
|
|
||||||
lane_count += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(lanes, lane_count)
|
|
||||||
} else {
|
|
||||||
([0; 4], 0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// If there were any intersections, create tasks.
|
|
||||||
if !all_hits.all_false() {
|
|
||||||
let order_code = traversal_table[traversal_code as usize];
|
|
||||||
let mut lanes = [0usize; 4];
|
|
||||||
let mut lane_count = 0;
|
|
||||||
for i in 0..children.len() {
|
|
||||||
let inv_i = (children.len() - 1) - i;
|
|
||||||
let child_i = ((order_code >> (inv_i * 2)) & 3) as usize;
|
|
||||||
if all_hits.get_n(child_i) {
|
|
||||||
node_stack[stack_ptr + lane_count] = &children[child_i];
|
|
||||||
lanes[lane_count] = child_i;
|
|
||||||
lane_count += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ray_stack.push_lanes_to_tasks(&lanes[..lane_count]);
|
|
||||||
stack_ptr += lane_count - 1;
|
|
||||||
} else {
|
|
||||||
stack_ptr -= 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
&BVH4Node::Leaf { object_range } => {
|
|
||||||
trav_time += timer.tick() as f64;
|
|
||||||
|
|
||||||
// Set up the tasks for each object.
|
|
||||||
let obj_count = object_range.1 - object_range.0;
|
|
||||||
for _ in 0..(obj_count - 1) {
|
|
||||||
ray_stack.duplicate_next_task();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do the ray tests.
|
|
||||||
for obj in &objects[object_range.0..object_range.1] {
|
|
||||||
obj_ray_test(obj, rays, ray_stack);
|
|
||||||
}
|
|
||||||
|
|
||||||
timer.tick();
|
|
||||||
|
|
||||||
stack_ptr -= 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
trav_time += timer.tick() as f64;
|
|
||||||
ACCEL_TRAV_TIME.with(|att| {
|
|
||||||
let v = att.get();
|
|
||||||
att.set(v + trav_time);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
fn construct_from_base(
|
|
||||||
arena: &'a MemArena,
|
|
||||||
base: &BVHBase,
|
|
||||||
node: &BVHBaseNode,
|
|
||||||
fill_node: &mut BVH4Node<'a>,
|
|
||||||
) -> usize {
|
|
||||||
let mut node_count = 0;
|
|
||||||
|
|
||||||
match node {
|
|
||||||
// Create internal node
|
|
||||||
&BVHBaseNode::Internal {
|
|
||||||
bounds_range: _,
|
|
||||||
children_indices,
|
|
||||||
split_axis,
|
|
||||||
} => {
|
|
||||||
let child_l = &base.nodes[children_indices.0];
|
|
||||||
let child_r = &base.nodes[children_indices.1];
|
|
||||||
|
|
||||||
// Prepare convenient access to the stuff we need.
|
|
||||||
let child_count: usize;
|
|
||||||
let children; // [Optional, Optional, Optional, Optional]
|
|
||||||
let split_info: SplitAxes;
|
|
||||||
match *child_l {
|
|
||||||
BVHBaseNode::Internal {
|
|
||||||
children_indices: i_l,
|
|
||||||
split_axis: s_l,
|
|
||||||
..
|
|
||||||
} => {
|
|
||||||
match *child_r {
|
|
||||||
BVHBaseNode::Internal {
|
|
||||||
children_indices: i_r,
|
|
||||||
split_axis: s_r,
|
|
||||||
..
|
|
||||||
} => {
|
|
||||||
// Four nodes
|
|
||||||
child_count = 4;
|
|
||||||
children = [
|
|
||||||
Some(&base.nodes[i_l.0]),
|
|
||||||
Some(&base.nodes[i_l.1]),
|
|
||||||
Some(&base.nodes[i_r.0]),
|
|
||||||
Some(&base.nodes[i_r.1]),
|
|
||||||
];
|
|
||||||
split_info = SplitAxes::Full((split_axis, s_l, s_r));
|
|
||||||
}
|
|
||||||
BVHBaseNode::Leaf { .. } => {
|
|
||||||
// Three nodes with left split
|
|
||||||
child_count = 3;
|
|
||||||
children = [
|
|
||||||
Some(&base.nodes[i_l.0]),
|
|
||||||
Some(&base.nodes[i_l.1]),
|
|
||||||
Some(child_r),
|
|
||||||
None,
|
|
||||||
];
|
|
||||||
split_info = SplitAxes::Left((split_axis, s_l));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
BVHBaseNode::Leaf { .. } => {
|
|
||||||
match *child_r {
|
|
||||||
BVHBaseNode::Internal {
|
|
||||||
children_indices: i_r,
|
|
||||||
split_axis: s_r,
|
|
||||||
..
|
|
||||||
} => {
|
|
||||||
// Three nodes with right split
|
|
||||||
child_count = 3;
|
|
||||||
children = [
|
|
||||||
Some(child_l),
|
|
||||||
Some(&base.nodes[i_r.0]),
|
|
||||||
Some(&base.nodes[i_r.1]),
|
|
||||||
None,
|
|
||||||
];
|
|
||||||
split_info = SplitAxes::Right((split_axis, s_r));
|
|
||||||
}
|
|
||||||
BVHBaseNode::Leaf { .. } => {
|
|
||||||
// Two nodes
|
|
||||||
child_count = 2;
|
|
||||||
children = [Some(child_l), Some(child_r), None, None];
|
|
||||||
split_info = SplitAxes::TopOnly(split_axis);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
node_count += child_count;
|
|
||||||
|
|
||||||
// Construct bounds
|
|
||||||
let bounds = {
|
|
||||||
let bounds_len = children
|
|
||||||
.iter()
|
|
||||||
.map(|c| {
|
|
||||||
if let &Some(n) = c {
|
|
||||||
let len = n.bounds_range().1 - n.bounds_range().0;
|
|
||||||
debug_assert!(len >= 1);
|
|
||||||
len
|
|
||||||
} else {
|
|
||||||
0
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.max()
|
|
||||||
.unwrap();
|
|
||||||
debug_assert!(bounds_len >= 1);
|
|
||||||
let bounds =
|
|
||||||
unsafe { arena.alloc_array_uninitialized_with_alignment(bounds_len, 32) };
|
|
||||||
if bounds_len < 2 {
|
|
||||||
let b1 =
|
|
||||||
children[0].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
|
|
||||||
let b2 =
|
|
||||||
children[1].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
|
|
||||||
let b3 =
|
|
||||||
children[2].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
|
|
||||||
let b4 =
|
|
||||||
children[3].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
|
|
||||||
bounds[0] = BBox4::from_bboxes(b1, b2, b3, b4);
|
|
||||||
} else {
|
|
||||||
for (i, b) in bounds.iter_mut().enumerate() {
|
|
||||||
let time = i as f32 / (bounds_len - 1) as f32;
|
|
||||||
|
|
||||||
let b1 = children[0].map_or(BBox::new(), |c| {
|
|
||||||
let (x, y) = c.bounds_range();
|
|
||||||
lerp_slice(&base.bounds[x..y], time)
|
|
||||||
});
|
|
||||||
let b2 = children[1].map_or(BBox::new(), |c| {
|
|
||||||
let (x, y) = c.bounds_range();
|
|
||||||
lerp_slice(&base.bounds[x..y], time)
|
|
||||||
});
|
|
||||||
let b3 = children[2].map_or(BBox::new(), |c| {
|
|
||||||
let (x, y) = c.bounds_range();
|
|
||||||
lerp_slice(&base.bounds[x..y], time)
|
|
||||||
});
|
|
||||||
let b4 = children[3].map_or(BBox::new(), |c| {
|
|
||||||
let (x, y) = c.bounds_range();
|
|
||||||
lerp_slice(&base.bounds[x..y], time)
|
|
||||||
});
|
|
||||||
*b = BBox4::from_bboxes(b1, b2, b3, b4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bounds
|
|
||||||
};
|
|
||||||
|
|
||||||
// Construct child nodes
|
|
||||||
let child_nodes = unsafe {
|
|
||||||
arena.alloc_array_uninitialized_with_alignment::<BVH4Node>(child_count, 32)
|
|
||||||
};
|
|
||||||
for (i, c) in children[0..child_count].iter().enumerate() {
|
|
||||||
node_count +=
|
|
||||||
BVH4::construct_from_base(arena, base, c.unwrap(), &mut child_nodes[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build this node
|
|
||||||
*fill_node = BVH4Node::Internal {
|
|
||||||
bounds: bounds,
|
|
||||||
children: child_nodes,
|
|
||||||
traversal_code: calc_traversal_code(split_info),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create leaf node
|
|
||||||
&BVHBaseNode::Leaf { object_range, .. } => {
|
|
||||||
*fill_node = BVH4Node::Leaf {
|
|
||||||
object_range: object_range,
|
|
||||||
};
|
|
||||||
node_count += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return node_count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Boundable for BVH4<'a> {
|
|
||||||
fn bounds<'b>(&'b self) -> &'b [BBox] {
|
|
||||||
self._bounds.unwrap_or(&[])
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,6 +1,5 @@
|
||||||
// mod bvh;
|
// mod bvh;
|
||||||
mod bvh4;
|
mod bvh4;
|
||||||
mod bvh4_simd;
|
|
||||||
mod bvh_base;
|
mod bvh_base;
|
||||||
mod light_array;
|
mod light_array;
|
||||||
mod light_tree;
|
mod light_tree;
|
||||||
|
@ -15,7 +14,7 @@ use crate::{
|
||||||
|
|
||||||
pub use self::{
|
pub use self::{
|
||||||
// bvh::{BVHNode, BVH},
|
// bvh::{BVHNode, BVH},
|
||||||
bvh4_simd::{ray_code, BVH4Node, BVH4},
|
bvh4::{ray_code, BVH4Node, BVH4},
|
||||||
light_array::LightArray,
|
light_array::LightArray,
|
||||||
light_tree::LightTree,
|
light_tree::LightTree,
|
||||||
};
|
};
|
||||||
|
|
|
@ -265,7 +265,7 @@ impl<'a> Surface for RectangleLight<'a> {
|
||||||
) {
|
) {
|
||||||
let _ = shader; // Silence 'unused' warning
|
let _ = shader; // Silence 'unused' warning
|
||||||
|
|
||||||
ray_stack.pop_do_next_task(0, |ray_idx| {
|
ray_stack.pop_do_next_task(|ray_idx| {
|
||||||
let time = rays.time(ray_idx);
|
let time = rays.time(ray_idx);
|
||||||
let orig = rays.orig(ray_idx);
|
let orig = rays.orig(ray_idx);
|
||||||
let dir = rays.dir(ray_idx);
|
let dir = rays.dir(ray_idx);
|
||||||
|
@ -332,8 +332,6 @@ impl<'a> Surface for RectangleLight<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
([0; 4], 0)
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -214,7 +214,7 @@ impl<'a> Surface for SphereLight<'a> {
|
||||||
) {
|
) {
|
||||||
let _ = shader; // Silence 'unused' warning
|
let _ = shader; // Silence 'unused' warning
|
||||||
|
|
||||||
ray_stack.pop_do_next_task(0, |ray_idx| {
|
ray_stack.pop_do_next_task(|ray_idx| {
|
||||||
let time = rays.time(ray_idx);
|
let time = rays.time(ray_idx);
|
||||||
|
|
||||||
// Get the transform space
|
// Get the transform space
|
||||||
|
@ -242,7 +242,7 @@ impl<'a> Surface for SphereLight<'a> {
|
||||||
let discriminant = (b * b) - (4.0 * a * c);
|
let discriminant = (b * b) - (4.0 * a * c);
|
||||||
if discriminant < 0.0 {
|
if discriminant < 0.0 {
|
||||||
// Discriminant less than zero? No solution => no intersection.
|
// Discriminant less than zero? No solution => no intersection.
|
||||||
return ([0; 4], 0);
|
return;
|
||||||
}
|
}
|
||||||
let discriminant = discriminant.sqrt();
|
let discriminant = discriminant.sqrt();
|
||||||
|
|
||||||
|
@ -268,7 +268,7 @@ impl<'a> Surface for SphereLight<'a> {
|
||||||
// Check our intersection for validity against this ray's extents
|
// Check our intersection for validity against this ray's extents
|
||||||
if t0 > rays.max_t(ray_idx) || t1 <= 0.0 {
|
if t0 > rays.max_t(ray_idx) || t1 <= 0.0 {
|
||||||
// Didn't hit because sphere is entirely outside of ray's extents
|
// Didn't hit because sphere is entirely outside of ray's extents
|
||||||
return ([0; 4], 0);
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
let t = if t0 > 0.0 {
|
let t = if t0 > 0.0 {
|
||||||
|
@ -278,7 +278,7 @@ impl<'a> Surface for SphereLight<'a> {
|
||||||
} else {
|
} else {
|
||||||
// Didn't hit because ray is entirely within the sphere, and
|
// Didn't hit because ray is entirely within the sphere, and
|
||||||
// therefore doesn't hit its surface.
|
// therefore doesn't hit its surface.
|
||||||
return ([0; 4], 0);
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
// We hit the sphere, so calculate intersection info.
|
// We hit the sphere, so calculate intersection info.
|
||||||
|
@ -334,8 +334,6 @@ impl<'a> Surface for SphereLight<'a> {
|
||||||
// Set ray's max t
|
// Set ray's max t
|
||||||
rays.set_max_t(ray_idx, t);
|
rays.set_max_t(ray_idx, t);
|
||||||
}
|
}
|
||||||
|
|
||||||
([0; 4], 0)
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
38
src/ray.rs
38
src/ray.rs
|
@ -1,6 +1,6 @@
|
||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
|
|
||||||
use float4::Float4;
|
use float4::{Bool4, Float4};
|
||||||
|
|
||||||
use crate::math::{Matrix4x4, Point, Vector};
|
use crate::math::{Matrix4x4, Point, Vector};
|
||||||
|
|
||||||
|
@ -293,11 +293,31 @@ impl RayStack {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Pops the next task off the stack, and executes the provided closure for
|
/// Pops the next task off the stack, and executes the provided closure for
|
||||||
/// each ray index in the task. The return value of the closure is the list
|
/// each ray index in the task.
|
||||||
/// of lanes (by index) to add the given ray index back into.
|
pub fn pop_do_next_task<F>(&mut self, mut handle_ray: F)
|
||||||
pub fn pop_do_next_task<F>(&mut self, needed_lanes: usize, mut handle_ray: F)
|
|
||||||
where
|
where
|
||||||
F: FnMut(usize) -> ([u8; 4], usize),
|
F: FnMut(usize),
|
||||||
|
{
|
||||||
|
// Pop the task and do necessary bookkeeping.
|
||||||
|
let task = self.tasks.pop().unwrap();
|
||||||
|
let task_range = (task.start_idx, self.lanes[task.lane].end_len);
|
||||||
|
self.lanes[task.lane].end_len = task.start_idx;
|
||||||
|
|
||||||
|
// Execute task.
|
||||||
|
for i in task_range.0..task_range.1 {
|
||||||
|
let ray_idx = self.lanes[task.lane].idxs[i];
|
||||||
|
handle_ray(ray_idx as usize);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.lanes[task.lane].idxs.truncate(task_range.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pops the next task off the stack, executes the provided closure for
|
||||||
|
/// each ray index in the task, and pushes the ray indices back onto the
|
||||||
|
/// indicated lanes.
|
||||||
|
pub fn pop_do_next_task_and_push_rays<F>(&mut self, needed_lanes: usize, mut handle_ray: F)
|
||||||
|
where
|
||||||
|
F: FnMut(usize) -> (Bool4, usize),
|
||||||
{
|
{
|
||||||
// Prepare lanes.
|
// Prepare lanes.
|
||||||
self.ensure_lane_count(needed_lanes);
|
self.ensure_lane_count(needed_lanes);
|
||||||
|
@ -311,9 +331,10 @@ impl RayStack {
|
||||||
let mut source_lane_cap = task_range.0;
|
let mut source_lane_cap = task_range.0;
|
||||||
for i in task_range.0..task_range.1 {
|
for i in task_range.0..task_range.1 {
|
||||||
let ray_idx = self.lanes[task.lane].idxs[i];
|
let ray_idx = self.lanes[task.lane].idxs[i];
|
||||||
let (add_list, list_len) = handle_ray(ray_idx as usize);
|
let (push_mask, c) = handle_ray(ray_idx as usize);
|
||||||
for &l in &add_list[..list_len] {
|
for l in 0..c {
|
||||||
if l == task.lane as u8 {
|
if push_mask.get_n(l) {
|
||||||
|
if l == task.lane {
|
||||||
self.lanes[l as usize].idxs[source_lane_cap] = ray_idx;
|
self.lanes[l as usize].idxs[source_lane_cap] = ray_idx;
|
||||||
source_lane_cap += 1;
|
source_lane_cap += 1;
|
||||||
} else {
|
} else {
|
||||||
|
@ -321,6 +342,7 @@ impl RayStack {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
self.lanes[task.lane].idxs.truncate(source_lane_cap);
|
self.lanes[task.lane].idxs.truncate(source_lane_cap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -157,7 +157,7 @@ impl<'a> Surface for TriangleMesh<'a> {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Test each ray against the current triangle.
|
// Test each ray against the current triangle.
|
||||||
ray_stack.pop_do_next_task(0, |ray_idx| {
|
ray_stack.pop_do_next_task(|ray_idx| {
|
||||||
let ray_idx = ray_idx as usize;
|
let ray_idx = ray_idx as usize;
|
||||||
let ray_time = rays.time(ray_idx);
|
let ray_time = rays.time(ray_idx);
|
||||||
|
|
||||||
|
@ -275,8 +275,6 @@ impl<'a> Surface for TriangleMesh<'a> {
|
||||||
rays.set_max_t(ray_idx, t);
|
rays.set_max_t(ray_idx, t);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
([0; 4], 0)
|
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
|
@ -12,6 +12,8 @@ use crate::{
|
||||||
transform_stack::TransformStack,
|
transform_stack::TransformStack,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use float4::Bool4;
|
||||||
|
|
||||||
pub struct Tracer<'a> {
|
pub struct Tracer<'a> {
|
||||||
ray_stack: RayStack,
|
ray_stack: RayStack,
|
||||||
inner: TracerInner<'a>,
|
inner: TracerInner<'a>,
|
||||||
|
@ -96,10 +98,10 @@ impl<'a> TracerInner<'a> {
|
||||||
// Do transforms
|
// Do transforms
|
||||||
// TODO: re-divide rays based on direction (maybe?).
|
// TODO: re-divide rays based on direction (maybe?).
|
||||||
let xforms = self.xform_stack.top();
|
let xforms = self.xform_stack.top();
|
||||||
ray_stack.pop_do_next_task(2, |ray_idx| {
|
ray_stack.pop_do_next_task_and_push_rays(2, |ray_idx| {
|
||||||
let t = rays.time(ray_idx);
|
let t = rays.time(ray_idx);
|
||||||
rays.update_local(ray_idx, &lerp_slice(xforms, t));
|
rays.update_local(ray_idx, &lerp_slice(xforms, t));
|
||||||
([0, 1, 0, 0], 2)
|
(Bool4::new(true, true, false, false), 2)
|
||||||
});
|
});
|
||||||
ray_stack.push_lanes_to_tasks(&[0, 1]);
|
ray_stack.push_lanes_to_tasks(&[0, 1]);
|
||||||
}
|
}
|
||||||
|
@ -129,16 +131,14 @@ impl<'a> TracerInner<'a> {
|
||||||
// Undo transforms
|
// Undo transforms
|
||||||
let xforms = self.xform_stack.top();
|
let xforms = self.xform_stack.top();
|
||||||
if !xforms.is_empty() {
|
if !xforms.is_empty() {
|
||||||
ray_stack.pop_do_next_task(0, |ray_idx| {
|
ray_stack.pop_do_next_task(|ray_idx| {
|
||||||
let t = rays.time(ray_idx);
|
let t = rays.time(ray_idx);
|
||||||
rays.update_local(ray_idx, &lerp_slice(xforms, t));
|
rays.update_local(ray_idx, &lerp_slice(xforms, t));
|
||||||
([0; 4], 0)
|
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
let ident = Matrix4x4::new();
|
let ident = Matrix4x4::new();
|
||||||
ray_stack.pop_do_next_task(0, |ray_idx| {
|
ray_stack.pop_do_next_task(|ray_idx| {
|
||||||
rays.update_local(ray_idx, &ident);
|
rays.update_local(ray_idx, &ident);
|
||||||
([0; 4], 0)
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -621,7 +621,22 @@ mod x86_64_sse {
|
||||||
|
|
||||||
impl Bool4 {
|
impl Bool4 {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn new() -> Bool4 {
|
pub fn new(a: bool, b: bool, c: bool, d: bool) -> Bool4 {
|
||||||
|
use std::arch::x86_64::_mm_set_ps;
|
||||||
|
Bool4 {
|
||||||
|
data: unsafe {
|
||||||
|
_mm_set_ps(
|
||||||
|
if d { 1.0 } else { 0.0 },
|
||||||
|
if c { 1.0 } else { 0.0 },
|
||||||
|
if b { 1.0 } else { 0.0 },
|
||||||
|
if a { 1.0 } else { 0.0 },
|
||||||
|
)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn new_false() -> Bool4 {
|
||||||
use std::arch::x86_64::_mm_set1_ps;
|
use std::arch::x86_64::_mm_set1_ps;
|
||||||
Bool4 {
|
Bool4 {
|
||||||
data: unsafe { _mm_set1_ps(0.0) },
|
data: unsafe { _mm_set1_ps(0.0) },
|
||||||
|
@ -667,7 +682,8 @@ mod x86_64_sse {
|
||||||
///
|
///
|
||||||
/// This is the negation of the `OR` operation on all the contained
|
/// This is the negation of the `OR` operation on all the contained
|
||||||
/// bools. If even one bool is true, this returns false.
|
/// bools. If even one bool is true, this returns false.
|
||||||
pub fn all_false(&self) -> bool {
|
#[inline(always)]
|
||||||
|
pub fn is_all_false(&self) -> bool {
|
||||||
let a = unsafe { *(&self.data as *const __m128 as *const u128) };
|
let a = unsafe { *(&self.data as *const __m128 as *const u128) };
|
||||||
a == 0
|
a == 0
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user