Actually using the None match arm in BVH4 traversal now.

It seems to be just a hair faster, though the difference is small.  It also
sets the code up for trying some further optimizations.
This commit is contained in:
Nathan Vegdahl 2017-04-16 17:22:51 -07:00
parent f93dde1810
commit 92fd83a766

View File

@ -108,6 +108,8 @@ impl<'a> BVH4<'a> {
let noc3 = (node_order_code >> 4) & 3; let noc3 = (node_order_code >> 4) & 3;
let noc4 = (node_order_code >> 6) & 3; let noc4 = (node_order_code >> 6) & 3;
let mut all_hits = 0;
// Ray testing // Ray testing
let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| { let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
if (!r.is_done()) && (first_loop || r.trav_stack.pop()) { if (!r.is_done()) && (first_loop || r.trav_stack.pop()) {
@ -115,6 +117,8 @@ impl<'a> BVH4<'a> {
.intersect_accel_ray(r) .intersect_accel_ray(r)
.to_bitmask(); .to_bitmask();
all_hits |= hits;
if hits != 0 { if hits != 0 {
// Push hit bits onto ray's traversal stack // Push hit bits onto ray's traversal stack
let shuffled_hits = match children.len() { let shuffled_hits = match children.len() {
@ -142,9 +146,13 @@ impl<'a> BVH4<'a> {
if part > 0 { if part > 0 {
for i in 0..children.len() { for i in 0..children.len() {
let inv_i = (children.len() - 1) - i; let inv_i = (children.len() - 1) - i;
node_stack[stack_ptr + i] = let child_i = ((node_order_code >> (inv_i * 2)) & 3) as usize;
Some(&children[((node_order_code >> (inv_i * 2)) & 3) as node_stack[stack_ptr + i] = if ((all_hits >> child_i) & 1) ==
usize]); 0 {
None
} else {
Some(&children[child_i])
};
ray_i_stack[stack_ptr + i] = part; ray_i_stack[stack_ptr + i] = part;
} }