Keep Bool4 in its native format instead of converting to a bitmask.
This gives a small performance boost.
This commit is contained in:
parent
2fddcae0fd
commit
c5d23592b9
|
@ -25,6 +25,7 @@ use super::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use bvh_order::{calc_traversal_code, SplitAxes, TRAVERSAL_TABLE};
|
use bvh_order::{calc_traversal_code, SplitAxes, TRAVERSAL_TABLE};
|
||||||
|
use float4::Bool4;
|
||||||
|
|
||||||
pub fn ray_code(dir: Vector) -> usize {
|
pub fn ray_code(dir: Vector) -> usize {
|
||||||
let ray_sign_is_neg = [dir.x() < 0.0, dir.y() < 0.0, dir.z() < 0.0];
|
let ray_sign_is_neg = [dir.x() < 0.0, dir.y() < 0.0, dir.z() < 0.0];
|
||||||
|
@ -128,27 +129,25 @@ impl<'a> BVH4<'a> {
|
||||||
children,
|
children,
|
||||||
traversal_code,
|
traversal_code,
|
||||||
} => {
|
} => {
|
||||||
let mut all_hits = 0;
|
let mut all_hits = Bool4::new();
|
||||||
|
|
||||||
// Ray testing
|
// Ray testing
|
||||||
ray_stack.pop_do_next_task(children.len(), |ray_idx| {
|
ray_stack.pop_do_next_task(children.len(), |ray_idx| {
|
||||||
if rays.is_done(ray_idx) {
|
if rays.is_done(ray_idx) {
|
||||||
([0; 4], 0)
|
([0; 4], 0)
|
||||||
} else {
|
} else {
|
||||||
let hits = lerp_slice(bounds, rays.time(ray_idx))
|
let hits = lerp_slice(bounds, rays.time(ray_idx)).intersect_ray(
|
||||||
.intersect_ray(
|
|
||||||
rays.orig_local(ray_idx),
|
rays.orig_local(ray_idx),
|
||||||
rays.dir_inv_local(ray_idx),
|
rays.dir_inv_local(ray_idx),
|
||||||
rays.max_t(ray_idx),
|
rays.max_t(ray_idx),
|
||||||
)
|
);
|
||||||
.to_bitmask();
|
|
||||||
|
|
||||||
if hits != 0 {
|
if !hits.all_false() {
|
||||||
all_hits |= hits;
|
all_hits = all_hits | hits;
|
||||||
let mut lanes = [0u8; 4];
|
let mut lanes = [0u8; 4];
|
||||||
let mut lane_count = 0;
|
let mut lane_count = 0;
|
||||||
for i in 0..children.len() {
|
for i in 0..children.len() {
|
||||||
if (hits >> i) & 1 != 0 {
|
if hits.get_n(i) {
|
||||||
lanes[lane_count] = i as u8;
|
lanes[lane_count] = i as u8;
|
||||||
lane_count += 1;
|
lane_count += 1;
|
||||||
}
|
}
|
||||||
|
@ -161,14 +160,14 @@ impl<'a> BVH4<'a> {
|
||||||
});
|
});
|
||||||
|
|
||||||
// If there were any intersections, create tasks.
|
// If there were any intersections, create tasks.
|
||||||
if all_hits > 0 {
|
if !all_hits.all_false() {
|
||||||
let order_code = traversal_table[traversal_code as usize];
|
let order_code = traversal_table[traversal_code as usize];
|
||||||
let mut lanes = [0usize; 4];
|
let mut lanes = [0usize; 4];
|
||||||
let mut lane_count = 0;
|
let mut lane_count = 0;
|
||||||
for i in 0..children.len() {
|
for i in 0..children.len() {
|
||||||
let inv_i = (children.len() - 1) - i;
|
let inv_i = (children.len() - 1) - i;
|
||||||
let child_i = ((order_code >> (inv_i * 2)) & 3) as usize;
|
let child_i = ((order_code >> (inv_i * 2)) & 3) as usize;
|
||||||
if ((all_hits >> child_i) & 1) != 0 {
|
if all_hits.get_n(child_i) {
|
||||||
node_stack[stack_ptr + lane_count] = &children[child_i];
|
node_stack[stack_ptr + lane_count] = &children[child_i];
|
||||||
lanes[lane_count] = child_i;
|
lanes[lane_count] = child_i;
|
||||||
lane_count += 1;
|
lane_count += 1;
|
||||||
|
|
|
@ -620,6 +620,14 @@ mod x86_64_sse {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Bool4 {
|
impl Bool4 {
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn new() -> Bool4 {
|
||||||
|
use std::arch::x86_64::_mm_set1_ps;
|
||||||
|
Bool4 {
|
||||||
|
data: unsafe { _mm_set1_ps(0.0) },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the value of the nth element.
|
/// Returns the value of the nth element.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn get_n(&self, n: usize) -> bool {
|
pub fn get_n(&self, n: usize) -> bool {
|
||||||
|
@ -637,24 +645,33 @@ mod x86_64_sse {
|
||||||
self.get_n(0)
|
self.get_n(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the value of the 1th element.
|
/// Returns the value of the 1st element.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn get_1(&self) -> bool {
|
pub fn get_1(&self) -> bool {
|
||||||
self.get_n(1)
|
self.get_n(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the value of the 2th element.
|
/// Returns the value of the 2nd element.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn get_2(&self) -> bool {
|
pub fn get_2(&self) -> bool {
|
||||||
self.get_n(2)
|
self.get_n(2)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the value of the 3th element.
|
/// Returns the value of the 3rd element.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn get_3(&self) -> bool {
|
pub fn get_3(&self) -> bool {
|
||||||
self.get_n(3)
|
self.get_n(3)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns whether all four bools are false.
|
||||||
|
///
|
||||||
|
/// This is the negation of the `OR` operation on all the contained bools. If even
|
||||||
|
/// one bool is true, this returns false.
|
||||||
|
pub fn all_false(&self) -> bool {
|
||||||
|
let a = unsafe { *(&self.data as *const __m128 as *const u128) };
|
||||||
|
a == 0
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn to_bitmask(&self) -> u8 {
|
pub fn to_bitmask(&self) -> u8 {
|
||||||
let a = unsafe { *(&self.data as *const __m128 as *const u8).offset(0) };
|
let a = unsafe { *(&self.data as *const __m128 as *const u8).offset(0) };
|
||||||
|
|
Loading…
Reference in New Issue
Block a user