Misc optimizations that add up to a nice speed boost.
This commit is contained in:
parent
c4b8971805
commit
4f7335db8c
|
@ -125,34 +125,40 @@ impl<'a> BVH4<'a> {
|
||||||
// Ray testing
|
// Ray testing
|
||||||
ray_stack.pop_do_next_task_and_push_rays(children.len(), |ray_idx| {
|
ray_stack.pop_do_next_task_and_push_rays(children.len(), |ray_idx| {
|
||||||
if rays.is_done(ray_idx) {
|
if rays.is_done(ray_idx) {
|
||||||
(Bool4::new_false(), 0)
|
Bool4::new_false()
|
||||||
} else {
|
} else {
|
||||||
let hits = lerp_slice(bounds, rays.time(ray_idx)).intersect_ray(
|
let hits = if bounds.len() == 1 {
|
||||||
|
bounds[0].intersect_ray(
|
||||||
rays.orig_local(ray_idx),
|
rays.orig_local(ray_idx),
|
||||||
rays.dir_inv_local(ray_idx),
|
rays.dir_inv_local(ray_idx),
|
||||||
rays.max_t(ray_idx),
|
rays.max_t(ray_idx),
|
||||||
);
|
)
|
||||||
|
} else {
|
||||||
|
lerp_slice(bounds, rays.time(ray_idx)).intersect_ray(
|
||||||
|
rays.orig_local(ray_idx),
|
||||||
|
rays.dir_inv_local(ray_idx),
|
||||||
|
rays.max_t(ray_idx),
|
||||||
|
)
|
||||||
|
};
|
||||||
all_hits = all_hits | hits;
|
all_hits = all_hits | hits;
|
||||||
(hits, children.len())
|
hits
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// If there were any intersections, create tasks.
|
// If there were any intersections, create tasks.
|
||||||
if !all_hits.is_all_false() {
|
if !all_hits.is_all_false() {
|
||||||
let order_code = traversal_table[traversal_code as usize];
|
let order_code = traversal_table[traversal_code as usize];
|
||||||
let mut lanes = [0usize; 4];
|
|
||||||
let mut lane_count = 0;
|
let mut lane_count = 0;
|
||||||
for i in 0..children.len() {
|
let mut i = children.len() as u8;
|
||||||
let inv_i = (children.len() - 1) - i;
|
while i > 0 {
|
||||||
let child_i = ((order_code >> (inv_i * 2)) & 3) as usize;
|
i -= 1;
|
||||||
if all_hits.get_n(child_i) {
|
let child_i = ((order_code >> (i * 2)) & 3) as usize;
|
||||||
|
if ray_stack.push_lane_to_task(child_i) {
|
||||||
node_stack[stack_ptr + lane_count] = &children[child_i];
|
node_stack[stack_ptr + lane_count] = &children[child_i];
|
||||||
lanes[lane_count] = child_i;
|
|
||||||
lane_count += 1;
|
lane_count += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ray_stack.push_lanes_to_tasks(&lanes[..lane_count]);
|
|
||||||
stack_ptr += lane_count - 1;
|
stack_ptr += lane_count - 1;
|
||||||
} else {
|
} else {
|
||||||
stack_ptr -= 1;
|
stack_ptr -= 1;
|
||||||
|
|
59
src/ray.rs
59
src/ray.rs
|
@ -259,17 +259,29 @@ impl RayStack {
|
||||||
self.lanes[lane].idxs.push(ray_idx as RayIndexType);
|
self.lanes[lane].idxs.push(ray_idx as RayIndexType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pushes any excess indices on the given lane to a new task on the
|
||||||
|
/// task stack.
|
||||||
|
///
|
||||||
|
/// Returns whether a task was pushed or not. No task will be pushed
|
||||||
|
/// if there are no excess indices on the end of the lane.
|
||||||
|
pub fn push_lane_to_task(&mut self, lane_idx: usize) -> bool {
|
||||||
|
if self.lanes[lane_idx].end_len < self.lanes[lane_idx].idxs.len() {
|
||||||
|
self.tasks.push(RayTask {
|
||||||
|
lane: lane_idx,
|
||||||
|
start_idx: self.lanes[lane_idx].end_len,
|
||||||
|
});
|
||||||
|
self.lanes[lane_idx].end_len = self.lanes[lane_idx].idxs.len();
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Takes the given list of lane indices, and pushes any excess indices on
|
/// Takes the given list of lane indices, and pushes any excess indices on
|
||||||
/// the end of each into a new task, in the order provided.
|
/// the end of each into a new task, in the order provided.
|
||||||
pub fn push_lanes_to_tasks(&mut self, lane_idxs: &[usize]) {
|
pub fn push_lanes_to_tasks(&mut self, lane_idxs: &[usize]) {
|
||||||
for &l in lane_idxs {
|
for &l in lane_idxs {
|
||||||
if self.lanes[l].end_len < self.lanes[l].idxs.len() {
|
self.push_lane_to_task(l);
|
||||||
self.tasks.push(RayTask {
|
|
||||||
lane: l,
|
|
||||||
start_idx: self.lanes[l].end_len,
|
|
||||||
});
|
|
||||||
self.lanes[l].end_len = self.lanes[l].idxs.len();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -335,36 +347,39 @@ impl RayStack {
|
||||||
/// Pops the next task off the stack, executes the provided closure for
|
/// Pops the next task off the stack, executes the provided closure for
|
||||||
/// each ray index in the task, and pushes the ray indices back onto the
|
/// each ray index in the task, and pushes the ray indices back onto the
|
||||||
/// indicated lanes.
|
/// indicated lanes.
|
||||||
pub fn pop_do_next_task_and_push_rays<F>(&mut self, needed_lanes: usize, mut handle_ray: F)
|
pub fn pop_do_next_task_and_push_rays<F>(&mut self, output_lane_count: usize, mut handle_ray: F)
|
||||||
where
|
where
|
||||||
F: FnMut(usize) -> (Bool4, usize),
|
F: FnMut(usize) -> Bool4,
|
||||||
{
|
{
|
||||||
// Prepare lanes.
|
|
||||||
self.ensure_lane_count(needed_lanes);
|
|
||||||
|
|
||||||
// Pop the task and do necessary bookkeeping.
|
// Pop the task and do necessary bookkeeping.
|
||||||
let task = self.tasks.pop().unwrap();
|
let task = self.tasks.pop().unwrap();
|
||||||
let task_range = (task.start_idx, self.lanes[task.lane].end_len);
|
let task_range = (task.start_idx, self.lanes[task.lane].end_len);
|
||||||
self.lanes[task.lane].end_len = task.start_idx;
|
self.lanes[task.lane].end_len = task.start_idx;
|
||||||
|
|
||||||
|
// SAFETY: this is probably evil, and depends on behaviors of Vec that
|
||||||
|
// are not actually promised. But we're essentially truncating the lane
|
||||||
|
// to the start of our task range, but will continue to access its
|
||||||
|
// elements beyond that range via `get_unchecked()` below. Because the
|
||||||
|
// memory is not freed nor altered, this is safe. However, again, the
|
||||||
|
// Vec apis don't promise this behavior. So:
|
||||||
|
//
|
||||||
|
// TODO: build a slightly different lane abstraction to get this same
|
||||||
|
// efficiency without depending on implicit Vec behavior.
|
||||||
|
unsafe {
|
||||||
|
self.lanes[task.lane].idxs.set_len(task.start_idx);
|
||||||
|
}
|
||||||
|
|
||||||
// Execute task.
|
// Execute task.
|
||||||
let mut source_lane_cap = task_range.0;
|
|
||||||
for i in task_range.0..task_range.1 {
|
for i in task_range.0..task_range.1 {
|
||||||
let ray_idx = self.lanes[task.lane].idxs[i];
|
let ray_idx = *unsafe { self.lanes[task.lane].idxs.get_unchecked(i) };
|
||||||
let (push_mask, c) = handle_ray(ray_idx as usize);
|
let push_mask = handle_ray(ray_idx as usize);
|
||||||
for l in 0..c {
|
for l in 0..output_lane_count {
|
||||||
if push_mask.get_n(l) {
|
if push_mask.get_n(l) {
|
||||||
if l == task.lane {
|
|
||||||
self.lanes[l as usize].idxs[source_lane_cap] = ray_idx;
|
|
||||||
source_lane_cap += 1;
|
|
||||||
} else {
|
|
||||||
self.lanes[l as usize].idxs.push(ray_idx);
|
self.lanes[l as usize].idxs.push(ray_idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.lanes[task.lane].idxs.truncate(source_lane_cap);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A lane within a RayStack.
|
/// A lane within a RayStack.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user