Print stats for BVH and BVH4 traversal, and work on speeding up BVH4 traversal.

This commit is contained in:
Nathan Vegdahl 2017-04-22 11:18:29 -07:00
parent 597bcf0518
commit fdf2b4babf
6 changed files with 180 additions and 39 deletions

View File

@ -7,8 +7,10 @@ use bbox::BBox;
use boundable::Boundable;
use lerp::lerp_slice;
use ray::AccelRay;
use timer::Timer;
use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH};
use super::ACCEL_TRAV_TIME;
#[derive(Copy, Clone, Debug)]
@ -61,51 +63,63 @@ impl<'a> BVH<'a> {
pub fn traverse<T, F>(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F)
where F: FnMut(&T, &mut [AccelRay])
{
match self.root {
None => {}
if self.root.is_none() {
return;
}
Some(root) => {
// +2 of max depth for root and last child
let mut node_stack = [root; BVH_MAX_DEPTH + 2];
let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2];
let mut stack_ptr = 1;
let mut trav_time: f64 = 0.0;
let mut timer = Timer::new();
while stack_ptr > 0 {
match node_stack[stack_ptr] {
&BVHNode::Internal { bounds, children, split_axis } => {
let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
(!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r)
});
if part > 0 {
node_stack[stack_ptr] = children.0;
node_stack[stack_ptr + 1] = children.1;
ray_i_stack[stack_ptr] = part;
ray_i_stack[stack_ptr + 1] = part;
if rays[0].dir_inv.get_n(split_axis as usize).is_sign_positive() {
node_stack.swap(stack_ptr, stack_ptr + 1);
}
stack_ptr += 1;
} else {
stack_ptr -= 1;
}
// +2 of max depth for root and last child
let mut node_stack = [self.root.unwrap(); BVH_MAX_DEPTH + 2];
let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2];
let mut stack_ptr = 1;
while stack_ptr > 0 {
match node_stack[stack_ptr] {
&BVHNode::Internal { bounds, children, split_axis } => {
let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
(!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r)
});
if part > 0 {
node_stack[stack_ptr] = children.0;
node_stack[stack_ptr + 1] = children.1;
ray_i_stack[stack_ptr] = part;
ray_i_stack[stack_ptr + 1] = part;
if rays[0].dir_inv.get_n(split_axis as usize) >= 0.0 {
node_stack.swap(stack_ptr, stack_ptr + 1);
}
stack_ptr += 1;
} else {
stack_ptr -= 1;
}
}
&BVHNode::Leaf { bounds, object_range } => {
let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
(!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r)
});
if part > 0 {
for obj in &objects[object_range.0..object_range.1] {
obj_ray_test(obj, &mut rays[..part]);
}
}
&BVHNode::Leaf { bounds, object_range } => {
let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
(!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r)
});
stack_ptr -= 1;
trav_time += timer.tick() as f64;
if part > 0 {
for obj in &objects[object_range.0..object_range.1] {
obj_ray_test(obj, &mut rays[..part]);
}
}
timer.tick();
stack_ptr -= 1;
}
}
}
trav_time += timer.tick() as f64;
ACCEL_TRAV_TIME.with(|att| {
let v = att.get();
att.set(v + trav_time);
});
}
fn construct_from_base(arena: &'a MemArena,

View File

@ -9,14 +9,16 @@
use mem_arena::MemArena;
use algorithm::partition;
use algorithm::partition_with_side;
use bbox::BBox;
use bbox4::BBox4;
use boundable::Boundable;
use lerp::lerp_slice;
use ray::AccelRay;
use timer::Timer;
use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH};
use super::ACCEL_TRAV_TIME;
// TRAVERSAL_TABLE
include!("bvh4_table.inc");
@ -87,10 +89,14 @@ impl<'a> BVH4<'a> {
return;
}
let mut trav_time: f64 = 0.0;
let mut timer = Timer::new();
// +2 of max depth for root and last child
let mut node_stack = [self.root; BVH_MAX_DEPTH + 2];
let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2];
let mut stack_ptr = 1;
let mut unpopped = 0;
let mut first_loop = true;
let ray_code = (rays[0].dir_inv.x().is_sign_negative() as u8) |
@ -111,8 +117,11 @@ impl<'a> BVH4<'a> {
let mut all_hits = 0;
// Ray testing
let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
if (!r.is_done()) && (first_loop || r.trav_stack.pop()) {
let part = filter_rays(&ray_i_stack[stack_ptr..],
&mut rays[..ray_i_stack[stack_ptr]],
unpopped,
|r, pop_count| {
if (!r.is_done()) && (first_loop || r.trav_stack.pop_to_nth(pop_count)) {
let hits = lerp_slice(bounds, r.time)
.intersect_accel_ray(r)
.to_bitmask();
@ -141,6 +150,7 @@ impl<'a> BVH4<'a> {
}
return false;
});
unpopped = 0;
// Update stack based on ray testing results
if part > 0 {
@ -163,20 +173,29 @@ impl<'a> BVH4<'a> {
Some(&BVH4Node::Leaf { object_range }) => {
let part = if !first_loop {
partition(&mut rays[..ray_i_stack[stack_ptr]], |r| r.trav_stack.pop())
filter_rays(&ray_i_stack[stack_ptr..],
&mut rays[..ray_i_stack[stack_ptr]],
unpopped,
|r, pop_count| r.trav_stack.pop_to_nth(pop_count))
} else {
ray_i_stack[stack_ptr]
};
unpopped = 0;
trav_time += timer.tick() as f64;
for obj in &objects[object_range.0..object_range.1] {
obj_ray_test(obj, &mut rays[..part]);
}
timer.tick();
stack_ptr -= 1;
}
None => {
if !first_loop {
// unpopped += 1;
for r in (&mut rays[..ray_i_stack[stack_ptr]]).iter_mut() {
r.trav_stack.pop();
}
@ -187,6 +206,12 @@ impl<'a> BVH4<'a> {
first_loop = false;
}
trav_time += timer.tick() as f64;
ACCEL_TRAV_TIME.with(|att| {
let v = att.get();
att.set(v + trav_time);
});
}
fn construct_from_base(arena: &'a MemArena,
@ -391,3 +416,40 @@ fn calc_traversal_code(split_1: u8, split_2: u8, split_3: u8, topology: u8) -> u
static T_TABLE: [u8; 4] = [0, 27, 27 + 9, 27 + 9 + 9];
split_1 + (split_2 * 3) + (split_3 * 9) + T_TABLE[topology as usize]
}
/// Partitions `rays` in place with `ray_test` and returns the value produced
/// by `partition_with_side` (the index of the first ray for which the test
/// returned false).
///
/// `ray_test` receives each ray together with a pop count.  As currently
/// written, the pop count is always 1 and the left/right side flag supplied
/// by `partition_with_side` is ignored.
///
/// `ray_i_stack` and `unpopped` are not used by the live code; they are only
/// referenced by the commented-out draft below, which derives a per-ray pop
/// count from them.
/// NOTE(review): the draft looks like unfinished work on deferring
/// traversal-stack pops -- confirm before enabling or deleting it.
fn filter_rays<F>(ray_i_stack: &[usize],
rays: &mut [AccelRay],
unpopped: usize,
mut ray_test: F)
-> usize
where F: FnMut(&mut AccelRay, usize) -> bool
{
// let part = if unpopped == 0 {
// Live path: test every ray with a fixed pop count of 1.
partition_with_side(rays, |r, _| ray_test(r, 1))
// } else {
// let mut part_n = [0, rays.len()]; // Where we are in the partition
// let mut part_pop = [unpopped, 0]; // Number of bits to pop on the left and right side
// partition_with_side(rays, |r, side| {
// let pop_count = 1 +
// if side {
// part_n[1] -= 1;
// while part_n[1] < ray_i_stack[part_pop[1] + 1] && part_pop[1] < unpopped {
// part_pop[1] += 1;
// }
// part_pop[1]
// } else {
// while part_n[0] >= ray_i_stack[part_pop[0]] {
// part_pop[0] -= 1;
// }
// part_n[0] += 1;
// part_pop[0]
// };
// return ray_test(r, pop_count);
// })
// };
// part
}

View File

@ -5,6 +5,8 @@ mod light_array;
mod light_tree;
mod objects_split;
use std::cell::Cell;
use math::{Vector, Point, Normal};
use shading::surface_closure::SurfaceClosure;
@ -12,6 +14,10 @@ pub use self::bvh::{BVH, BVHNode};
pub use self::bvh4::{BVH4, BVH4Node};
pub use self::light_tree::LightTree;
// Track BVH traversal time.
//
// Per-thread accumulator, starting at 0.0.  The BVH and BVH4 `traverse`
// methods add their measured traversal time to it, and the renderer copies
// the accumulated value into `RenderStats::accel_traversal_time` and then
// resets the cell to 0.0.
// NOTE(review): the value is in whatever unit `Timer::tick()` returns --
// confirm against the timer module.
thread_local! {
pub static ACCEL_TRAV_TIME: Cell<f64> = Cell::new(0.0);
}
pub trait LightAccel {
/// Returns (index_of_light, selection_pdf, whittled_n)

View File

@ -77,6 +77,55 @@ pub fn partition<T, F>(slc: &mut [T], mut pred: F) -> usize
}
}
/// Partitions a slice in-place with the given predicate, returning the
/// index of the first element for which the predicate evaluated false.
///
/// The predicate is executed precisely once on every element in the
/// slice, and is allowed to modify the elements.
///
/// The only difference between this and plain `partition` is that the
/// predicate is also passed a bool telling it which side of the slice the
/// element is currently on: `false` means the element was reached by the
/// left-to-right scan, `true` means it was reached by the right-to-left
/// scan.
///
/// Works for any element type, including zero-sized types, and for empty
/// slices (which return 0 without calling the predicate).
pub fn partition_with_side<T, F>(slc: &mut [T], mut pred: F) -> usize
    where F: FnMut(&mut T, bool) -> bool
{
    // Positions are tracked as indices rather than raw addresses.  Deriving
    // the result from an address difference requires dividing by
    // `size_of::<T>()`, which is zero (and therefore panics) for zero-sized
    // element types.  Element access is still unchecked, so no bounds checks
    // are introduced.
    let mut left = 0;
    let mut right = slc.len();

    loop {
        // Scan from the left until an element fails the predicate.
        loop {
            if left == right {
                return left;
            }
            // SAFETY: `left < right <= slc.len()`, so `left` is in bounds.
            let keep = pred(unsafe { slc.get_unchecked_mut(left) }, false);
            if !keep {
                break;
            }
            left += 1;
        }

        // Scan from the right until an element passes the predicate.
        // `left < right` holds on entry, so the decrement cannot underflow.
        loop {
            right -= 1;
            if left == right {
                return left;
            }
            // SAFETY: `left < right < slc.len()`, so `right` is in bounds.
            let keep = pred(unsafe { slc.get_unchecked_mut(right) }, true);
            if keep {
                break;
            }
        }

        // The element at `left` failed and the one at `right` passed, so
        // exchange them.
        // SAFETY: `left < right < slc.len()`; both offsets stay inside the
        // slice and refer to distinct, valid elements.
        unsafe {
            let base = slc.as_mut_ptr();
            std::ptr::swap(base.offset(left as isize), base.offset(right as isize));
        }
        left += 1;
    }
}
/// Partitions two slices in-place in concert based on the given unary
/// predicate, returning the index of the first element for which the

View File

@ -186,6 +186,8 @@ fn main() {
let ntime = rtime as f64 / rstats.total_time;
println!("\tRendered scene in {:.3}s", rtime);
println!("\t\tTrace: {:.3}s", ntime * rstats.trace_time);
println!("\t\t\tTraversal: {:.3}s",
ntime * rstats.accel_traversal_time);
println!("\t\tRay generation: {:.3}s",
ntime * rstats.ray_generation_time);
println!("\t\tSample writing: {:.3}s",

View File

@ -9,6 +9,7 @@ use crossbeam::sync::MsQueue;
use scoped_threadpool::Pool;
use algorithm::partition_pair;
use accel::ACCEL_TRAV_TIME;
use color::{Color, XYZ, SpectralSample, map_0_1_to_wavelength};
use hash::hash_u32;
use hilbert;
@ -35,6 +36,7 @@ pub struct Renderer<'a> {
#[derive(Debug, Copy, Clone)]
pub struct RenderStats {
pub trace_time: f64,
pub accel_traversal_time: f64,
pub ray_generation_time: f64,
pub sample_writing_time: f64,
pub total_time: f64,
@ -44,6 +46,7 @@ impl RenderStats {
fn new() -> RenderStats {
RenderStats {
trace_time: 0.0,
accel_traversal_time: 0.0,
ray_generation_time: 0.0,
sample_writing_time: 0.0,
total_time: 0.0,
@ -52,6 +55,7 @@ impl RenderStats {
fn collect(&mut self, other: RenderStats) {
self.trace_time += other.trace_time;
self.accel_traversal_time += other.accel_traversal_time;
self.ray_generation_time += other.ray_generation_time;
self.sample_writing_time += other.sample_writing_time;
self.total_time += other.total_time;
@ -210,6 +214,10 @@ impl<'a> Renderer<'a> {
}
stats.total_time += total_timer.tick() as f64;
ACCEL_TRAV_TIME.with(|att| {
stats.accel_traversal_time = att.get();
att.set(0.0);
});
// Collect stats
cstats.write().unwrap().collect(stats);