Print stats for BVH and BVH4 traversal, and begin work on speeding up BVH4 traversal.
This commit is contained in:
parent
597bcf0518
commit
fdf2b4babf
|
@ -7,8 +7,10 @@ use bbox::BBox;
|
|||
use boundable::Boundable;
|
||||
use lerp::lerp_slice;
|
||||
use ray::AccelRay;
|
||||
use timer::Timer;
|
||||
|
||||
use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH};
|
||||
use super::ACCEL_TRAV_TIME;
|
||||
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
|
@ -61,12 +63,15 @@ impl<'a> BVH<'a> {
|
|||
pub fn traverse<T, F>(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F)
|
||||
where F: FnMut(&T, &mut [AccelRay])
|
||||
{
|
||||
match self.root {
|
||||
None => {}
|
||||
if self.root.is_none() {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut trav_time: f64 = 0.0;
|
||||
let mut timer = Timer::new();
|
||||
|
||||
Some(root) => {
|
||||
// +2 of max depth for root and last child
|
||||
let mut node_stack = [root; BVH_MAX_DEPTH + 2];
|
||||
let mut node_stack = [self.root.unwrap(); BVH_MAX_DEPTH + 2];
|
||||
let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2];
|
||||
let mut stack_ptr = 1;
|
||||
|
||||
|
@ -81,7 +86,7 @@ impl<'a> BVH<'a> {
|
|||
node_stack[stack_ptr + 1] = children.1;
|
||||
ray_i_stack[stack_ptr] = part;
|
||||
ray_i_stack[stack_ptr + 1] = part;
|
||||
if rays[0].dir_inv.get_n(split_axis as usize).is_sign_positive() {
|
||||
if rays[0].dir_inv.get_n(split_axis as usize) >= 0.0 {
|
||||
node_stack.swap(stack_ptr, stack_ptr + 1);
|
||||
}
|
||||
stack_ptr += 1;
|
||||
|
@ -94,18 +99,27 @@ impl<'a> BVH<'a> {
|
|||
let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
|
||||
(!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r)
|
||||
});
|
||||
|
||||
trav_time += timer.tick() as f64;
|
||||
|
||||
if part > 0 {
|
||||
for obj in &objects[object_range.0..object_range.1] {
|
||||
obj_ray_test(obj, &mut rays[..part]);
|
||||
}
|
||||
}
|
||||
|
||||
timer.tick();
|
||||
|
||||
stack_ptr -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
trav_time += timer.tick() as f64;
|
||||
ACCEL_TRAV_TIME.with(|att| {
|
||||
let v = att.get();
|
||||
att.set(v + trav_time);
|
||||
});
|
||||
}
|
||||
|
||||
fn construct_from_base(arena: &'a MemArena,
|
||||
|
|
|
@ -9,14 +9,16 @@
|
|||
|
||||
use mem_arena::MemArena;
|
||||
|
||||
use algorithm::partition;
|
||||
use algorithm::partition_with_side;
|
||||
use bbox::BBox;
|
||||
use bbox4::BBox4;
|
||||
use boundable::Boundable;
|
||||
use lerp::lerp_slice;
|
||||
use ray::AccelRay;
|
||||
use timer::Timer;
|
||||
|
||||
use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH};
|
||||
use super::ACCEL_TRAV_TIME;
|
||||
|
||||
// TRAVERSAL_TABLE
|
||||
include!("bvh4_table.inc");
|
||||
|
@ -87,10 +89,14 @@ impl<'a> BVH4<'a> {
|
|||
return;
|
||||
}
|
||||
|
||||
let mut trav_time: f64 = 0.0;
|
||||
let mut timer = Timer::new();
|
||||
|
||||
// +2 of max depth for root and last child
|
||||
let mut node_stack = [self.root; BVH_MAX_DEPTH + 2];
|
||||
let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2];
|
||||
let mut stack_ptr = 1;
|
||||
let mut unpopped = 0;
|
||||
let mut first_loop = true;
|
||||
|
||||
let ray_code = (rays[0].dir_inv.x().is_sign_negative() as u8) |
|
||||
|
@ -111,8 +117,11 @@ impl<'a> BVH4<'a> {
|
|||
let mut all_hits = 0;
|
||||
|
||||
// Ray testing
|
||||
let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
|
||||
if (!r.is_done()) && (first_loop || r.trav_stack.pop()) {
|
||||
let part = filter_rays(&ray_i_stack[stack_ptr..],
|
||||
&mut rays[..ray_i_stack[stack_ptr]],
|
||||
unpopped,
|
||||
|r, pop_count| {
|
||||
if (!r.is_done()) && (first_loop || r.trav_stack.pop_to_nth(pop_count)) {
|
||||
let hits = lerp_slice(bounds, r.time)
|
||||
.intersect_accel_ray(r)
|
||||
.to_bitmask();
|
||||
|
@ -141,6 +150,7 @@ impl<'a> BVH4<'a> {
|
|||
}
|
||||
return false;
|
||||
});
|
||||
unpopped = 0;
|
||||
|
||||
// Update stack based on ray testing results
|
||||
if part > 0 {
|
||||
|
@ -163,20 +173,29 @@ impl<'a> BVH4<'a> {
|
|||
|
||||
Some(&BVH4Node::Leaf { object_range }) => {
|
||||
let part = if !first_loop {
|
||||
partition(&mut rays[..ray_i_stack[stack_ptr]], |r| r.trav_stack.pop())
|
||||
filter_rays(&ray_i_stack[stack_ptr..],
|
||||
&mut rays[..ray_i_stack[stack_ptr]],
|
||||
unpopped,
|
||||
|r, pop_count| r.trav_stack.pop_to_nth(pop_count))
|
||||
} else {
|
||||
ray_i_stack[stack_ptr]
|
||||
};
|
||||
unpopped = 0;
|
||||
|
||||
trav_time += timer.tick() as f64;
|
||||
|
||||
for obj in &objects[object_range.0..object_range.1] {
|
||||
obj_ray_test(obj, &mut rays[..part]);
|
||||
}
|
||||
|
||||
timer.tick();
|
||||
|
||||
stack_ptr -= 1;
|
||||
}
|
||||
|
||||
None => {
|
||||
if !first_loop {
|
||||
// unpopped += 1;
|
||||
for r in (&mut rays[..ray_i_stack[stack_ptr]]).iter_mut() {
|
||||
r.trav_stack.pop();
|
||||
}
|
||||
|
@ -187,6 +206,12 @@ impl<'a> BVH4<'a> {
|
|||
|
||||
first_loop = false;
|
||||
}
|
||||
|
||||
trav_time += timer.tick() as f64;
|
||||
ACCEL_TRAV_TIME.with(|att| {
|
||||
let v = att.get();
|
||||
att.set(v + trav_time);
|
||||
});
|
||||
}
|
||||
|
||||
fn construct_from_base(arena: &'a MemArena,
|
||||
|
@ -391,3 +416,40 @@ fn calc_traversal_code(split_1: u8, split_2: u8, split_3: u8, topology: u8) -> u
|
|||
static T_TABLE: [u8; 4] = [0, 27, 27 + 9, 27 + 9 + 9];
|
||||
split_1 + (split_2 * 3) + (split_3 * 9) + T_TABLE[topology as usize]
|
||||
}
|
||||
|
||||
|
||||
fn filter_rays<F>(ray_i_stack: &[usize],
|
||||
rays: &mut [AccelRay],
|
||||
unpopped: usize,
|
||||
mut ray_test: F)
|
||||
-> usize
|
||||
where F: FnMut(&mut AccelRay, usize) -> bool
|
||||
{
|
||||
// let part = if unpopped == 0 {
|
||||
partition_with_side(rays, |r, _| ray_test(r, 1))
|
||||
// } else {
|
||||
// let mut part_n = [0, rays.len()]; // Where we are in the partition
|
||||
// let mut part_pop = [unpopped, 0]; // Number of bits to pop on the left and right side
|
||||
|
||||
// partition_with_side(rays, |r, side| {
|
||||
// let pop_count = 1 +
|
||||
// if side {
|
||||
// part_n[1] -= 1;
|
||||
// while part_n[1] < ray_i_stack[part_pop[1] + 1] && part_pop[1] < unpopped {
|
||||
// part_pop[1] += 1;
|
||||
// }
|
||||
// part_pop[1]
|
||||
// } else {
|
||||
// while part_n[0] >= ray_i_stack[part_pop[0]] {
|
||||
// part_pop[0] -= 1;
|
||||
// }
|
||||
// part_n[0] += 1;
|
||||
// part_pop[0]
|
||||
// };
|
||||
|
||||
// return ray_test(r, pop_count);
|
||||
// })
|
||||
// };
|
||||
|
||||
// part
|
||||
}
|
||||
|
|
|
@ -5,6 +5,8 @@ mod light_array;
|
|||
mod light_tree;
|
||||
mod objects_split;
|
||||
|
||||
use std::cell::Cell;
|
||||
|
||||
use math::{Vector, Point, Normal};
|
||||
use shading::surface_closure::SurfaceClosure;
|
||||
|
||||
|
@ -12,6 +14,10 @@ pub use self::bvh::{BVH, BVHNode};
|
|||
pub use self::bvh4::{BVH4, BVH4Node};
|
||||
pub use self::light_tree::LightTree;
|
||||
|
||||
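// Per-thread accumulator of acceleration-structure traversal time. The
// BVH/BVH4 traversal code adds its measured time to this; the renderer
// reads the total and resets it to 0.0 when it collects its stats.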
|
||||
thread_local! {
|
||||
pub static ACCEL_TRAV_TIME: Cell<f64> = Cell::new(0.0);
|
||||
}
|
||||
|
||||
pub trait LightAccel {
|
||||
/// Returns (index_of_light, selection_pdf, whittled_n)
|
||||
|
|
|
@ -77,6 +77,55 @@ pub fn partition<T, F>(slc: &mut [T], mut pred: F) -> usize
|
|||
}
|
||||
}
|
||||
|
||||
/// Partitions a slice in-place with the given predicate, returning the
/// index of the first element for which the predicate evaluated false
/// (equivalently, the number of elements for which it evaluated true).
///
/// After the call, every element in `slc[..index]` passed the predicate
/// and every element in `slc[index..]` failed it.  The relative order of
/// the elements is not preserved.
///
/// The predicate is executed precisely once on every element in the
/// slice, and is allowed to modify the elements.
///
/// The only difference between this and plain `partition` is that the
/// predicate is also passed a bool telling it which end of the slice the
/// element was at when it was tested: `false` means the left (front) end,
/// `true` means the right (back) end.
///
/// Empty slices return 0.  Zero-sized element types are supported: the
/// result is tracked as an index, never derived from a byte distance
/// divided by `size_of::<T>()`.
pub fn partition_with_side<T, F>(slc: &mut [T], mut pred: F) -> usize
    where F: FnMut(&mut T, bool) -> bool
{
    // Invariant at the top of the outer loop: `left <= right <= slc.len()`.
    // Everything in `slc[..left]` has tested true, everything in
    // `slc[right..]` has tested false, and `slc[left..right]` is untested.
    //
    // Element access is unchecked to keep bounds checks out of this hot
    // loop; each access is justified by the invariant above.
    let mut left = 0;
    let mut right = slc.len();

    loop {
        // Scan forward from the left until an element fails the predicate.
        loop {
            if left == right {
                return left;
            }
            // SAFETY: `left != right` and `left <= right <= slc.len()`,
            // so `left < slc.len()`.
            if !pred(unsafe { slc.get_unchecked_mut(left) }, false) {
                break;
            }
            left += 1;
        }

        // Scan backward from the right until an element passes the
        // predicate.  `slc[left]` has already been tested (false), so the
        // scan stops as soon as it reaches `left`.
        loop {
            // Cannot underflow: `right > left >= 0` whenever we get here.
            right -= 1;
            if left == right {
                return left;
            }
            // SAFETY: `right` was at most `slc.len()` before the decrement
            // above, so `right < slc.len()`.
            if pred(unsafe { slc.get_unchecked_mut(right) }, true) {
                break;
            }
        }

        // `slc[left]` failed and `slc[right]` passed: exchange them so
        // each ends up on its correct side.
        slc.swap(left, right);
        left += 1;
    }
}
|
||||
|
||||
|
||||
/// Partitions two slices in-place in concert based on the given unary
|
||||
/// predicate, returning the index of the first element for which the
|
||||
|
|
|
@ -186,6 +186,8 @@ fn main() {
|
|||
let ntime = rtime as f64 / rstats.total_time;
|
||||
println!("\tRendered scene in {:.3}s", rtime);
|
||||
println!("\t\tTrace: {:.3}s", ntime * rstats.trace_time);
|
||||
println!("\t\t\tTraversal: {:.3}s",
|
||||
ntime * rstats.accel_traversal_time);
|
||||
println!("\t\tRay generation: {:.3}s",
|
||||
ntime * rstats.ray_generation_time);
|
||||
println!("\t\tSample writing: {:.3}s",
|
||||
|
|
|
@ -9,6 +9,7 @@ use crossbeam::sync::MsQueue;
|
|||
use scoped_threadpool::Pool;
|
||||
|
||||
use algorithm::partition_pair;
|
||||
use accel::ACCEL_TRAV_TIME;
|
||||
use color::{Color, XYZ, SpectralSample, map_0_1_to_wavelength};
|
||||
use hash::hash_u32;
|
||||
use hilbert;
|
||||
|
@ -35,6 +36,7 @@ pub struct Renderer<'a> {
|
|||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct RenderStats {
|
||||
pub trace_time: f64,
|
||||
pub accel_traversal_time: f64,
|
||||
pub ray_generation_time: f64,
|
||||
pub sample_writing_time: f64,
|
||||
pub total_time: f64,
|
||||
|
@ -44,6 +46,7 @@ impl RenderStats {
|
|||
fn new() -> RenderStats {
|
||||
RenderStats {
|
||||
trace_time: 0.0,
|
||||
accel_traversal_time: 0.0,
|
||||
ray_generation_time: 0.0,
|
||||
sample_writing_time: 0.0,
|
||||
total_time: 0.0,
|
||||
|
@ -52,6 +55,7 @@ impl RenderStats {
|
|||
|
||||
fn collect(&mut self, other: RenderStats) {
|
||||
self.trace_time += other.trace_time;
|
||||
self.accel_traversal_time += other.accel_traversal_time;
|
||||
self.ray_generation_time += other.ray_generation_time;
|
||||
self.sample_writing_time += other.sample_writing_time;
|
||||
self.total_time += other.total_time;
|
||||
|
@ -210,6 +214,10 @@ impl<'a> Renderer<'a> {
|
|||
}
|
||||
|
||||
stats.total_time += total_timer.tick() as f64;
|
||||
ACCEL_TRAV_TIME.with(|att| {
|
||||
stats.accel_traversal_time = att.get();
|
||||
att.set(0.0);
|
||||
});
|
||||
|
||||
// Collect stats
|
||||
cstats.write().unwrap().collect(stats);
|
||||
|
|
Loading…
Reference in New Issue
Block a user