diff --git a/src/accel/bvh.rs b/src/accel/bvh.rs index 075df2f..8a57592 100644 --- a/src/accel/bvh.rs +++ b/src/accel/bvh.rs @@ -7,8 +7,10 @@ use bbox::BBox; use boundable::Boundable; use lerp::lerp_slice; use ray::AccelRay; +use timer::Timer; use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH}; +use super::ACCEL_TRAV_TIME; #[derive(Copy, Clone, Debug)] @@ -61,51 +63,63 @@ impl<'a> BVH<'a> { pub fn traverse<T, F>(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F) where F: FnMut(&T, &mut [AccelRay]) { - match self.root { - None => {} + if self.root.is_none() { + return; + } - Some(root) => { - // +2 of max depth for root and last child - let mut node_stack = [root; BVH_MAX_DEPTH + 2]; - let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2]; - let mut stack_ptr = 1; + let mut trav_time: f64 = 0.0; + let mut timer = Timer::new(); - while stack_ptr > 0 { - match node_stack[stack_ptr] { - &BVHNode::Internal { bounds, children, split_axis } => { - let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| { - (!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r) - }); - if part > 0 { - node_stack[stack_ptr] = children.0; - node_stack[stack_ptr + 1] = children.1; - ray_i_stack[stack_ptr] = part; - ray_i_stack[stack_ptr + 1] = part; - if rays[0].dir_inv.get_n(split_axis as usize).is_sign_positive() { - node_stack.swap(stack_ptr, stack_ptr + 1); - } - stack_ptr += 1; - } else { - stack_ptr -= 1; - } + // +2 of max depth for root and last child + let mut node_stack = [self.root.unwrap(); BVH_MAX_DEPTH + 2]; + let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2]; + let mut stack_ptr = 1; + + while stack_ptr > 0 { + match node_stack[stack_ptr] { + &BVHNode::Internal { bounds, children, split_axis } => { + let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| { + (!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r) + }); + if part > 0 { + node_stack[stack_ptr] = children.0; + node_stack[stack_ptr + 1] = children.1; + 
ray_i_stack[stack_ptr] = part; + ray_i_stack[stack_ptr + 1] = part; + if rays[0].dir_inv.get_n(split_axis as usize) >= 0.0 { + node_stack.swap(stack_ptr, stack_ptr + 1); } + stack_ptr += 1; + } else { + stack_ptr -= 1; + } + } - &BVHNode::Leaf { bounds, object_range } => { - let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| { - (!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r) - }); - if part > 0 { - for obj in &objects[object_range.0..object_range.1] { - obj_ray_test(obj, &mut rays[..part]); - } - } + &BVHNode::Leaf { bounds, object_range } => { + let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| { + (!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r) + }); - stack_ptr -= 1; + trav_time += timer.tick() as f64; + + if part > 0 { + for obj in &objects[object_range.0..object_range.1] { + obj_ray_test(obj, &mut rays[..part]); } } + + timer.tick(); + + stack_ptr -= 1; } } } + + trav_time += timer.tick() as f64; + ACCEL_TRAV_TIME.with(|att| { + let v = att.get(); + att.set(v + trav_time); + }); } fn construct_from_base(arena: &'a MemArena, diff --git a/src/accel/bvh4.rs b/src/accel/bvh4.rs index 38d74e5..5b84fb4 100644 --- a/src/accel/bvh4.rs +++ b/src/accel/bvh4.rs @@ -9,14 +9,16 @@ use mem_arena::MemArena; -use algorithm::partition; +use algorithm::partition_with_side; use bbox::BBox; use bbox4::BBox4; use boundable::Boundable; use lerp::lerp_slice; use ray::AccelRay; +use timer::Timer; use super::bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH}; +use super::ACCEL_TRAV_TIME; // TRAVERSAL_TABLE include!("bvh4_table.inc"); @@ -87,10 +89,14 @@ impl<'a> BVH4<'a> { return; } + let mut trav_time: f64 = 0.0; + let mut timer = Timer::new(); + // +2 of max depth for root and last child let mut node_stack = [self.root; BVH_MAX_DEPTH + 2]; let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2]; let mut stack_ptr = 1; + let mut unpopped = 0; let mut first_loop = true; let ray_code = (rays[0].dir_inv.x().is_sign_negative() as 
u8) | @@ -111,8 +117,11 @@ impl<'a> BVH4<'a> { let mut all_hits = 0; // Ray testing - let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| { - if (!r.is_done()) && (first_loop || r.trav_stack.pop()) { + let part = filter_rays(&ray_i_stack[stack_ptr..], + &mut rays[..ray_i_stack[stack_ptr]], + unpopped, + |r, pop_count| { + if (!r.is_done()) && (first_loop || r.trav_stack.pop_to_nth(pop_count)) { let hits = lerp_slice(bounds, r.time) .intersect_accel_ray(r) .to_bitmask(); @@ -141,6 +150,7 @@ impl<'a> BVH4<'a> { } return false; }); + unpopped = 0; // Update stack based on ray testing results if part > 0 { @@ -163,20 +173,29 @@ impl<'a> BVH4<'a> { Some(&BVH4Node::Leaf { object_range }) => { let part = if !first_loop { - partition(&mut rays[..ray_i_stack[stack_ptr]], |r| r.trav_stack.pop()) + filter_rays(&ray_i_stack[stack_ptr..], + &mut rays[..ray_i_stack[stack_ptr]], + unpopped, + |r, pop_count| r.trav_stack.pop_to_nth(pop_count)) } else { ray_i_stack[stack_ptr] }; + unpopped = 0; + + trav_time += timer.tick() as f64; for obj in &objects[object_range.0..object_range.1] { obj_ray_test(obj, &mut rays[..part]); } + timer.tick(); + stack_ptr -= 1; } None => { if !first_loop { + // unpopped += 1; for r in (&mut rays[..ray_i_stack[stack_ptr]]).iter_mut() { r.trav_stack.pop(); } @@ -187,6 +206,12 @@ impl<'a> BVH4<'a> { first_loop = false; } + + trav_time += timer.tick() as f64; + ACCEL_TRAV_TIME.with(|att| { + let v = att.get(); + att.set(v + trav_time); + }); } fn construct_from_base(arena: &'a MemArena, @@ -391,3 +416,40 @@ fn calc_traversal_code(split_1: u8, split_2: u8, split_3: u8, topology: u8) -> u static T_TABLE: [u8; 4] = [0, 27, 27 + 9, 27 + 9 + 9]; split_1 + (split_2 * 3) + (split_3 * 9) + T_TABLE[topology as usize] } + + +fn filter_rays<F>(ray_i_stack: &[usize], + rays: &mut [AccelRay], + unpopped: usize, + mut ray_test: F) + -> usize + where F: FnMut(&mut AccelRay, usize) -> bool +{ + // let part = if unpopped == 0 { + partition_with_side(rays, |r, _| 
ray_test(r, 1)) + // } else { + // let mut part_n = [0, rays.len()]; // Where we are in the partition + // let mut part_pop = [unpopped, 0]; // Number of bits to pop on the left and right side + + // partition_with_side(rays, |r, side| { + // let pop_count = 1 + + // if side { + // part_n[1] -= 1; + // while part_n[1] < ray_i_stack[part_pop[1] + 1] && part_pop[1] < unpopped { + // part_pop[1] += 1; + // } + // part_pop[1] + // } else { + // while part_n[0] >= ray_i_stack[part_pop[0]] { + // part_pop[0] -= 1; + // } + // part_n[0] += 1; + // part_pop[0] + // }; + + // return ray_test(r, pop_count); + // }) + // }; + + // part +} diff --git a/src/accel/mod.rs b/src/accel/mod.rs index 34b8a41..0000196 100644 --- a/src/accel/mod.rs +++ b/src/accel/mod.rs @@ -5,6 +5,8 @@ mod light_array; mod light_tree; mod objects_split; +use std::cell::Cell; + use math::{Vector, Point, Normal}; use shading::surface_closure::SurfaceClosure; @@ -12,6 +14,10 @@ pub use self::bvh::{BVH, BVHNode}; pub use self::bvh4::{BVH4, BVH4Node}; pub use self::light_tree::LightTree; +// Track BVH traversal time +thread_local! { + pub static ACCEL_TRAV_TIME: Cell<f64> = Cell::new(0.0); +} pub trait LightAccel { /// Returns (index_of_light, selection_pdf, whittled_n) diff --git a/src/algorithm.rs b/src/algorithm.rs index b4b6231..2355d48 100644 --- a/src/algorithm.rs +++ b/src/algorithm.rs @@ -77,6 +77,55 @@ pub fn partition<T, F>(slc: &mut [T], mut pred: F) -> usize } } +/// Partitions a slice in-place with the given unary predicate, returning +/// the index of the first element for which the predicate evaluates +/// false. +/// +/// The predicate is executed precisely once on every element in +/// the slice, and is allowed to modify the elements. +/// +/// The only difference between this and plain partition above, is that +/// the predicate function is passed a bool representing which side +/// of the array we're currently on: left or right. False means left, +/// True means right. 
+pub fn partition_with_side<T, F>(slc: &mut [T], mut pred: F) -> usize + where F: FnMut(&mut T, bool) -> bool +{ + // This version uses raw pointers and pointer arithmetic to squeeze more + // performance out of the code. + unsafe { + let mut a = slc.as_mut_ptr(); + let mut b = a.offset(slc.len() as isize); + let start = a as usize; + + loop { + loop { + if a == b { + return ((a as usize) - start) / std::mem::size_of::<T>(); + } + if !pred(&mut *a, false) { + break; + } + a = a.offset(1); + } + + loop { + b = b.offset(-1); + if a == b { + return ((a as usize) - start) / std::mem::size_of::<T>(); + } + if pred(&mut *b, true) { + break; + } + } + + std::ptr::swap(a, b); + + a = a.offset(1); + } + } +} + /// Partitions two slices in-place in concert based on the given unary /// predicate, returning the index of the first element for which the diff --git a/src/main.rs b/src/main.rs index dbdd91b..5668eba 100644 --- a/src/main.rs +++ b/src/main.rs @@ -186,6 +186,8 @@ fn main() { let ntime = rtime as f64 / rstats.total_time; println!("\tRendered scene in {:.3}s", rtime); println!("\t\tTrace: {:.3}s", ntime * rstats.trace_time); + println!("\t\t\tTraversal: {:.3}s", + ntime * rstats.accel_traversal_time); println!("\t\tRay generation: {:.3}s", ntime * rstats.ray_generation_time); println!("\t\tSample writing: {:.3}s", diff --git a/src/renderer.rs b/src/renderer.rs index cbbba60..e1420bf 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -9,6 +9,7 @@ use crossbeam::sync::MsQueue; use scoped_threadpool::Pool; use algorithm::partition_pair; +use accel::ACCEL_TRAV_TIME; use color::{Color, XYZ, SpectralSample, map_0_1_to_wavelength}; use hash::hash_u32; use hilbert; @@ -35,6 +36,7 @@ pub struct Renderer<'a> { #[derive(Debug, Copy, Clone)] pub struct RenderStats { pub trace_time: f64, + pub accel_traversal_time: f64, pub ray_generation_time: f64, pub sample_writing_time: f64, pub total_time: f64, @@ -44,6 +46,7 @@ impl RenderStats { fn new() -> RenderStats { RenderStats { trace_time: 
0.0, + accel_traversal_time: 0.0, ray_generation_time: 0.0, sample_writing_time: 0.0, total_time: 0.0, @@ -52,6 +55,7 @@ impl RenderStats { fn collect(&mut self, other: RenderStats) { self.trace_time += other.trace_time; + self.accel_traversal_time += other.accel_traversal_time; self.ray_generation_time += other.ray_generation_time; self.sample_writing_time += other.sample_writing_time; self.total_time += other.total_time; @@ -210,6 +214,10 @@ impl<'a> Renderer<'a> { } stats.total_time += total_timer.tick() as f64; + ACCEL_TRAV_TIME.with(|att| { + stats.accel_traversal_time = att.get(); + att.set(0.0); + }); // Collect stats cstats.write().unwrap().collect(stats);