diff --git a/Cargo.toml b/Cargo.toml index 0afa579..628dfe5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,9 @@ name = "psychopath" version = "0.1.0" authors = ["Nathan Vegdahl "] +[profile.release] +debug = true + [dependencies] docopt = "0.6" rustc-serialize = "0.3" diff --git a/src/bbox.rs b/src/bbox.rs index 75ecea2..7af6478 100644 --- a/src/bbox.rs +++ b/src/bbox.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] use std; -use std::ops::BitOr; +use std::ops::{BitOr, BitOrAssign}; use std::iter::Iterator; use math::{Point, Matrix4x4}; @@ -77,6 +77,14 @@ impl BBox { return b; } + + pub fn surface_area(&self) -> f32 { + let x = self.max[0] - self.min[0]; + let y = self.max[1] - self.min[1]; + let z = self.max[2] - self.min[2]; + + ((x * y) + (y * z) + (z * x)) * 2.0 + } } @@ -90,6 +98,12 @@ impl BitOr for BBox { } } +impl BitOrAssign for BBox { + fn bitor_assign(&mut self, rhs: BBox) { + *self = *self | rhs; + } +} + impl Lerp for BBox { fn lerp(self, other: BBox, alpha: f32) -> BBox { diff --git a/src/bvh.rs b/src/bvh.rs index 2fe8081..2419b07 100644 --- a/src/bvh.rs +++ b/src/bvh.rs @@ -1,5 +1,6 @@ #![allow(dead_code)] +use std; use std::cmp::Ordering; use quickersort::sort_by; use lerp::lerp_slice; @@ -10,6 +11,7 @@ use algorithm::{partition, merge_slices_append}; use math::log2_64; const BVH_MAX_DEPTH: usize = 64; +const SAH_BIN_COUNT: usize = 13; // Prime numbers work best, for some reason #[derive(Debug)] pub struct BVH { @@ -114,47 +116,110 @@ impl BVH { split_axis: 0, }); - // Determine which axis to split on + // Get combined object bounds let bounds = { let mut bb = BBox::new(); for obj in &objects[..] { - bb = bb | lerp_slice(bounder(obj), 0.5); + bb |= lerp_slice(bounder(obj), 0.5); } bb }; - let split_axis = { - let x_ext = bounds.max[0] - bounds.min[0]; - let y_ext = bounds.max[1] - bounds.min[1]; - let z_ext = bounds.max[2] - bounds.min[2]; - if x_ext > y_ext && x_ext > z_ext { - 0 - } else if y_ext > z_ext { - 1 - } else { - 2 - } - }; - // Partition objects based on split. + // Partition objects. // If we're too near the max depth, we do balanced building to // avoid exceeding max depth. - // Otherwise we do cooler clever stuff to build better trees. - let split_index = if (log2_64(objects.len() as u64) as usize) < - (BVH_MAX_DEPTH - depth) { - // Clever splitting, when we have room to play - let split_pos = (bounds.min[split_axis] + bounds.max[split_axis]) * 0.5; + // Otherwise we do SAH splitting to build better trees. + let (split_index, split_axis) = if (log2_64(objects.len() as u64) as usize) < + (BVH_MAX_DEPTH - depth) { + // SAH splitting, when we have room to play + + // Pre-calc SAH div points + let sah_divs = { + let mut sah_divs = [[0.0f32; SAH_BIN_COUNT - 1]; 3]; + for d in 0..3 { + let extent = bounds.max[d] - bounds.min[d]; + for div in 0..(SAH_BIN_COUNT - 1) { + let part = extent * ((div + 1) as f32 / SAH_BIN_COUNT as f32); + sah_divs[d][div] = bounds.min[d] + part; + } + } + sah_divs + }; + + // Build SAH bins + let sah_bins = { + let mut sah_bins = [[(BBox::new(), BBox::new(), 0, 0); SAH_BIN_COUNT - 1]; 3]; + for obj in objects.iter() { + let tb = lerp_slice(bounder(obj), 0.5); + let centroid = (tb.min.into_vector() + tb.max.into_vector()) * 0.5; + + for d in 0..3 { + for div in 0..(SAH_BIN_COUNT - 1) { + if centroid[d] <= sah_divs[d][div] { + sah_bins[d][div].0 |= tb; + sah_bins[d][div].2 += 1; + } else { + sah_bins[d][div].1 |= tb; + sah_bins[d][div].3 += 1; + } + } + } + } + sah_bins + }; + + // Find best split axis and div point + let (split_axis, div) = { + let mut dim = 0; + let mut div_n = 0.0; + let mut smallest_cost = std::f32::INFINITY; + + for d in 0..3 { + for div in 0..(SAH_BIN_COUNT - 1) { + let left_cost = sah_bins[d][div].0.surface_area() * + sah_bins[d][div].2 as f32; + let right_cost = sah_bins[d][div].1.surface_area() * + sah_bins[d][div].3 as f32; + let tot_cost = left_cost + right_cost; + if tot_cost < smallest_cost { + dim = d; + div_n = sah_divs[d][div]; + smallest_cost = tot_cost; + } + } + } + + (dim, div_n) + }; + + // Partition let mut split_i = partition(&mut objects[..], |obj| { let tb = lerp_slice(bounder(obj), 0.5); let centroid = (tb.min[split_axis] + tb.max[split_axis]) * 0.5; - centroid < split_pos + centroid < div }); if split_i < 1 { split_i = 1; + } else if split_i >= objects.len() { + split_i = objects.len() - 1; } - split_i + (split_i, split_axis) } else { // Balanced splitting, when we don't have room to play + let split_axis = { + let mut axis = 0; + let mut largest = std::f32::NEG_INFINITY; + for i in 0..3 { + let extent = bounds.max[i] - bounds.min[i]; + if extent > largest { + largest = extent; + axis = i; + } + } + axis + }; + sort_by(objects, &|a, b| { let tb_a = lerp_slice(bounder(a), 0.5); @@ -171,7 +236,7 @@ impl BVH { } }); - objects.len() / 2 + (objects.len() / 2, split_axis) }; // Create child nodes @@ -215,8 +280,9 @@ impl BVH { return; } - let mut i_stack = [0; BVH_MAX_DEPTH + 1]; - let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 1]; + // +2 of max depth for root and last child + let mut i_stack = [0; BVH_MAX_DEPTH + 2]; + let mut ray_i_stack = [rays.len(); BVH_MAX_DEPTH + 2]; let mut stack_ptr = 1; while stack_ptr > 0 { diff --git a/src/math/mod.rs b/src/math/mod.rs index 6c22d85..036a1fb 100644 --- a/src/math/mod.rs +++ b/src/math/mod.rs @@ -46,14 +46,15 @@ pub fn log2_64(value: u64) -> u64 { 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31, 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5]; - let value = value | (value >> 1); - let value = value | (value >> 2); - let value = value | (value >> 4); - let value = value | (value >> 8); - let value = value | (value >> 16); - let value = value | (value >> 32); + let value = value | value.wrapping_shr(1); + let value = value | value.wrapping_shr(2); + let value = value | value.wrapping_shr(4); + let value = value | value.wrapping_shr(8); + let value = value | value.wrapping_shr(16); + let value = value | value.wrapping_shr(32); - TAB64[((((value - (value >> 1)) * 0x07EDD5E59A4E28C2)) >> 58) as usize] + TAB64[((value.wrapping_sub(value.wrapping_shr(1)) as u64).wrapping_mul(0x07EDD5E59A4E28C2)) + .wrapping_shr(58) as usize] }