Minor optimization in the transform stack.

Got rid of the scratch space by writing directly into the main
stack vector when merging transforms.  Involves a small bit of
unsafe code.
This commit is contained in:
Nathan Vegdahl 2016-08-06 01:51:20 -07:00
parent 91d88cbdf9
commit 024ca560af
2 changed files with 47 additions and 11 deletions

View File

@ -1,6 +1,7 @@
#![allow(dead_code)] #![allow(dead_code)]
use std; use std;
use std::cmp;
use lerp::{Lerp, lerp_slice}; use lerp::{Lerp, lerp_slice};
@ -140,3 +141,36 @@ pub fn merge_slices_append<T: Lerp + Copy, F>(slice1: &[T],
} }
} }
} }
/// Merges two slices of things, storing the result in `slice_out`.
///
/// Every element of `slice_out` is written before this function returns:
///
/// * If both inputs have the same length, elements are merged pairwise.
/// * If the lengths differ (and neither is empty), the shorter slice is
///   resampled with `lerp_slice` at evenly spaced positions so that each
///   element of the longer slice has a partner to merge with.
/// * If one input is empty, there is nothing to merge with, so the other
///   input is copied to `slice_out` unchanged.  This guarantees callers
///   that hand in not-yet-initialized storage never get it back unwritten.
///
/// `merge` is always called as `merge(item_from_slice1, item_from_slice2)`.
///
/// # Panics
///
/// Panics if `slice_out.len()` is not `max(slice1.len(), slice2.len())`.
pub fn merge_slices_to<T: Lerp + Copy, F>(slice1: &[T],
                                          slice2: &[T],
                                          slice_out: &mut [T],
                                          merge: F)
    where F: Fn(&T, &T) -> T
{
    assert_eq!(slice_out.len(), cmp::max(slice1.len(), slice2.len()));

    if slice1.is_empty() {
        // Also covers the both-empty case, where this copies zero elements.
        // The assert above guarantees the lengths match.
        slice_out.copy_from_slice(slice2);
    } else if slice2.is_empty() {
        slice_out.copy_from_slice(slice1);
    } else if slice1.len() == slice2.len() {
        for (out, (a, b)) in slice_out.iter_mut().zip(slice1.iter().zip(slice2.iter())) {
            *out = merge(a, b);
        }
    } else if slice1.len() > slice2.len() {
        // slice1 has at least two elements here, so `s` is never zero.
        let s = (slice1.len() - 1) as f32;
        for (i, (out, a)) in slice_out.iter_mut().zip(slice1.iter()).enumerate() {
            let b = lerp_slice(slice2, i as f32 / s);
            *out = merge(a, &b);
        }
    } else {
        // slice2 is the longer one and has at least two elements.
        let s = (slice2.len() - 1) as f32;
        for (i, (out, b)) in slice_out.iter_mut().zip(slice2.iter()).enumerate() {
            let a = lerp_slice(slice1, i as f32 / s);
            *out = merge(&a, b);
        }
    }
}

View File

@ -1,6 +1,7 @@
use std::iter; use std::iter;
use std::cmp;
use algorithm::{partition, merge_slices_append}; use algorithm::{partition, merge_slices_to};
use math::Matrix4x4; use math::Matrix4x4;
use lerp::lerp_slice; use lerp::lerp_slice;
use assembly::{Assembly, Object, InstanceType}; use assembly::{Assembly, Object, InstanceType};
@ -179,7 +180,6 @@ fn split_rays_by_direction(rays: &mut [AccelRay]) -> [&mut [AccelRay]; 8] {
struct TransformStack { struct TransformStack {
stack: Vec<Matrix4x4>, stack: Vec<Matrix4x4>,
stack_indices: Vec<usize>, stack_indices: Vec<usize>,
scratch_space: Vec<Matrix4x4>,
} }
impl TransformStack { impl TransformStack {
@ -187,7 +187,6 @@ impl TransformStack {
let mut ts = TransformStack { let mut ts = TransformStack {
stack: Vec::new(), stack: Vec::new(),
stack_indices: Vec::new(), stack_indices: Vec::new(),
scratch_space: Vec::new(),
}; };
ts.stack_indices.push(0); ts.stack_indices.push(0);
@ -205,14 +204,17 @@ impl TransformStack {
let sil = self.stack_indices.len(); let sil = self.stack_indices.len();
let i1 = self.stack_indices[sil - 2]; let i1 = self.stack_indices[sil - 2];
let i2 = self.stack_indices[sil - 1]; let i2 = self.stack_indices[sil - 1];
// Reserve stack space for the new transforms.
self.scratch_space.clear(); // Note this leaves exposed uninitialized memory. The subsequent call to
merge_slices_append(&self.stack[i1..i2], // merge_slices_to() fills that memory in.
xforms, {
&mut self.scratch_space, let maxlen = cmp::max(xforms.len(), i2 - i1);
|xf1, xf2| *xf1 * *xf2); self.stack.reserve(maxlen);
let l = self.stack.len();
self.stack.extend(&self.scratch_space); unsafe { self.stack.set_len(l + maxlen) };
}
let (xfs1, xfs2) = self.stack.split_at_mut(i2);
merge_slices_to(&xfs1[i1..i2], xforms, xfs2, |xf1, xf2| *xf1 * *xf2);
} }
self.stack_indices.push(self.stack.len()); self.stack_indices.push(self.stack.len());