Move multiple-object logic out of BVH4.

This allows each part of Psychopath to handle the logic in the
best way, instead of a one-size-fits-all approach.
This commit is contained in:
Nathan Vegdahl 2019-06-29 08:28:41 +09:00
parent 874b07df02
commit 4ef376dc89
4 changed files with 163 additions and 164 deletions

View File

@ -96,14 +96,9 @@ impl<'a> BVH4<'a> {
self.depth self.depth
} }
pub fn traverse<T, F>( pub fn traverse<F>(&self, rays: &mut RayBatch, ray_stack: &mut RayStack, mut obj_ray_test: F)
&self, where
rays: &mut RayBatch, F: FnMut(std::ops::Range<usize>, &mut RayBatch, &mut RayStack),
ray_stack: &mut RayStack,
objects: &[T],
mut obj_ray_test: F,
) where
F: FnMut(&T, &mut RayBatch, &mut RayStack),
{ {
if self.root.is_none() { if self.root.is_none() {
return; return;
@ -170,16 +165,8 @@ impl<'a> BVH4<'a> {
&BVH4Node::Leaf { object_range } => { &BVH4Node::Leaf { object_range } => {
trav_time += timer.tick() as f64; trav_time += timer.tick() as f64;
// Set up the tasks for each object.
let obj_count = object_range.1 - object_range.0;
for _ in 0..(obj_count - 1) {
ray_stack.duplicate_next_task();
}
// Do the ray tests. // Do the ray tests.
for obj in &objects[object_range.0..object_range.1] { obj_ray_test(object_range.0..object_range.1, rays, ray_stack);
obj_ray_test(obj, rays, ray_stack);
}
timer.tick(); timer.tick();

View File

@ -292,24 +292,37 @@ impl RayStack {
self.lanes[l].end_len = self.lanes[l].idxs.len(); self.lanes[l].end_len = self.lanes[l].idxs.len();
} }
/// Pops the next task off the stack, and executes the provided closure for // Pops the next task off the stack.
/// each ray index in the task. pub fn pop_task(&mut self) {
pub fn pop_do_next_task<F>(&mut self, mut handle_ray: F) let task = self.tasks.pop().unwrap();
self.lanes[task.lane].end_len = task.start_idx;
self.lanes[task.lane].idxs.truncate(task.start_idx);
}
// Executes a task without popping it from the task stack.
pub fn do_next_task<F>(&mut self, mut handle_ray: F)
where where
F: FnMut(usize), F: FnMut(usize),
{ {
// Pop the task and do necessary bookkeeping. let task = self.tasks.last().unwrap();
let task = self.tasks.pop().unwrap();
let task_range = (task.start_idx, self.lanes[task.lane].end_len); let task_range = (task.start_idx, self.lanes[task.lane].end_len);
self.lanes[task.lane].end_len = task.start_idx;
// Execute task. // Execute task.
for i in task_range.0..task_range.1 { for i in task_range.0..task_range.1 {
let ray_idx = self.lanes[task.lane].idxs[i]; let ray_idx = self.lanes[task.lane].idxs[i];
handle_ray(ray_idx as usize); handle_ray(ray_idx as usize);
} }
}
self.lanes[task.lane].idxs.truncate(task_range.0); /// Pops the next task off the stack, and executes the provided closure for
/// each ray index in the task.
#[inline(always)]
pub fn pop_do_next_task<F>(&mut self, handle_ray: F)
where
F: FnMut(usize),
{
self.do_next_task(handle_ray);
self.pop_task();
} }
/// Pops the next task off the stack, executes the provided closure for /// Pops the next task off the stack, executes the provided closure for

View File

@ -130,153 +130,154 @@ impl<'a> Surface for TriangleMesh<'a> {
Matrix4x4::new() Matrix4x4::new()
}; };
self.accel.traverse( self.accel
rays, .traverse(rays, ray_stack, |idx_range, rays, ray_stack| {
ray_stack, for tri_idx in idx_range {
self.indices, let tri_indices = self.indices[tri_idx];
|tri_indices, rays, ray_stack| {
// For static triangles with static transforms, cache them.
let is_cached = self.time_sample_count == 1 && space.len() <= 1;
let mut tri = if is_cached {
let tri = (
self.vertices[tri_indices.0 as usize],
self.vertices[tri_indices.1 as usize],
self.vertices[tri_indices.2 as usize],
);
if space.is_empty() {
tri
} else {
(
tri.0 * static_mat_space,
tri.1 * static_mat_space,
tri.2 * static_mat_space,
)
}
} else {
unsafe { std::mem::uninitialized() }
};
// Test each ray against the current triangle. // For static triangles with static transforms, cache them.
ray_stack.pop_do_next_task(|ray_idx| { let is_cached = self.time_sample_count == 1 && space.len() <= 1;
let ray_idx = ray_idx as usize; let mut tri = if is_cached {
let ray_time = rays.time(ray_idx); let tri = (
self.vertices[tri_indices.0 as usize],
// Get triangle if necessary self.vertices[tri_indices.1 as usize],
if !is_cached { self.vertices[tri_indices.2 as usize],
tri = if self.time_sample_count == 1 { );
// No deformation motion blur, so fast-path it. if space.is_empty() {
tri
} else {
( (
self.vertices[tri_indices.0 as usize], tri.0 * static_mat_space,
self.vertices[tri_indices.1 as usize], tri.1 * static_mat_space,
self.vertices[tri_indices.2 as usize], tri.2 * static_mat_space,
) )
} else {
// Deformation motion blur, need to interpolate.
let p0_slice = &self.vertices[(tri_indices.0 as usize
* self.time_sample_count)
..((tri_indices.0 as usize + 1) * self.time_sample_count)];
let p1_slice = &self.vertices[(tri_indices.1 as usize
* self.time_sample_count)
..((tri_indices.1 as usize + 1) * self.time_sample_count)];
let p2_slice = &self.vertices[(tri_indices.2 as usize
* self.time_sample_count)
..((tri_indices.2 as usize + 1) * self.time_sample_count)];
let p0 = lerp_slice(p0_slice, ray_time);
let p1 = lerp_slice(p1_slice, ray_time);
let p2 = lerp_slice(p2_slice, ray_time);
(p0, p1, p2)
};
}
// Transform triangle if necessary, and get transform space.
let mat_space = if !space.is_empty() {
if space.len() > 1 {
// Per-ray transform, for motion blur
let mat_space = lerp_slice(space, ray_time).inverse();
tri = (tri.0 * mat_space, tri.1 * mat_space, tri.2 * mat_space);
mat_space
} else {
// Same transform for all rays
if !is_cached {
tri = (
tri.0 * static_mat_space,
tri.1 * static_mat_space,
tri.2 * static_mat_space,
);
}
static_mat_space
} }
} else { } else {
// No transforms unsafe { std::mem::uninitialized() }
Matrix4x4::new()
}; };
// Test ray against triangle // Test each ray against the current triangle.
if let Some((t, b0, b1, b2)) = triangle::intersect_ray( ray_stack.do_next_task(|ray_idx| {
rays.orig(ray_idx), let ray_idx = ray_idx as usize;
rays.dir(ray_idx), let ray_time = rays.time(ray_idx);
rays.max_t(ray_idx),
tri,
) {
if rays.is_occlusion(ray_idx) {
isects[ray_idx] = SurfaceIntersection::Occlude;
rays.mark_done(ray_idx);
} else {
// Calculate intersection point and error magnitudes
let (pos, pos_err) = triangle::surface_point(tri, (b0, b1, b2));
// Calculate geometric surface normal // Get triangle if necessary
let geo_normal = cross(tri.0 - tri.1, tri.0 - tri.2).into_normal(); if !is_cached {
tri = if self.time_sample_count == 1 {
// Calculate interpolated surface normal, if any // No deformation motion blur, so fast-path it.
let shading_normal = if let Some(normals) = self.normals { (
let n0_slice = &normals[(tri_indices.0 as usize self.vertices[tri_indices.0 as usize],
self.vertices[tri_indices.1 as usize],
self.vertices[tri_indices.2 as usize],
)
} else {
// Deformation motion blur, need to interpolate.
let p0_slice = &self.vertices[(tri_indices.0 as usize
* self.time_sample_count) * self.time_sample_count)
..((tri_indices.0 as usize + 1) * self.time_sample_count)]; ..((tri_indices.0 as usize + 1) * self.time_sample_count)];
let n1_slice = &normals[(tri_indices.1 as usize let p1_slice = &self.vertices[(tri_indices.1 as usize
* self.time_sample_count) * self.time_sample_count)
..((tri_indices.1 as usize + 1) * self.time_sample_count)]; ..((tri_indices.1 as usize + 1) * self.time_sample_count)];
let n2_slice = &normals[(tri_indices.2 as usize let p2_slice = &self.vertices[(tri_indices.2 as usize
* self.time_sample_count) * self.time_sample_count)
..((tri_indices.2 as usize + 1) * self.time_sample_count)]; ..((tri_indices.2 as usize + 1) * self.time_sample_count)];
let n0 = lerp_slice(n0_slice, ray_time).normalized(); let p0 = lerp_slice(p0_slice, ray_time);
let n1 = lerp_slice(n1_slice, ray_time).normalized(); let p1 = lerp_slice(p1_slice, ray_time);
let n2 = lerp_slice(n2_slice, ray_time).normalized(); let p2 = lerp_slice(p2_slice, ray_time);
let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space; (p0, p1, p2)
if dot(s_nor, geo_normal) >= 0.0 {
s_nor
} else {
-s_nor
}
} else {
geo_normal
}; };
let intersection_data = SurfaceIntersectionData {
incoming: rays.dir(ray_idx),
t: t,
pos: pos,
pos_err: pos_err,
nor: shading_normal,
nor_g: geo_normal,
local_space: mat_space,
sample_pdf: 0.0,
};
// Fill in intersection data
isects[ray_idx] = SurfaceIntersection::Hit {
intersection_data: intersection_data,
closure: shader.shade(&intersection_data, ray_time),
};
rays.set_max_t(ray_idx, t);
} }
}
}); // Transform triangle if necessary, and get transform space.
}, let mat_space = if !space.is_empty() {
); if space.len() > 1 {
// Per-ray transform, for motion blur
let mat_space = lerp_slice(space, ray_time).inverse();
tri = (tri.0 * mat_space, tri.1 * mat_space, tri.2 * mat_space);
mat_space
} else {
// Same transform for all rays
if !is_cached {
tri = (
tri.0 * static_mat_space,
tri.1 * static_mat_space,
tri.2 * static_mat_space,
);
}
static_mat_space
}
} else {
// No transforms
Matrix4x4::new()
};
// Test ray against triangle
if let Some((t, b0, b1, b2)) = triangle::intersect_ray(
rays.orig(ray_idx),
rays.dir(ray_idx),
rays.max_t(ray_idx),
tri,
) {
if rays.is_occlusion(ray_idx) {
isects[ray_idx] = SurfaceIntersection::Occlude;
rays.mark_done(ray_idx);
} else {
// Calculate intersection point and error magnitudes
let (pos, pos_err) = triangle::surface_point(tri, (b0, b1, b2));
// Calculate geometric surface normal
let geo_normal = cross(tri.0 - tri.1, tri.0 - tri.2).into_normal();
// Calculate interpolated surface normal, if any
let shading_normal = if let Some(normals) = self.normals {
let n0_slice = &normals[(tri_indices.0 as usize
* self.time_sample_count)
..((tri_indices.0 as usize + 1) * self.time_sample_count)];
let n1_slice = &normals[(tri_indices.1 as usize
* self.time_sample_count)
..((tri_indices.1 as usize + 1) * self.time_sample_count)];
let n2_slice = &normals[(tri_indices.2 as usize
* self.time_sample_count)
..((tri_indices.2 as usize + 1) * self.time_sample_count)];
let n0 = lerp_slice(n0_slice, ray_time).normalized();
let n1 = lerp_slice(n1_slice, ray_time).normalized();
let n2 = lerp_slice(n2_slice, ray_time).normalized();
let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space;
if dot(s_nor, geo_normal) >= 0.0 {
s_nor
} else {
-s_nor
}
} else {
geo_normal
};
let intersection_data = SurfaceIntersectionData {
incoming: rays.dir(ray_idx),
t: t,
pos: pos,
pos_err: pos_err,
nor: shading_normal,
nor_g: geo_normal,
local_space: mat_space,
sample_pdf: 0.0,
};
// Fill in intersection data
isects[ray_idx] = SurfaceIntersection::Hit {
intersection_data: intersection_data,
closure: shader.shade(&intersection_data, ray_time),
};
rays.set_max_t(ray_idx, t);
}
}
});
}
ray_stack.pop_task();
});
} }
} }

View File

@ -85,11 +85,11 @@ impl<'a> TracerInner<'a> {
rays: &mut RayBatch, rays: &mut RayBatch,
ray_stack: &mut RayStack, ray_stack: &mut RayStack,
) { ) {
assembly.object_accel.traverse( assembly
rays, .object_accel
ray_stack, .traverse(rays, ray_stack, |idx_range, rays, ray_stack| {
&assembly.instances[..], let inst = &assembly.instances[idx_range.start];
|inst, rays, ray_stack| {
// Transform rays if needed // Transform rays if needed
if let Some((xstart, xend)) = inst.transform_indices { if let Some((xstart, xend)) = inst.transform_indices {
// Push transforms to stack // Push transforms to stack
@ -98,12 +98,11 @@ impl<'a> TracerInner<'a> {
// Do transforms // Do transforms
// TODO: re-divide rays based on direction (maybe?). // TODO: re-divide rays based on direction (maybe?).
let xforms = self.xform_stack.top(); let xforms = self.xform_stack.top();
ray_stack.pop_do_next_task_and_push_rays(2, |ray_idx| { ray_stack.do_next_task(|ray_idx| {
let t = rays.time(ray_idx); let t = rays.time(ray_idx);
rays.update_local(ray_idx, &lerp_slice(xforms, t)); rays.update_local(ray_idx, &lerp_slice(xforms, t));
(Bool4::new(true, true, false, false), 2)
}); });
ray_stack.push_lanes_to_tasks(&[0, 1]); ray_stack.duplicate_next_task();
} }
// Trace rays // Trace rays
@ -142,8 +141,7 @@ impl<'a> TracerInner<'a> {
}); });
} }
} }
}, });
);
} }
fn trace_object<'b>( fn trace_object<'b>(