Moved more work out of the triangle mesh intersection inner loop.

This makes the code a bit spaghetti-like, but it provides noticeable
speed-ups in all the scenes I tested.
This commit is contained in:
Nathan Vegdahl 2018-07-01 17:30:58 -07:00
parent b14b1b13b5
commit 5c20fa3ea4

View File

@ -1,5 +1,7 @@
#![allow(dead_code)] #![allow(dead_code)]
use std;
use mem_arena::MemArena; use mem_arena::MemArena;
use accel::BVH4; use accel::BVH4;
@ -131,55 +133,84 @@ impl<'a> Surface for TriangleMesh<'a> {
self.accel self.accel
.traverse(&mut accel_rays[..], self.indices, |tri_indices, rs| { .traverse(&mut accel_rays[..], self.indices, |tri_indices, rs| {
// For static triangles with static transforms, cache them.
let is_cached = self.time_sample_count == 1 && space.len() <= 1;
let mut tri = if is_cached {
let tri = (
self.vertices[tri_indices.0 as usize],
self.vertices[tri_indices.1 as usize],
self.vertices[tri_indices.2 as usize],
);
if space.is_empty() {
tri
} else {
(
tri.0 * static_mat_space,
tri.1 * static_mat_space,
tri.2 * static_mat_space,
)
}
} else {
unsafe { std::mem::uninitialized() }
};
// Test each ray against the current triangle.
for r in rs { for r in rs {
let wr = &wrays[r.id as usize]; let wr = &wrays[r.id as usize];
// Get triangle // Get triangle if necessary
let tri = { if !is_cached {
let p0_slice = &self.vertices[(tri_indices.0 as usize tri = if self.time_sample_count == 1 {
* self.time_sample_count) // No deformation motion blur, so fast-path it.
..((tri_indices.0 as usize + 1) (
* self.time_sample_count)]; self.vertices[tri_indices.0 as usize],
let p1_slice = &self.vertices[(tri_indices.1 as usize self.vertices[tri_indices.1 as usize],
* self.time_sample_count) self.vertices[tri_indices.2 as usize],
..((tri_indices.1 as usize + 1) )
* self.time_sample_count)]; } else {
let p2_slice = &self.vertices[(tri_indices.2 as usize // Deformation motion blur, need to interpolate.
* self.time_sample_count) let p0_slice = &self.vertices[(tri_indices.0 as usize
..((tri_indices.2 as usize + 1) * self.time_sample_count)
* self.time_sample_count)]; ..((tri_indices.0 as usize + 1)
* self.time_sample_count)];
let p1_slice = &self.vertices[(tri_indices.1 as usize
* self.time_sample_count)
..((tri_indices.1 as usize + 1)
* self.time_sample_count)];
let p2_slice = &self.vertices[(tri_indices.2 as usize
* self.time_sample_count)
..((tri_indices.2 as usize + 1)
* self.time_sample_count)];
let p0 = lerp_slice(p0_slice, wr.time); let p0 = lerp_slice(p0_slice, wr.time);
let p1 = lerp_slice(p1_slice, wr.time); let p1 = lerp_slice(p1_slice, wr.time);
let p2 = lerp_slice(p2_slice, wr.time); let p2 = lerp_slice(p2_slice, wr.time);
(p0, p1, p2) (p0, p1, p2)
}; };
}
// Transform triangle as necessary, and get transform // Transform triangle if necessary, and get transform space.
// space. let mat_space = if !space.is_empty() {
let (mat_space, tri) = if !space.is_empty() {
if space.len() > 1 { if space.len() > 1 {
// Per-ray transform, for motion blur // Per-ray transform, for motion blur
let mat_space = lerp_slice(space, wr.time).inverse(); let mat_space = lerp_slice(space, wr.time).inverse();
( tri = (tri.0 * mat_space, tri.1 * mat_space, tri.2 * mat_space);
mat_space, mat_space
(tri.0 * mat_space, tri.1 * mat_space, tri.2 * mat_space),
)
} else { } else {
// Same transform for all rays // Same transform for all rays
( if !is_cached {
static_mat_space, tri = (
(
tri.0 * static_mat_space, tri.0 * static_mat_space,
tri.1 * static_mat_space, tri.1 * static_mat_space,
tri.2 * static_mat_space, tri.2 * static_mat_space,
), );
) }
static_mat_space
} }
} else { } else {
// No transforms // No transforms
(Matrix4x4::new(), tri) Matrix4x4::new()
}; };
// Test ray against triangle // Test ray against triangle