Merge pull request #11 from cessen/orst_traversal

ORST traversal
This commit is contained in:
Nathan Vegdahl 2019-07-06 09:55:50 +09:00 committed by GitHub
commit 452a29a95c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 1213 additions and 662 deletions

7
Cargo.lock generated
View File

@ -100,6 +100,11 @@ dependencies = [
name = "color" name = "color"
version = "0.1.0" version = "0.1.0"
[[package]]
name = "copy_in_place"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "crossbeam" name = "crossbeam"
version = "0.3.2" version = "0.3.2"
@ -239,6 +244,7 @@ dependencies = [
"bvh_order 0.1.0", "bvh_order 0.1.0",
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"color 0.1.0", "color 0.1.0",
"copy_in_place 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"float4 0.1.0", "float4 0.1.0",
"half 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "half 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -557,6 +563,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33" "checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum copy_in_place 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b792a46b1ef44bb5e9a04721d34e186522431be965a283437107843d62ddbaad"
"checksum crossbeam 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "24ce9782d4d5c53674646a6a4c1863a21a8fc0cb649b3c94dfc16e45071dea19" "checksum crossbeam 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "24ce9782d4d5c53674646a6a4c1863a21a8fc0cb649b3c94dfc16e45071dea19"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"

View File

@ -25,6 +25,7 @@ debug = true
# Crates.io dependencies # Crates.io dependencies
base64 = "0.9" base64 = "0.9"
clap = "2.30" clap = "2.30"
copy_in_place = "0.2.0"
crossbeam = "0.3" crossbeam = "0.3"
half = "1.0" half = "1.0"
lazy_static = "1.0" lazy_static = "1.0"

View File

@ -1,36 +1,52 @@
//! This BVH4 implementation is based on the ideas from the paper
//! "Efficient Ray Tracing Kernels for Modern CPU Architectures"
//! by Fuetterling et al.
#![allow(dead_code)] #![allow(dead_code)]
use bvh_order::{calc_traversal_code, SplitAxes, TRAVERSAL_TABLE};
use mem_arena::MemArena; use mem_arena::MemArena;
use crate::{ use crate::{
algorithm::partition, bbox::BBox, boundable::Boundable, lerp::lerp_slice, ray::AccelRay, bbox::BBox,
timer::Timer, bbox4::BBox4,
boundable::Boundable,
lerp::lerp_slice,
math::Vector,
ray::{RayBatch, RayStack},
}; };
use super::{ use super::{
bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH}, bvh_base::{BVHBase, BVHBaseNode, BVH_MAX_DEPTH},
ACCEL_NODE_RAY_TESTS, ACCEL_TRAV_TIME, ACCEL_NODE_RAY_TESTS,
}; };
use bvh_order::{calc_traversal_code, SplitAxes, TRAVERSAL_TABLE};
use float4::Bool4;
/// Packs the signs of a direction's components into a 3-bit code.
///
/// Bit 0 is set when `dir.x()` is negative, bit 1 when `dir.y()` is negative,
/// and bit 2 when `dir.z()` is negative, so the result is always in `0..8`.
pub fn ray_code(dir: Vector) -> usize {
    [dir.x(), dir.y(), dir.z()]
        .iter()
        .enumerate()
        .fold(0, |code, (axis, &component)| {
            // Each axis contributes its own distinct bit.
            code + (((component < 0.0) as usize) << axis)
        })
}
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub struct BVH4<'a> { pub struct BVH4<'a> {
root: Option<&'a BVH4Node<'a>>, root: Option<&'a BVH4Node<'a>>,
depth: usize, depth: usize,
node_count: usize,
_bounds: Option<&'a [BBox]>,
} }
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub enum BVH4Node<'a> { pub enum BVH4Node<'a> {
Inner { Internal {
traversal_code: u8, bounds: &'a [BBox4],
bounds_start: &'a BBox,
bounds_len: u16,
children: &'a [BVH4Node<'a>], children: &'a [BVH4Node<'a>],
traversal_code: u8,
}, },
Leaf { Leaf {
bounds_start: &'a BBox,
bounds_len: u16,
object_range: (usize, usize), object_range: (usize, usize),
}, },
} }
@ -45,19 +61,32 @@ impl<'a> BVH4<'a> {
where where
F: 'b + Fn(&T) -> &'b [BBox], F: 'b + Fn(&T) -> &'b [BBox],
{ {
if objects.is_empty() { if objects.len() == 0 {
BVH4 { BVH4 {
root: None, root: None,
depth: 0, depth: 0,
node_count: 0,
_bounds: None,
} }
} else { } else {
let base = BVHBase::from_objects(objects, objects_per_leaf, bounder); let base = BVHBase::from_objects(objects, objects_per_leaf, bounder);
let root = unsafe { arena.alloc_uninitialized::<BVH4Node>() }; let fill_node = unsafe { arena.alloc_uninitialized_with_alignment::<BVH4Node>(32) };
BVH4::construct_from_base(arena, &base, base.root_node_index(), root); let node_count = BVH4::construct_from_base(
arena,
&base,
&base.nodes[base.root_node_index()],
fill_node,
);
BVH4 { BVH4 {
root: Some(root), root: Some(fill_node),
depth: base.depth, depth: (base.depth / 2) + 1,
node_count: node_count,
_bounds: {
let range = base.nodes[base.root_node_index()].bounds_range();
Some(arena.copy_slice(&base.bounds[range.0..range.1]))
},
} }
} }
} }
@ -66,135 +95,85 @@ impl<'a> BVH4<'a> {
self.depth self.depth
} }
pub fn traverse<T, F>(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F) pub fn traverse<F>(&self, rays: &mut RayBatch, ray_stack: &mut RayStack, mut obj_ray_test: F)
where where
F: FnMut(&T, &mut [AccelRay]), F: FnMut(std::ops::Range<usize>, &mut RayBatch, &mut RayStack),
{ {
if self.root.is_none() { if self.root.is_none() {
return; return;
} }
let mut timer = Timer::new();
let mut trav_time: f64 = 0.0;
let mut node_tests: u64 = 0; let mut node_tests: u64 = 0;
let traversal_table = { let traversal_table =
let ray_sign_is_neg = [ &TRAVERSAL_TABLE[ray_code(rays.dir_inv_local(ray_stack.next_task_ray_idx(0)))];
rays[0].dir_inv.x() < 0.0,
rays[0].dir_inv.y() < 0.0,
rays[0].dir_inv.z() < 0.0,
];
let ray_code = ray_sign_is_neg[0] as usize
+ ((ray_sign_is_neg[1] as usize) << 1)
+ ((ray_sign_is_neg[2] as usize) << 2);
&TRAVERSAL_TABLE[ray_code]
};
// +2 of max depth for root and last child // +2 of max depth for root and last child
let mut node_stack = [self.root.unwrap(); (BVH_MAX_DEPTH * 3) + 2]; let mut node_stack = [self.root.unwrap(); (BVH_MAX_DEPTH * 3) + 2];
let mut ray_i_stack = [rays.len(); (BVH_MAX_DEPTH * 3) + 2];
let mut stack_ptr = 1; let mut stack_ptr = 1;
while stack_ptr > 0 { while stack_ptr > 0 {
node_tests += ray_i_stack[stack_ptr] as u64; match node_stack[stack_ptr] {
match *node_stack[stack_ptr] { &BVH4Node::Internal {
BVH4Node::Inner { bounds,
traversal_code,
bounds_start,
bounds_len,
children, children,
traversal_code,
} => { } => {
let bounds = node_tests += ray_stack.ray_count_in_next_task() as u64;
unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) }; let mut all_hits = Bool4::new_false();
let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
(!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r) // Ray testing
ray_stack.pop_do_next_task_and_push_rays(children.len(), |ray_idx| {
if rays.is_done(ray_idx) {
Bool4::new_false()
} else {
let hits = if bounds.len() == 1 {
bounds[0].intersect_ray(
rays.orig_local(ray_idx),
rays.dir_inv_local(ray_idx),
rays.max_t(ray_idx),
)
} else {
lerp_slice(bounds, rays.time(ray_idx)).intersect_ray(
rays.orig_local(ray_idx),
rays.dir_inv_local(ray_idx),
rays.max_t(ray_idx),
)
};
all_hits = all_hits | hits;
hits
}
}); });
if part > 0 {
// If there were any intersections, create tasks.
if !all_hits.is_all_false() {
let order_code = traversal_table[traversal_code as usize]; let order_code = traversal_table[traversal_code as usize];
match children.len() { let mut lane_count = 0;
4 => { let mut i = children.len() as u8;
let i4 = ((order_code >> 6) & 0b11) as usize; while i > 0 {
let i3 = ((order_code >> 4) & 0b11) as usize; i -= 1;
let i2 = ((order_code >> 2) & 0b11) as usize; let child_i = ((order_code >> (i * 2)) & 3) as usize;
let i1 = (order_code & 0b11) as usize; if ray_stack.push_lane_to_task(child_i) {
node_stack[stack_ptr + lane_count] = &children[child_i];
ray_i_stack[stack_ptr] = part; lane_count += 1;
ray_i_stack[stack_ptr + 1] = part;
ray_i_stack[stack_ptr + 2] = part;
ray_i_stack[stack_ptr + 3] = part;
node_stack[stack_ptr] = &children[i4];
node_stack[stack_ptr + 1] = &children[i3];
node_stack[stack_ptr + 2] = &children[i2];
node_stack[stack_ptr + 3] = &children[i1];
stack_ptr += 3;
} }
3 => {
let i3 = ((order_code >> 4) & 0b11) as usize;
let i2 = ((order_code >> 2) & 0b11) as usize;
let i1 = (order_code & 0b11) as usize;
ray_i_stack[stack_ptr] = part;
ray_i_stack[stack_ptr + 1] = part;
ray_i_stack[stack_ptr + 2] = part;
node_stack[stack_ptr] = &children[i3];
node_stack[stack_ptr + 1] = &children[i2];
node_stack[stack_ptr + 2] = &children[i1];
stack_ptr += 2;
} }
2 => {
let i2 = ((order_code >> 2) & 0b11) as usize;
let i1 = (order_code & 0b11) as usize;
ray_i_stack[stack_ptr] = part; stack_ptr += lane_count - 1;
ray_i_stack[stack_ptr + 1] = part;
node_stack[stack_ptr] = &children[i2];
node_stack[stack_ptr + 1] = &children[i1];
stack_ptr += 1;
}
_ => unreachable!(),
}
} else { } else {
stack_ptr -= 1; stack_ptr -= 1;
} }
} }
BVH4Node::Leaf { &BVH4Node::Leaf { object_range } => {
object_range, // Do the ray tests.
bounds_start, obj_ray_test(object_range.0..object_range.1, rays, ray_stack);
bounds_len,
} => {
let bounds =
unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) };
let part = partition(&mut rays[..ray_i_stack[stack_ptr]], |r| {
(!r.is_done()) && lerp_slice(bounds, r.time).intersect_accel_ray(r)
});
trav_time += timer.tick() as f64;
if part > 0 {
for obj in &objects[object_range.0..object_range.1] {
obj_ray_test(obj, &mut rays[..part]);
}
}
timer.tick();
stack_ptr -= 1; stack_ptr -= 1;
} }
} }
} }
trav_time += timer.tick() as f64;
ACCEL_TRAV_TIME.with(|att| {
let v = att.get();
att.set(v + trav_time);
});
ACCEL_NODE_RAY_TESTS.with(|anv| { ACCEL_NODE_RAY_TESTS.with(|anv| {
let v = anv.get(); let v = anv.get();
anv.set(v + node_tests); anv.set(v + node_tests);
@ -204,12 +183,15 @@ impl<'a> BVH4<'a> {
fn construct_from_base( fn construct_from_base(
arena: &'a MemArena, arena: &'a MemArena,
base: &BVHBase, base: &BVHBase,
node_index: usize, node: &BVHBaseNode,
node_mem: &mut BVH4Node<'a>, fill_node: &mut BVH4Node<'a>,
) { ) -> usize {
match base.nodes[node_index] { let mut node_count = 0;
BVHBaseNode::Internal {
bounds_range, match node {
// Create internal node
&BVHBaseNode::Internal {
bounds_range: _,
children_indices, children_indices,
split_axis, split_axis,
} => { } => {
@ -218,7 +200,7 @@ impl<'a> BVH4<'a> {
// Prepare convenient access to the stuff we need. // Prepare convenient access to the stuff we need.
let child_count: usize; let child_count: usize;
let child_indices: [usize; 4]; let children; // [Optional, Optional, Optional, Optional]
let split_info: SplitAxes; let split_info: SplitAxes;
match *child_l { match *child_l {
BVHBaseNode::Internal { BVHBaseNode::Internal {
@ -234,13 +216,23 @@ impl<'a> BVH4<'a> {
} => { } => {
// Four nodes // Four nodes
child_count = 4; child_count = 4;
child_indices = [i_l.0, i_l.1, i_r.0, i_r.1]; children = [
Some(&base.nodes[i_l.0]),
Some(&base.nodes[i_l.1]),
Some(&base.nodes[i_r.0]),
Some(&base.nodes[i_r.1]),
];
split_info = SplitAxes::Full((split_axis, s_l, s_r)); split_info = SplitAxes::Full((split_axis, s_l, s_r));
} }
BVHBaseNode::Leaf { .. } => { BVHBaseNode::Leaf { .. } => {
// Three nodes with left split // Three nodes with left split
child_count = 3; child_count = 3;
child_indices = [i_l.0, i_l.1, children_indices.1, 0]; children = [
Some(&base.nodes[i_l.0]),
Some(&base.nodes[i_l.1]),
Some(child_r),
None,
];
split_info = SplitAxes::Left((split_axis, s_l)); split_info = SplitAxes::Left((split_axis, s_l));
} }
} }
@ -254,76 +246,112 @@ impl<'a> BVH4<'a> {
} => { } => {
// Three nodes with right split // Three nodes with right split
child_count = 3; child_count = 3;
child_indices = [children_indices.0, i_r.0, i_r.1, 0]; children = [
Some(child_l),
Some(&base.nodes[i_r.0]),
Some(&base.nodes[i_r.1]),
None,
];
split_info = SplitAxes::Right((split_axis, s_r)); split_info = SplitAxes::Right((split_axis, s_r));
} }
BVHBaseNode::Leaf { .. } => { BVHBaseNode::Leaf { .. } => {
// Two nodes // Two nodes
child_count = 2; child_count = 2;
child_indices = [children_indices.0, children_indices.1, 0, 0]; children = [Some(child_l), Some(child_r), None, None];
split_info = SplitAxes::TopOnly(split_axis); split_info = SplitAxes::TopOnly(split_axis);
} }
} }
} }
} }
// Copy bounds node_count += child_count;
let bounds = arena
.copy_slice_with_alignment(&base.bounds[bounds_range.0..bounds_range.1], 32);
// Build children // Construct bounds
let children_mem = unsafe { let bounds = {
let bounds_len = children
.iter()
.map(|c| {
if let &Some(n) = c {
let len = n.bounds_range().1 - n.bounds_range().0;
debug_assert!(len >= 1);
len
} else {
0
}
})
.max()
.unwrap();
debug_assert!(bounds_len >= 1);
let bounds =
unsafe { arena.alloc_array_uninitialized_with_alignment(bounds_len, 32) };
if bounds_len < 2 {
let b1 =
children[0].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
let b2 =
children[1].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
let b3 =
children[2].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
let b4 =
children[3].map_or(BBox::new(), |c| base.bounds[c.bounds_range().0]);
bounds[0] = BBox4::from_bboxes(b1, b2, b3, b4);
} else {
for (i, b) in bounds.iter_mut().enumerate() {
let time = i as f32 / (bounds_len - 1) as f32;
let b1 = children[0].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
let b2 = children[1].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
let b3 = children[2].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
let b4 = children[3].map_or(BBox::new(), |c| {
let (x, y) = c.bounds_range();
lerp_slice(&base.bounds[x..y], time)
});
*b = BBox4::from_bboxes(b1, b2, b3, b4);
}
}
bounds
};
// Construct child nodes
let child_nodes = unsafe {
arena.alloc_array_uninitialized_with_alignment::<BVH4Node>(child_count, 32) arena.alloc_array_uninitialized_with_alignment::<BVH4Node>(child_count, 32)
}; };
for i in 0..child_count { for (i, c) in children[0..child_count].iter().enumerate() {
BVH4::construct_from_base(arena, base, child_indices[i], &mut children_mem[i]); node_count +=
BVH4::construct_from_base(arena, base, c.unwrap(), &mut child_nodes[i]);
} }
// Fill in node // Build this node
*node_mem = BVH4Node::Inner { *fill_node = BVH4Node::Internal {
bounds: bounds,
children: child_nodes,
traversal_code: calc_traversal_code(split_info), traversal_code: calc_traversal_code(split_info),
bounds_start: &bounds[0],
bounds_len: bounds.len() as u16,
children: children_mem,
}; };
} }
BVHBaseNode::Leaf { // Create leaf node
bounds_range, &BVHBaseNode::Leaf { object_range, .. } => {
object_range, *fill_node = BVH4Node::Leaf {
} => {
let bounds = arena.copy_slice(&base.bounds[bounds_range.0..bounds_range.1]);
*node_mem = BVH4Node::Leaf {
bounds_start: &bounds[0],
bounds_len: bounds.len() as u16,
object_range: object_range, object_range: object_range,
}; };
node_count += 1;
} }
} }
}
}
lazy_static! { return node_count;
static ref DEGENERATE_BOUNDS: [BBox; 1] = [BBox::new()]; }
} }
impl<'a> Boundable for BVH4<'a> { impl<'a> Boundable for BVH4<'a> {
fn bounds(&self) -> &[BBox] { fn bounds<'b>(&'b self) -> &'b [BBox] {
match self.root { self._bounds.unwrap_or(&[])
None => &DEGENERATE_BOUNDS[..],
Some(root) => match *root {
BVH4Node::Inner {
bounds_start,
bounds_len,
..
}
| BVH4Node::Leaf {
bounds_start,
bounds_len,
..
} => unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) },
},
}
} }
} }

View File

@ -1,4 +1,4 @@
mod bvh; // mod bvh;
mod bvh4; mod bvh4;
mod bvh_base; mod bvh_base;
mod light_array; mod light_array;
@ -13,15 +13,14 @@ use crate::{
}; };
pub use self::{ pub use self::{
bvh::{BVHNode, BVH}, // bvh::{BVHNode, BVH},
bvh4::{BVH4Node, BVH4}, bvh4::{ray_code, BVH4Node, BVH4},
light_array::LightArray, light_array::LightArray,
light_tree::LightTree, light_tree::LightTree,
}; };
// Track BVH traversal time // Track BVH traversal time
thread_local! { thread_local! {
pub static ACCEL_TRAV_TIME: Cell<f64> = Cell::new(0.0);
pub static ACCEL_NODE_RAY_TESTS: Cell<u64> = Cell::new(0); pub static ACCEL_NODE_RAY_TESTS: Cell<u64> = Cell::new(0);
} }

View File

@ -7,8 +7,7 @@ use std::{
use crate::{ use crate::{
lerp::{lerp, lerp_slice, Lerp}, lerp::{lerp, lerp_slice, Lerp},
math::{fast_minf32, Matrix4x4, Point}, math::{fast_minf32, Matrix4x4, Point, Vector},
ray::AccelRay,
}; };
const BBOX_MAXT_ADJUST: f32 = 1.000_000_24; const BBOX_MAXT_ADJUST: f32 = 1.000_000_24;
@ -40,17 +39,17 @@ impl BBox {
} }
// Returns whether the given ray intersects with the bbox. // Returns whether the given ray intersects with the bbox.
pub fn intersect_accel_ray(&self, ray: &AccelRay) -> bool { pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> bool {
// Calculate slab intersections // Calculate slab intersections
let t1 = (self.min.co - ray.orig.co) * ray.dir_inv.co; let t1 = (self.min.co - orig.co) * dir_inv.co;
let t2 = (self.max.co - ray.orig.co) * ray.dir_inv.co; let t2 = (self.max.co - orig.co) * dir_inv.co;
// Find the far and near intersection // Find the far and near intersection
let mut far_t = t1.v_max(t2); let mut far_t = t1.v_max(t2);
let mut near_t = t1.v_min(t2); let mut near_t = t1.v_min(t2);
far_t.set_3(std::f32::INFINITY); far_t.set_3(std::f32::INFINITY);
near_t.set_3(0.0); near_t.set_3(0.0);
let far_hit_t = fast_minf32(far_t.h_min() * BBOX_MAXT_ADJUST, ray.max_t); let far_hit_t = fast_minf32(far_t.h_min() * BBOX_MAXT_ADJUST, max_t);
let near_hit_t = near_t.h_max(); let near_hit_t = near_t.h_max();
// Did we hit? // Did we hit?

139
src/bbox4.rs Normal file
View File

@ -0,0 +1,139 @@
#![allow(dead_code)]
use std;
use std::ops::{BitOr, BitOrAssign};
use crate::{
bbox::BBox,
lerp::{lerp, Lerp},
math::{Point, Vector},
};
use float4::{Bool4, Float4};
const BBOX_MAXT_ADJUST: f32 = 1.00000024;
/// A SIMD set of 4 3D axis-aligned bounding boxes.
///
/// The data is stored per axis: each field is a `(min, max)` pair of
/// `Float4`s, with one lane per box.
#[derive(Debug, Copy, Clone)]
pub struct BBox4 {
// X-axis extents of the four boxes.
pub x: (Float4, Float4), // (min, max)
// Y-axis extents of the four boxes.
pub y: (Float4, Float4), // (min, max)
// Z-axis extents of the four boxes.
pub z: (Float4, Float4), // (min, max)
}
impl BBox4 {
/// Creates a degenerate BBox with +infinity min and -infinity max.
pub fn new() -> BBox4 {
BBox4 {
x: (
Float4::splat(std::f32::INFINITY),
Float4::splat(std::f32::NEG_INFINITY),
),
y: (
Float4::splat(std::f32::INFINITY),
Float4::splat(std::f32::NEG_INFINITY),
),
z: (
Float4::splat(std::f32::INFINITY),
Float4::splat(std::f32::NEG_INFINITY),
),
}
}
/// Creates a BBox with min as the minimum extent and max as the maximum
/// extent.
pub fn from_bboxes(b1: BBox, b2: BBox, b3: BBox, b4: BBox) -> BBox4 {
BBox4 {
x: (
Float4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()),
Float4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x()),
),
y: (
Float4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()),
Float4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y()),
),
z: (
Float4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()),
Float4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z()),
),
}
}
// Returns whether the given ray intersects with the bboxes.
pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> Bool4 {
// Get the ray data into SIMD format.
let ro_x = orig.co.all_0();
let ro_y = orig.co.all_1();
let ro_z = orig.co.all_2();
let rdi_x = dir_inv.co.all_0();
let rdi_y = dir_inv.co.all_1();
let rdi_z = dir_inv.co.all_2();
let max_t = Float4::splat(max_t);
// Slab tests
let t1_x = (self.x.0 - ro_x) * rdi_x;
let t1_y = (self.y.0 - ro_y) * rdi_y;
let t1_z = (self.z.0 - ro_z) * rdi_z;
let t2_x = (self.x.1 - ro_x) * rdi_x;
let t2_y = (self.y.1 - ro_y) * rdi_y;
let t2_z = (self.z.1 - ro_z) * rdi_z;
// Get the far and near t hits for each axis.
let t_far_x = t1_x.v_max(t2_x);
let t_far_y = t1_y.v_max(t2_y);
let t_far_z = t1_z.v_max(t2_z);
let t_near_x = t1_x.v_min(t2_x);
let t_near_y = t1_y.v_min(t2_y);
let t_near_z = t1_z.v_min(t2_z);
// Calculate over-all far t hit.
let far_t =
(t_far_x.v_min(t_far_y.v_min(t_far_z)) * Float4::splat(BBOX_MAXT_ADJUST)).v_min(max_t);
// Calculate over-all near t hit.
let near_t = t_near_x
.v_max(t_near_y)
.v_max(t_near_z.v_max(Float4::splat(0.0)));
// Hit results
near_t.lt(far_t)
}
}
/// Union of two BBoxes.
impl BitOr for BBox4 {
type Output = BBox4;
fn bitor(self, rhs: BBox4) -> BBox4 {
BBox4 {
x: (self.x.0.v_min(rhs.x.0), self.x.1.v_max(rhs.x.1)),
y: (self.y.0.v_min(rhs.y.0), self.y.1.v_max(rhs.y.1)),
z: (self.z.0.v_min(rhs.z.0), self.z.1.v_max(rhs.z.1)),
}
}
}
impl BitOrAssign for BBox4 {
fn bitor_assign(&mut self, rhs: BBox4) {
*self = *self | rhs;
}
}
impl Lerp for BBox4 {
fn lerp(self, other: BBox4, alpha: f32) -> BBox4 {
BBox4 {
x: (
lerp(self.x.0, other.x.0, alpha),
lerp(self.x.1, other.x.1, alpha),
),
y: (
lerp(self.y.0, other.y.0, alpha),
lerp(self.y.1, other.y.1, alpha),
),
z: (
lerp(self.z.0, other.z.0, alpha),
lerp(self.z.1, other.z.1, alpha),
),
}
}
}

View File

@ -92,6 +92,12 @@ impl<'a> Camera<'a> {
) )
.normalized(); .normalized();
Ray::new(orig * transform, dir * transform, time, wavelength, false) Ray {
orig: orig * transform,
dir: dir * transform,
time: time,
wavelength: wavelength,
max_t: std::f32::INFINITY,
}
} }
} }

View File

@ -6,7 +6,7 @@ use crate::{
color::{Color, SpectralSample}, color::{Color, SpectralSample},
lerp::lerp_slice, lerp::lerp_slice,
math::{cross, dot, Matrix4x4, Normal, Point, Vector}, math::{cross, dot, Matrix4x4, Normal, Point, Vector},
ray::{AccelRay, Ray}, ray::{RayBatch, RayStack},
sampling::{ sampling::{
spherical_triangle_solid_angle, triangle_surface_area, uniform_sample_spherical_triangle, spherical_triangle_solid_angle, triangle_surface_area, uniform_sample_spherical_triangle,
uniform_sample_triangle, uniform_sample_triangle,
@ -257,20 +257,23 @@ impl<'a> SurfaceLight for RectangleLight<'a> {
impl<'a> Surface for RectangleLight<'a> { impl<'a> Surface for RectangleLight<'a> {
fn intersect_rays( fn intersect_rays(
&self, &self,
accel_rays: &mut [AccelRay], rays: &mut RayBatch,
wrays: &[Ray], ray_stack: &mut RayStack,
isects: &mut [SurfaceIntersection], isects: &mut [SurfaceIntersection],
shader: &SurfaceShader, shader: &SurfaceShader,
space: &[Matrix4x4], space: &[Matrix4x4],
) { ) {
let _ = shader; // Silence 'unused' warning let _ = shader; // Silence 'unused' warning
for r in accel_rays.iter_mut() { ray_stack.pop_do_next_task(|ray_idx| {
let wr = &wrays[r.id as usize]; let time = rays.time(ray_idx);
let orig = rays.orig(ray_idx);
let dir = rays.dir(ray_idx);
let max_t = rays.max_t(ray_idx);
// Calculate time interpolated values // Calculate time interpolated values
let dim = lerp_slice(self.dimensions, r.time); let dim = lerp_slice(self.dimensions, time);
let xform = lerp_slice(space, r.time); let xform = lerp_slice(space, time);
let space_inv = xform.inverse(); let space_inv = xform.inverse();
@ -281,18 +284,19 @@ impl<'a> Surface for RectangleLight<'a> {
let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv; let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv;
// Test against two triangles that make up the light // Test against two triangles that make up the light
let ray_pre = triangle::RayTriPrecompute::new(dir);
for tri in &[(p1, p2, p3), (p3, p4, p1)] { for tri in &[(p1, p2, p3), (p3, p4, p1)] {
if let Some((t, b0, b1, b2)) = triangle::intersect_ray(wr, *tri) { if let Some((t, b0, b1, b2)) = triangle::intersect_ray(orig, ray_pre, max_t, *tri) {
if t < r.max_t { if t < max_t {
if r.is_occlusion() { if rays.is_occlusion(ray_idx) {
isects[r.id as usize] = SurfaceIntersection::Occlude; isects[ray_idx] = SurfaceIntersection::Occlude;
r.mark_done(); rays.mark_done(ray_idx);
} else { } else {
let (pos, pos_err) = triangle::surface_point(*tri, (b0, b1, b2)); let (pos, pos_err) = triangle::surface_point(*tri, (b0, b1, b2));
let normal = cross(tri.0 - tri.1, tri.0 - tri.2).into_normal(); let normal = cross(tri.0 - tri.1, tri.0 - tri.2).into_normal();
let intersection_data = SurfaceIntersectionData { let intersection_data = SurfaceIntersectionData {
incoming: wr.dir, incoming: dir,
t: t, t: t,
pos: pos, pos: pos,
pos_err: pos_err, pos_err: pos_err,
@ -301,35 +305,35 @@ impl<'a> Surface for RectangleLight<'a> {
local_space: xform, local_space: xform,
sample_pdf: self.sample_pdf( sample_pdf: self.sample_pdf(
&xform, &xform,
wr.orig, orig,
wr.dir, dir,
pos, pos,
wr.wavelength, rays.wavelength(ray_idx),
r.time, time,
), ),
}; };
let closure = { let closure = {
let inv_surface_area = (1.0 / (dim.0 as f64 * dim.1 as f64)) as f32; let inv_surface_area = (1.0 / (dim.0 as f64 * dim.1 as f64)) as f32;
let color = lerp_slice(self.colors, r.time) * inv_surface_area; let color = lerp_slice(self.colors, time) * inv_surface_area;
SurfaceClosure::Emit(color) SurfaceClosure::Emit(color)
}; };
// Fill in intersection // Fill in intersection
isects[r.id as usize] = SurfaceIntersection::Hit { isects[ray_idx] = SurfaceIntersection::Hit {
intersection_data: intersection_data, intersection_data: intersection_data,
closure: closure, closure: closure,
}; };
// Set ray's max t // Set ray's max t
r.max_t = t; rays.set_max_t(ray_idx, t);
} }
break; break;
} }
} }
} }
} });
} }
} }

View File

@ -8,7 +8,7 @@ use crate::{
color::{Color, SpectralSample}, color::{Color, SpectralSample},
lerp::lerp_slice, lerp::lerp_slice,
math::{coordinate_system_from_vector, dot, Matrix4x4, Normal, Point, Vector}, math::{coordinate_system_from_vector, dot, Matrix4x4, Normal, Point, Vector},
ray::{AccelRay, Ray}, ray::{RayBatch, RayStack},
sampling::{uniform_sample_cone, uniform_sample_cone_pdf, uniform_sample_sphere}, sampling::{uniform_sample_cone, uniform_sample_cone_pdf, uniform_sample_sphere},
shading::surface_closure::SurfaceClosure, shading::surface_closure::SurfaceClosure,
shading::SurfaceShader, shading::SurfaceShader,
@ -206,26 +206,26 @@ impl<'a> SurfaceLight for SphereLight<'a> {
impl<'a> Surface for SphereLight<'a> { impl<'a> Surface for SphereLight<'a> {
fn intersect_rays( fn intersect_rays(
&self, &self,
accel_rays: &mut [AccelRay], rays: &mut RayBatch,
wrays: &[Ray], ray_stack: &mut RayStack,
isects: &mut [SurfaceIntersection], isects: &mut [SurfaceIntersection],
shader: &SurfaceShader, shader: &SurfaceShader,
space: &[Matrix4x4], space: &[Matrix4x4],
) { ) {
let _ = shader; // Silence 'unused' warning let _ = shader; // Silence 'unused' warning
for r in accel_rays.iter_mut() { ray_stack.pop_do_next_task(|ray_idx| {
let wr = &wrays[r.id as usize]; let time = rays.time(ray_idx);
// Get the transform space // Get the transform space
let xform = lerp_slice(space, r.time); let xform = lerp_slice(space, time);
// Get the radius of the sphere at the ray's time // Get the radius of the sphere at the ray's time
let radius = lerp_slice(self.radii, r.time); // Radius of the sphere let radius = lerp_slice(self.radii, time); // Radius of the sphere
// Get the ray origin and direction in local space // Get the ray origin and direction in local space
let orig = r.orig.into_vector(); let orig = rays.orig(ray_idx).into_vector();
let dir = wr.dir * xform; let dir = rays.dir(ray_idx) * xform;
// Code adapted to Rust from https://github.com/Tecla/Rayito // Code adapted to Rust from https://github.com/Tecla/Rayito
// Ray-sphere intersection can result in either zero, one or two points // Ray-sphere intersection can result in either zero, one or two points
@ -242,7 +242,7 @@ impl<'a> Surface for SphereLight<'a> {
let discriminant = (b * b) - (4.0 * a * c); let discriminant = (b * b) - (4.0 * a * c);
if discriminant < 0.0 { if discriminant < 0.0 {
// Discriminant less than zero? No solution => no intersection. // Discriminant less than zero? No solution => no intersection.
continue; return;
} }
let discriminant = discriminant.sqrt(); let discriminant = discriminant.sqrt();
@ -257,7 +257,7 @@ impl<'a> Surface for SphereLight<'a> {
// Get our final parametric values // Get our final parametric values
let mut t0 = q / a; let mut t0 = q / a;
let mut t1 = if q != 0.0 { c / q } else { r.max_t }; let mut t1 = if q != 0.0 { c / q } else { rays.max_t(ray_idx) };
// Swap them so they are ordered right // Swap them so they are ordered right
if t0 > t1 { if t0 > t1 {
@ -266,25 +266,25 @@ impl<'a> Surface for SphereLight<'a> {
} }
// Check our intersection for validity against this ray's extents // Check our intersection for validity against this ray's extents
if t0 > r.max_t || t1 <= 0.0 { if t0 > rays.max_t(ray_idx) || t1 <= 0.0 {
// Didn't hit because shere is entirely outside of ray's extents // Didn't hit because sphere is entirely outside of ray's extents
continue; return;
} }
let t = if t0 > 0.0 { let t = if t0 > 0.0 {
t0 t0
} else if t1 <= r.max_t { } else if t1 <= rays.max_t(ray_idx) {
t1 t1
} else { } else {
// Didn't hit because ray is entirely within the sphere, and // Didn't hit because ray is entirely within the sphere, and
// therefore doesn't hit its surface. // therefore doesn't hit its surface.
continue; return;
}; };
// We hit the sphere, so calculate intersection info. // We hit the sphere, so calculate intersection info.
if r.is_occlusion() { if rays.is_occlusion(ray_idx) {
isects[r.id as usize] = SurfaceIntersection::Occlude; isects[ray_idx] = SurfaceIntersection::Occlude;
r.mark_done(); rays.mark_done(ray_idx);
} else { } else {
let inv_xform = xform.inverse(); let inv_xform = xform.inverse();
@ -300,7 +300,7 @@ impl<'a> Surface for SphereLight<'a> {
let normal = unit_pos.into_normal() * inv_xform; let normal = unit_pos.into_normal() * inv_xform;
let intersection_data = SurfaceIntersectionData { let intersection_data = SurfaceIntersectionData {
incoming: wr.dir, incoming: rays.dir(ray_idx),
t: t, t: t,
pos: pos, pos: pos,
pos_err: pos_err, pos_err: pos_err,
@ -309,32 +309,32 @@ impl<'a> Surface for SphereLight<'a> {
local_space: xform, local_space: xform,
sample_pdf: self.sample_pdf( sample_pdf: self.sample_pdf(
&xform, &xform,
wr.orig, rays.orig(ray_idx),
wr.dir, rays.dir(ray_idx),
0.0, 0.0,
0.0, 0.0,
wr.wavelength, rays.wavelength(ray_idx),
r.time, time,
), ),
}; };
let closure = { let closure = {
let inv_surface_area = let inv_surface_area =
(1.0 / (4.0 * PI_64 * radius as f64 * radius as f64)) as f32; (1.0 / (4.0 * PI_64 * radius as f64 * radius as f64)) as f32;
let color = lerp_slice(self.colors, r.time) * inv_surface_area; let color = lerp_slice(self.colors, time) * inv_surface_area;
SurfaceClosure::Emit(color) SurfaceClosure::Emit(color)
}; };
// Fill in intersection // Fill in intersection
isects[r.id as usize] = SurfaceIntersection::Hit { isects[ray_idx] = SurfaceIntersection::Hit {
intersection_data: intersection_data, intersection_data: intersection_data,
closure: closure, closure: closure,
}; };
// Set ray's max t // Set ray's max t
r.max_t = t; rays.set_max_t(ray_idx, t);
}
} }
});
} }
} }

View File

@ -11,12 +11,12 @@
#![allow(clippy::needless_range_loop)] #![allow(clippy::needless_range_loop)]
#![allow(clippy::excessive_precision)] #![allow(clippy::excessive_precision)]
#[macro_use]
extern crate lazy_static; extern crate lazy_static;
mod accel; mod accel;
mod algorithm; mod algorithm;
mod bbox; mod bbox;
mod bbox4;
mod boundable; mod boundable;
mod camera; mod camera;
mod color; mod color;
@ -47,10 +47,9 @@ use nom::{error_position, take_until};
use mem_arena::MemArena; use mem_arena::MemArena;
use crate::{ use crate::{
accel::{BVH4Node, BVHNode}, accel::BVH4Node,
bbox::BBox, bbox::BBox,
parse::{parse_scene, DataTree}, parse::{parse_scene, DataTree},
ray::{AccelRay, Ray},
renderer::LightPath, renderer::LightPath,
surface::SurfaceIntersection, surface::SurfaceIntersection,
timer::Timer, timer::Timer,
@ -159,15 +158,13 @@ fn main() {
// Print some misc useful dev info. // Print some misc useful dev info.
if args.is_present("dev") { if args.is_present("dev") {
println!("Ray size: {} bytes", mem::size_of::<Ray>());
println!("AccelRay size: {} bytes", mem::size_of::<AccelRay>());
println!( println!(
"SurfaceIntersection size: {} bytes", "SurfaceIntersection size: {} bytes",
mem::size_of::<SurfaceIntersection>() mem::size_of::<SurfaceIntersection>()
); );
println!("LightPath size: {} bytes", mem::size_of::<LightPath>()); println!("LightPath size: {} bytes", mem::size_of::<LightPath>());
println!("BBox size: {} bytes", mem::size_of::<BBox>()); println!("BBox size: {} bytes", mem::size_of::<BBox>());
println!("BVHNode size: {} bytes", mem::size_of::<BVHNode>()); // println!("BVHNode size: {} bytes", mem::size_of::<BVHNode>());
println!("BVH4Node size: {} bytes", mem::size_of::<BVH4Node>()); println!("BVH4Node size: {} bytes", mem::size_of::<BVH4Node>());
return; return;
} }
@ -295,9 +292,10 @@ fn main() {
"\t\tTrace: {:.3}s", "\t\tTrace: {:.3}s",
ntime * rstats.trace_time ntime * rstats.trace_time
); );
println!("\t\t\tRays traced: {}", rstats.ray_count);
println!( println!(
"\t\t\tTraversal: {:.3}s", "\t\t\tRays/sec: {}",
ntime * rstats.accel_traversal_time (rstats.ray_count as f64 / (ntime * rstats.trace_time) as f64) as u64
); );
println!("\t\t\tRay/node tests: {}", rstats.accel_node_visits); println!("\t\t\tRay/node tests: {}", rstats.accel_node_visits);
println!( println!(

View File

@ -1,102 +1,401 @@
#![allow(dead_code)] #![allow(dead_code)]
use float4::Float4; use float4::{Bool4, Float4};
use crate::math::{Matrix4x4, Point, Vector}; use crate::math::{Matrix4x4, Point, Vector};
const OCCLUSION_FLAG: u32 = 1; type RayIndexType = u16;
const DONE_FLAG: u32 = 1 << 1; type FlagType = u8;
const OCCLUSION_FLAG: FlagType = 1;
const DONE_FLAG: FlagType = 1 << 1;
/// This is never used directly in ray tracing--it's only used as a convenience
/// for filling the RayBatch structure.
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
pub struct Ray { pub struct Ray {
pub orig: Point, pub orig: Point,
pub dir: Vector, pub dir: Vector,
pub max_t: f32,
pub time: f32, pub time: f32,
pub wavelength: f32, pub wavelength: f32,
pub flags: u32,
}
impl Ray {
pub fn new(orig: Point, dir: Vector, time: f32, wavelength: f32, is_occ: bool) -> Ray {
if !is_occ {
Ray {
orig: orig,
dir: dir,
max_t: std::f32::INFINITY,
time: time,
wavelength: wavelength,
flags: 0,
}
} else {
Ray {
orig: orig,
dir: dir,
max_t: 1.0,
time: time,
wavelength: wavelength,
flags: OCCLUSION_FLAG,
}
}
}
pub fn transform(&mut self, mat: &Matrix4x4) {
self.orig = self.orig * *mat;
self.dir = self.dir * *mat;
}
pub fn is_occlusion(&self) -> bool {
(self.flags & OCCLUSION_FLAG) != 0
}
}
#[derive(Debug, Copy, Clone)]
pub struct AccelRay {
pub orig: Point,
pub dir_inv: Vector,
pub max_t: f32, pub max_t: f32,
pub time: f32,
pub flags: u32,
pub id: u32,
} }
impl AccelRay { /// The hot (frequently accessed) parts of ray data.
pub fn new(ray: &Ray, id: u32) -> AccelRay { #[derive(Debug, Copy, Clone)]
AccelRay { struct RayHot {
orig: ray.orig, orig_local: Point, // Local-space ray origin
dir_inv: Vector { dir_inv_local: Vector, // Local-space 1.0/ray direction
co: Float4::splat(1.0) / ray.dir.co, max_t: f32,
}, time: f32,
flags: FlagType,
}
/// Per-ray data that is only needed occasionally (the "cold" half of a
/// ray), stored apart from the frequently accessed `RayHot` data.
#[derive(Debug, Copy, Clone)]
struct RayCold {
    /// Ray origin in world space.
    orig: Point,
    /// Ray direction in world space.
    dir: Vector,
    /// Wavelength carried by the ray.
    wavelength: f32,
}
/// A collection of rays stored as two parallel vectors: one with the
/// frequently accessed ("hot") data and one with the rarely accessed
/// ("cold") data.
///
/// Entry `i` of `hot` and entry `i` of `cold` together describe ray `i`.
#[derive(Debug)]
pub struct RayBatch {
    /// Hot data, one entry per ray.
    hot: Vec<RayHot>,
    /// Cold data, one entry per ray.
    cold: Vec<RayCold>,
}
impl RayBatch {
/// Creates a new empty ray batch.
pub fn new() -> RayBatch {
RayBatch {
hot: Vec::new(),
cold: Vec::new(),
}
}
/// Creates a new empty ray batch, with pre-allocated capacity for
/// `n` rays.
pub fn with_capacity(n: usize) -> RayBatch {
RayBatch {
hot: Vec::with_capacity(n),
cold: Vec::with_capacity(n),
}
}
pub fn push(&mut self, ray: Ray, is_occlusion: bool) {
self.hot.push(RayHot {
orig_local: ray.orig, // Bogus, to place-hold.
dir_inv_local: ray.dir, // Bogus, to place-hold.
max_t: ray.max_t, max_t: ray.max_t,
time: ray.time, time: ray.time,
flags: ray.flags, flags: if is_occlusion { OCCLUSION_FLAG } else { 0 },
id: id, });
} self.cold.push(RayCold {
orig: ray.orig,
dir: ray.dir,
wavelength: ray.wavelength,
});
} }
pub fn update_from_world_ray(&mut self, wr: &Ray) { pub fn swap(&mut self, a: usize, b: usize) {
self.orig = wr.orig; self.hot.swap(a, b);
self.dir_inv = Vector { self.cold.swap(a, b);
co: Float4::splat(1.0) / wr.dir.co, }
pub fn set_from_ray(&mut self, ray: &Ray, is_occlusion: bool, idx: usize) {
self.hot[idx].orig_local = ray.orig;
self.hot[idx].dir_inv_local = Vector {
co: Float4::splat(1.0) / ray.dir.co,
};
self.hot[idx].max_t = ray.max_t;
self.hot[idx].time = ray.time;
self.hot[idx].flags = if is_occlusion { OCCLUSION_FLAG } else { 0 };
self.cold[idx].orig = ray.orig;
self.cold[idx].dir = ray.dir;
self.cold[idx].wavelength = ray.wavelength;
}
pub fn truncate(&mut self, len: usize) {
self.hot.truncate(len);
self.cold.truncate(len);
}
/// Clear all rays, setting the size of the batch back to zero.
///
/// Capacity is maintained.
pub fn clear(&mut self) {
self.hot.clear();
self.cold.clear();
}
pub fn len(&self) -> usize {
self.hot.len()
}
/// Updates the accel data of the given ray (at index `idx`) with the
/// given world-to-local-space transform matrix.
///
/// This should be called when entering (and exiting) traversal of a
/// new transform space.
pub fn update_local(&mut self, idx: usize, xform: &Matrix4x4) {
self.hot[idx].orig_local = self.cold[idx].orig * *xform;
self.hot[idx].dir_inv_local = Vector {
co: Float4::splat(1.0) / (self.cold[idx].dir * *xform).co,
}; };
} }
pub fn update_from_xformed_world_ray(&mut self, wr: &Ray, mat: &Matrix4x4) { //==========================================================
self.orig = wr.orig * *mat; // Data access
self.dir_inv = Vector {
co: Float4::splat(1.0) / (wr.dir * *mat).co, #[inline(always)]
}; pub fn orig(&self, idx: usize) -> Point {
self.cold[idx].orig
} }
pub fn is_occlusion(&self) -> bool { #[inline(always)]
(self.flags & OCCLUSION_FLAG) != 0 pub fn dir(&self, idx: usize) -> Vector {
self.cold[idx].dir
} }
pub fn is_done(&self) -> bool { #[inline(always)]
(self.flags & DONE_FLAG) != 0 pub fn orig_local(&self, idx: usize) -> Point {
self.hot[idx].orig_local
} }
pub fn mark_done(&mut self) { #[inline(always)]
self.flags |= DONE_FLAG; pub fn dir_inv_local(&self, idx: usize) -> Vector {
self.hot[idx].dir_inv_local
}
#[inline(always)]
pub fn time(&self, idx: usize) -> f32 {
self.hot[idx].time
}
#[inline(always)]
pub fn max_t(&self, idx: usize) -> f32 {
self.hot[idx].max_t
}
#[inline(always)]
pub fn set_max_t(&mut self, idx: usize, new_max_t: f32) {
self.hot[idx].max_t = new_max_t;
}
#[inline(always)]
pub fn wavelength(&self, idx: usize) -> f32 {
self.cold[idx].wavelength
}
/// Returns whether the given ray (at index `idx`) is an occlusion ray.
#[inline(always)]
pub fn is_occlusion(&self, idx: usize) -> bool {
(self.hot[idx].flags & OCCLUSION_FLAG) != 0
}
/// Returns whether the given ray (at index `idx`) has finished traversal.
#[inline(always)]
pub fn is_done(&self, idx: usize) -> bool {
(self.hot[idx].flags & DONE_FLAG) != 0
}
/// Marks the given ray (at index `idx`) as an occlusion ray.
#[inline(always)]
pub fn mark_occlusion(&mut self, idx: usize) {
self.hot[idx].flags |= OCCLUSION_FLAG
}
/// Marks the given ray (at index `idx`) as having finished traversal.
#[inline(always)]
pub fn mark_done(&mut self, idx: usize) {
self.hot[idx].flags |= DONE_FLAG
} }
} }
/// Bookkeeping for moving a batch of rays through a scene during traversal.
///
/// Ray indices are stored in lanes, and a stack of tasks records which span
/// of which lane should be processed next.
#[derive(Debug)]
pub struct RayStack {
    /// Storage for ray indices, one `Lane` per output slot.
    lanes: Vec<Lane>,
    /// Pending work; the last element is the next task to execute.
    tasks: Vec<RayTask>,
}
impl RayStack {
pub fn new() -> RayStack {
RayStack {
lanes: Vec::new(),
tasks: Vec::new(),
}
}
/// Returns whether the stack is empty of tasks or not.
pub fn is_empty(&self) -> bool {
self.tasks.is_empty()
}
/// Makes sure there are at least `count` lanes.
pub fn ensure_lane_count(&mut self, count: usize) {
while self.lanes.len() < count {
self.lanes.push(Lane {
idxs: Vec::new(),
end_len: 0,
})
}
}
pub fn ray_count_in_next_task(&self) -> usize {
let task = self.tasks.last().unwrap();
let end = self.lanes[task.lane].end_len;
end - task.start_idx
}
pub fn next_task_ray_idx(&self, i: usize) -> usize {
let task = self.tasks.last().unwrap();
let i = i + task.start_idx;
debug_assert!(i < self.lanes[task.lane].end_len);
self.lanes[task.lane].idxs[i] as usize
}
/// Clears the lanes and tasks of the RayStack.
///
/// Note: this is (importantly) different than calling clear individually
/// on the `lanes` and `tasks` members. Specifically, we don't want to
/// clear `lanes` itself, as that would also free all the memory of the
/// individual lanes. Instead, we want to iterate over the individual
/// lanes and clear them, but leave `lanes` itself untouched.
pub fn clear(&mut self) {
for lane in self.lanes.iter_mut() {
lane.idxs.clear();
lane.end_len = 0;
}
self.tasks.clear();
}
/// Pushes the given ray index onto the end of the specified lane.
pub fn push_ray_index(&mut self, ray_idx: usize, lane: usize) {
assert!(self.lanes.len() > lane);
self.lanes[lane].idxs.push(ray_idx as RayIndexType);
}
/// Pushes any excess indices on the given lane to a new task on the
/// task stack.
///
/// Returns whether a task was pushed or not. No task will be pushed
/// if there are no excess indices on the end of the lane.
pub fn push_lane_to_task(&mut self, lane_idx: usize) -> bool {
if self.lanes[lane_idx].end_len < self.lanes[lane_idx].idxs.len() {
self.tasks.push(RayTask {
lane: lane_idx,
start_idx: self.lanes[lane_idx].end_len,
});
self.lanes[lane_idx].end_len = self.lanes[lane_idx].idxs.len();
true
} else {
false
}
}
/// Takes the given list of lane indices, and pushes any excess indices on
/// the end of each into a new task, in the order provided.
pub fn push_lanes_to_tasks(&mut self, lane_idxs: &[usize]) {
for &l in lane_idxs {
self.push_lane_to_task(l);
}
}
pub fn duplicate_next_task(&mut self) {
let task = self.tasks.last().unwrap();
let l = task.lane;
let start = task.start_idx;
let end = self.lanes[l].end_len;
// Extend the indices vector
self.lanes[l].idxs.reserve(end - start);
let old_len = self.lanes[l].idxs.len();
let new_len = old_len + end - start;
unsafe {
self.lanes[l].idxs.set_len(new_len);
}
// Copy elements
copy_in_place::copy_in_place(&mut self.lanes[l].idxs, start..end, end);
// Push the new task onto the stack
self.tasks.push(RayTask {
lane: l,
start_idx: end,
});
self.lanes[l].end_len = self.lanes[l].idxs.len();
}
// Pops the next task off the stack.
pub fn pop_task(&mut self) {
let task = self.tasks.pop().unwrap();
self.lanes[task.lane].end_len = task.start_idx;
self.lanes[task.lane].idxs.truncate(task.start_idx);
}
// Executes a task without popping it from the task stack.
pub fn do_next_task<F>(&mut self, mut handle_ray: F)
where
F: FnMut(usize),
{
let task = self.tasks.last().unwrap();
let task_range = (task.start_idx, self.lanes[task.lane].end_len);
// Execute task.
for i in task_range.0..task_range.1 {
let ray_idx = self.lanes[task.lane].idxs[i];
handle_ray(ray_idx as usize);
}
}
/// Pops the next task off the stack, and executes the provided closure for
/// each ray index in the task.
#[inline(always)]
pub fn pop_do_next_task<F>(&mut self, handle_ray: F)
where
F: FnMut(usize),
{
self.do_next_task(handle_ray);
self.pop_task();
}
/// Pops the next task off the stack, executes the provided closure for
/// each ray index in the task, and pushes the ray indices back onto the
/// indicated lanes.
pub fn pop_do_next_task_and_push_rays<F>(&mut self, output_lane_count: usize, mut handle_ray: F)
where
F: FnMut(usize) -> Bool4,
{
// Pop the task and do necessary bookkeeping.
let task = self.tasks.pop().unwrap();
let task_range = (task.start_idx, self.lanes[task.lane].end_len);
self.lanes[task.lane].end_len = task.start_idx;
// SAFETY: this is probably evil, and depends on behavior of Vec that
// are not actually promised. But we're essentially truncating the lane
// to the start of our task range, but will continue to access it's
// elements beyond that range via `get_unchecked()` below. Because the
// memory is not freed nor altered, this is safe. However, again, the
// Vec apis don't promise this behavior. So:
//
// TODO: build a slightly different lane abstraction to get this same
// efficiency without depending on implicit Vec behavior.
unsafe {
self.lanes[task.lane].idxs.set_len(task.start_idx);
}
// Execute task.
for i in task_range.0..task_range.1 {
let ray_idx = *unsafe { self.lanes[task.lane].idxs.get_unchecked(i) };
let push_mask = handle_ray(ray_idx as usize);
for l in 0..output_lane_count {
if push_mask.get_n(l) {
self.lanes[l as usize].idxs.push(ray_idx);
}
}
}
}
}
/// One lane of ray indices inside a `RayStack`.
#[derive(Debug)]
struct Lane {
    /// The ray indices stored in this lane.
    idxs: Vec<RayIndexType>,
    /// How much of `idxs` is already covered by tasks. Indices at or past
    /// this position have been pushed but not yet turned into a task.
    end_len: usize,
}
/// A unit of pending work inside a `RayStack`.
//
// A task names the lane that holds its ray indices and the position in
// that lane where those indices begin. The indices belonging to the task
// run from `start_idx` onward within that lane.
#[derive(Debug)]
struct RayTask {
    /// Which lane the task's ray indices live in.
    lane: usize,
    /// Position of the task's first ray index within that lane.
    start_idx: usize,
}

View File

@ -12,8 +12,7 @@ use scoped_threadpool::Pool;
use float4::Float4; use float4::Float4;
use crate::{ use crate::{
accel::{ACCEL_NODE_RAY_TESTS, ACCEL_TRAV_TIME}, accel::ACCEL_NODE_RAY_TESTS,
algorithm::partition_pair,
color::{map_0_1_to_wavelength, SpectralSample, XYZ}, color::{map_0_1_to_wavelength, SpectralSample, XYZ},
fp_utils::robust_ray_origin, fp_utils::robust_ray_origin,
hash::hash_u32, hash::hash_u32,
@ -21,7 +20,7 @@ use crate::{
image::Image, image::Image,
math::{fast_logit, upper_power_of_two}, math::{fast_logit, upper_power_of_two},
mis::power_heuristic, mis::power_heuristic,
ray::Ray, ray::{Ray, RayBatch},
scene::{Scene, SceneLightSample}, scene::{Scene, SceneLightSample},
surface, surface,
timer::Timer, timer::Timer,
@ -41,8 +40,8 @@ pub struct Renderer<'a> {
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
pub struct RenderStats { pub struct RenderStats {
pub trace_time: f64, pub trace_time: f64,
pub accel_traversal_time: f64,
pub accel_node_visits: u64, pub accel_node_visits: u64,
pub ray_count: u64,
pub initial_ray_generation_time: f64, pub initial_ray_generation_time: f64,
pub ray_generation_time: f64, pub ray_generation_time: f64,
pub sample_writing_time: f64, pub sample_writing_time: f64,
@ -53,8 +52,8 @@ impl RenderStats {
fn new() -> RenderStats { fn new() -> RenderStats {
RenderStats { RenderStats {
trace_time: 0.0, trace_time: 0.0,
accel_traversal_time: 0.0,
accel_node_visits: 0, accel_node_visits: 0,
ray_count: 0,
initial_ray_generation_time: 0.0, initial_ray_generation_time: 0.0,
ray_generation_time: 0.0, ray_generation_time: 0.0,
sample_writing_time: 0.0, sample_writing_time: 0.0,
@ -64,8 +63,8 @@ impl RenderStats {
fn collect(&mut self, other: RenderStats) { fn collect(&mut self, other: RenderStats) {
self.trace_time += other.trace_time; self.trace_time += other.trace_time;
self.accel_traversal_time += other.accel_traversal_time;
self.accel_node_visits += other.accel_node_visits; self.accel_node_visits += other.accel_node_visits;
self.ray_count += other.ray_count;
self.initial_ray_generation_time += other.initial_ray_generation_time; self.initial_ray_generation_time += other.initial_ray_generation_time;
self.ray_generation_time += other.ray_generation_time; self.ray_generation_time += other.ray_generation_time;
self.sample_writing_time += other.sample_writing_time; self.sample_writing_time += other.sample_writing_time;
@ -207,7 +206,7 @@ impl<'a> Renderer<'a> {
let mut total_timer = Timer::new(); let mut total_timer = Timer::new();
let mut paths = Vec::new(); let mut paths = Vec::new();
let mut rays = Vec::new(); let mut rays = RayBatch::new();
let mut tracer = Tracer::from_assembly(&self.scene.root); let mut tracer = Tracer::from_assembly(&self.scene.root);
let mut xform_stack = TransformStack::new(); let mut xform_stack = TransformStack::new();
@ -266,7 +265,7 @@ impl<'a> Renderer<'a> {
offset + si as u32, offset + si as u32,
); );
paths.push(path); paths.push(path);
rays.push(ray); rays.push(ray, false);
} }
} }
} }
@ -276,13 +275,20 @@ impl<'a> Renderer<'a> {
let mut pi = paths.len(); let mut pi = paths.len();
while pi > 0 { while pi > 0 {
// Test rays against scene // Test rays against scene
let isects = tracer.trace(&rays); let isects = tracer.trace(&mut rays);
stats.trace_time += timer.tick() as f64; stats.trace_time += timer.tick() as f64;
// Determine next rays to shoot based on result // Determine next rays to shoot based on result
pi = partition_pair(&mut paths[..pi], &mut rays[..pi], |i, path, ray| { let mut new_end = 0;
path.next(&mut xform_stack, &self.scene, &isects[i], &mut *ray) for i in 0..pi {
}); if paths[i].next(&mut xform_stack, &self.scene, &isects[i], &mut rays, i) {
paths.swap(new_end, i);
rays.swap(new_end, i);
new_end += 1;
}
}
rays.truncate(new_end);
pi = new_end;
stats.ray_generation_time += timer.tick() as f64; stats.ray_generation_time += timer.tick() as f64;
} }
@ -338,10 +344,7 @@ impl<'a> Renderer<'a> {
} }
stats.total_time += total_timer.tick() as f64; stats.total_time += total_timer.tick() as f64;
ACCEL_TRAV_TIME.with(|att| { stats.ray_count = tracer.rays_traced();
stats.accel_traversal_time = att.get();
att.set(0.0);
});
ACCEL_NODE_RAY_TESTS.with(|anv| { ACCEL_NODE_RAY_TESTS.with(|anv| {
stats.accel_node_visits = anv.get(); stats.accel_node_visits = anv.get();
anv.set(0); anv.set(0);
@ -431,7 +434,8 @@ impl LightPath {
xform_stack: &mut TransformStack, xform_stack: &mut TransformStack,
scene: &Scene, scene: &Scene,
isect: &surface::SurfaceIntersection, isect: &surface::SurfaceIntersection,
ray: &mut Ray, rays: &mut RayBatch,
ray_idx: usize,
) -> bool { ) -> bool {
match self.event { match self.event {
//-------------------------------------------------------------------- //--------------------------------------------------------------------
@ -496,13 +500,13 @@ impl LightPath {
// Distant light // Distant light
SceneLightSample::Distant { direction, .. } => { SceneLightSample::Distant { direction, .. } => {
let (attenuation, closure_pdf) = closure.evaluate( let (attenuation, closure_pdf) = closure.evaluate(
ray.dir, rays.dir(ray_idx),
direction, direction,
idata.nor, idata.nor,
idata.nor_g, idata.nor_g,
self.wavelength, self.wavelength,
); );
let mut shadow_ray = { let shadow_ray = {
// Calculate the shadow ray for testing if the light is // Calculate the shadow ray for testing if the light is
// in shadow or not. // in shadow or not.
let offset_pos = robust_ray_origin( let offset_pos = robust_ray_origin(
@ -511,15 +515,14 @@ impl LightPath {
idata.nor_g.normalized(), idata.nor_g.normalized(),
direction, direction,
); );
Ray::new( Ray {
offset_pos, orig: offset_pos,
direction, dir: direction,
self.time, time: self.time,
self.wavelength, wavelength: self.wavelength,
true, max_t: std::f32::INFINITY,
) }
}; };
shadow_ray.max_t = std::f32::INFINITY;
(attenuation, closure_pdf, shadow_ray) (attenuation, closure_pdf, shadow_ray)
} }
@ -527,7 +530,7 @@ impl LightPath {
SceneLightSample::Surface { sample_geo, .. } => { SceneLightSample::Surface { sample_geo, .. } => {
let dir = sample_geo.0 - idata.pos; let dir = sample_geo.0 - idata.pos;
let (attenuation, closure_pdf) = closure.evaluate( let (attenuation, closure_pdf) = closure.evaluate(
ray.dir, rays.dir(ray_idx),
dir, dir,
idata.nor, idata.nor,
idata.nor_g, idata.nor_g,
@ -548,13 +551,13 @@ impl LightPath {
sample_geo.1.normalized(), sample_geo.1.normalized(),
-dir, -dir,
); );
Ray::new( Ray {
offset_pos, orig: offset_pos,
offset_end - offset_pos, dir: offset_end - offset_pos,
self.time, time: self.time,
self.wavelength, wavelength: self.wavelength,
true, max_t: 1.0,
) }
}; };
(attenuation, closure_pdf, shadow_ray) (attenuation, closure_pdf, shadow_ray)
} }
@ -572,7 +575,7 @@ impl LightPath {
light_info.color().e * attenuation.e * self.light_attenuation light_info.color().e * attenuation.e * self.light_attenuation
/ (light_mis_pdf * light_sel_pdf); / (light_mis_pdf * light_sel_pdf);
*ray = shadow_ray; rays.set_from_ray(&shadow_ray, true, ray_idx);
true true
} }
@ -609,8 +612,13 @@ impl LightPath {
idata.nor_g.normalized(), idata.nor_g.normalized(),
dir, dir,
); );
self.next_bounce_ray = self.next_bounce_ray = Some(Ray {
Some(Ray::new(offset_pos, dir, self.time, self.wavelength, false)); orig: offset_pos,
dir: dir,
time: self.time,
wavelength: self.wavelength,
max_t: std::f32::INFINITY,
});
true true
} else { } else {
@ -626,7 +634,7 @@ impl LightPath {
self.event = LightPathEvent::ShadowRay; self.event = LightPathEvent::ShadowRay;
return true; return true;
} else if do_bounce { } else if do_bounce {
*ray = self.next_bounce_ray.unwrap(); rays.set_from_ray(&self.next_bounce_ray.unwrap(), false, ray_idx);
self.event = LightPathEvent::BounceRay; self.event = LightPathEvent::BounceRay;
self.light_attenuation *= self.next_attenuation_fac; self.light_attenuation *= self.next_attenuation_fac;
return true; return true;
@ -657,7 +665,7 @@ impl LightPath {
// Set up for the next bounce, if any // Set up for the next bounce, if any
if let Some(ref nbr) = self.next_bounce_ray { if let Some(ref nbr) = self.next_bounce_ray {
*ray = *nbr; rays.set_from_ray(nbr, false, ray_idx);
self.light_attenuation *= self.next_attenuation_fac; self.light_attenuation *= self.next_attenuation_fac;
self.event = LightPathEvent::BounceRay; self.event = LightPathEvent::BounceRay;
return true; return true;

View File

@ -8,7 +8,7 @@ use crate::{
boundable::Boundable, boundable::Boundable,
lerp::lerp_slice, lerp::lerp_slice,
math::{cross, dot, Matrix4x4, Normal, Point}, math::{cross, dot, Matrix4x4, Normal, Point},
ray::{AccelRay, Ray}, ray::{RayBatch, RayStack, RayTask}
shading::surface_closure::SurfaceClosure, shading::surface_closure::SurfaceClosure,
}; };
@ -99,8 +99,8 @@ impl<'a> MicropolyBatch<'a> {
impl<'a> MicropolyBatch<'a> { impl<'a> MicropolyBatch<'a> {
fn intersect_rays( fn intersect_rays(
&self, &self,
accel_rays: &mut [AccelRay], rays: &mut RayBatch,
wrays: &[Ray], ray_stack: &mut RayStack,
isects: &mut [SurfaceIntersection], isects: &mut [SurfaceIntersection],
space: &[Matrix4x4], space: &[Matrix4x4],
) { ) {
@ -112,7 +112,7 @@ impl<'a> MicropolyBatch<'a> {
}; };
self.accel self.accel
.traverse(&mut accel_rays[..], self.indices, |tri_indices, rs| { .traverse(rays, ray_stack, self.indices, |tri_indices, rs| {
// For static triangles with static transforms, cache them. // For static triangles with static transforms, cache them.
let is_cached = self.time_sample_count == 1 && space.len() <= 1; let is_cached = self.time_sample_count == 1 && space.len() <= 1;
let mut tri = if is_cached { let mut tri = if is_cached {

View File

@ -1,6 +1,6 @@
#![allow(dead_code)] #![allow(dead_code)]
pub mod micropoly_batch; // pub mod micropoly_batch;
pub mod triangle; pub mod triangle;
pub mod triangle_mesh; pub mod triangle_mesh;
@ -9,7 +9,7 @@ use std::fmt::Debug;
use crate::{ use crate::{
boundable::Boundable, boundable::Boundable,
math::{Matrix4x4, Normal, Point, Vector}, math::{Matrix4x4, Normal, Point, Vector},
ray::{AccelRay, Ray}, ray::{RayBatch, RayStack},
shading::surface_closure::SurfaceClosure, shading::surface_closure::SurfaceClosure,
shading::SurfaceShader, shading::SurfaceShader,
}; };
@ -17,8 +17,8 @@ use crate::{
pub trait Surface: Boundable + Debug + Sync { pub trait Surface: Boundable + Debug + Sync {
fn intersect_rays( fn intersect_rays(
&self, &self,
accel_rays: &mut [AccelRay], rays: &mut RayBatch,
wrays: &[Ray], ray_stack: &mut RayStack,
isects: &mut [SurfaceIntersection], isects: &mut [SurfaceIntersection],
shader: &SurfaceShader, shader: &SurfaceShader,
space: &[Matrix4x4], space: &[Matrix4x4],

View File

@ -1,6 +1,48 @@
#![allow(dead_code)] #![allow(dead_code)]
use crate::{fp_utils::fp_gamma, math::Point, ray::Ray}; use crate::{
fp_utils::fp_gamma,
math::{Point, Vector},
};
/// Per-ray constants for the ray/triangle test, computed once from the ray
/// direction so they can be reused for every triangle the ray is tested
/// against.
#[derive(Debug, Copy, Clone)]
pub struct RayTriPrecompute {
    /// Permuted dimension indices `(x, y, z)` of the ray space.
    i: (usize, usize, usize),
    /// Shear constants `(sx, sy, sz)` of the ray space.
    s: (f32, f32, f32),
}
impl RayTriPrecompute {
    /// Computes the dimension permutation and shear constants for a ray
    /// with direction `ray_dir`.
    pub fn new(ray_dir: Vector) -> RayTriPrecompute {
        let abs_x = ray_dir.x().abs();
        let abs_y = ray_dir.y().abs();
        let abs_z = ray_dir.z().abs();

        // Permute the dimensions so that the third index refers to the
        // component of the direction with the largest magnitude.
        let axes = if abs_x > abs_y && abs_x > abs_z {
            (1, 2, 0)
        } else if abs_y > abs_z {
            (2, 0, 1)
        } else {
            (0, 1, 2)
        };

        // Direction components in the permuted order.
        let permuted_x = ray_dir.get_n(axes.0);
        let permuted_y = ray_dir.get_n(axes.1);
        let permuted_z = ray_dir.get_n(axes.2);

        // Shear constants, all relative to the permuted z component.
        let shear = (
            permuted_x / permuted_z,
            permuted_y / permuted_z,
            1.0 / permuted_z,
        );

        RayTriPrecompute { i: axes, s: shear }
    }
}
/// Intersects `ray` with `tri`, returning `Some((t, b0, b1, b2))`, or `None` /// Intersects `ray` with `tri`, returning `Some((t, b0, b1, b2))`, or `None`
/// if no intersection. /// if no intersection.
@ -13,42 +55,23 @@ use crate::{fp_utils::fp_gamma, math::Point, ray::Ray};
/// ///
/// Uses the ray-triangle test from the paper "Watertight Ray/Triangle /// Uses the ray-triangle test from the paper "Watertight Ray/Triangle
/// Intersection" by Woop et al. /// Intersection" by Woop et al.
pub fn intersect_ray(ray: &Ray, tri: (Point, Point, Point)) -> Option<(f32, f32, f32, f32)> { pub fn intersect_ray(
// Calculate the permuted dimension indices for the new ray space. ray_orig: Point,
let (xi, yi, zi) = { ray_pre: RayTriPrecompute,
let xabs = ray.dir.x().abs(); ray_max_t: f32,
let yabs = ray.dir.y().abs(); tri: (Point, Point, Point),
let zabs = ray.dir.z().abs(); ) -> Option<(f32, f32, f32, f32)> {
if xabs > yabs && xabs > zabs {
(1, 2, 0)
} else if yabs > zabs {
(2, 0, 1)
} else {
(0, 1, 2)
}
};
let dir_x = ray.dir.get_n(xi);
let dir_y = ray.dir.get_n(yi);
let dir_z = ray.dir.get_n(zi);
// Calculate shear constants.
let sx = dir_x / dir_z;
let sy = dir_y / dir_z;
let sz = 1.0 / dir_z;
// Calculate vertices in ray space. // Calculate vertices in ray space.
let p0 = tri.0 - ray.orig; let p0 = tri.0 - ray_orig;
let p1 = tri.1 - ray.orig; let p1 = tri.1 - ray_orig;
let p2 = tri.2 - ray.orig; let p2 = tri.2 - ray_orig;
let p0x = p0.get_n(xi) - (sx * p0.get_n(zi)); let p0x = p0.get_n(ray_pre.i.0) - (ray_pre.s.0 * p0.get_n(ray_pre.i.2));
let p0y = p0.get_n(yi) - (sy * p0.get_n(zi)); let p0y = p0.get_n(ray_pre.i.1) - (ray_pre.s.1 * p0.get_n(ray_pre.i.2));
let p1x = p1.get_n(xi) - (sx * p1.get_n(zi)); let p1x = p1.get_n(ray_pre.i.0) - (ray_pre.s.0 * p1.get_n(ray_pre.i.2));
let p1y = p1.get_n(yi) - (sy * p1.get_n(zi)); let p1y = p1.get_n(ray_pre.i.1) - (ray_pre.s.1 * p1.get_n(ray_pre.i.2));
let p2x = p2.get_n(xi) - (sx * p2.get_n(zi)); let p2x = p2.get_n(ray_pre.i.0) - (ray_pre.s.0 * p2.get_n(ray_pre.i.2));
let p2y = p2.get_n(yi) - (sy * p2.get_n(zi)); let p2y = p2.get_n(ray_pre.i.1) - (ray_pre.s.1 * p2.get_n(ray_pre.i.2));
// Calculate scaled barycentric coordinates. // Calculate scaled barycentric coordinates.
let mut e0 = (p1x * p2y) - (p1y * p2x); let mut e0 = (p1x * p2y) - (p1y * p2x);
@ -74,14 +97,14 @@ pub fn intersect_ray(ray: &Ray, tri: (Point, Point, Point)) -> Option<(f32, f32,
} }
// Calculate t of hitpoint. // Calculate t of hitpoint.
let p0z = sz * p0.get_n(zi); let p0z = ray_pre.s.2 * p0.get_n(ray_pre.i.2);
let p1z = sz * p1.get_n(zi); let p1z = ray_pre.s.2 * p1.get_n(ray_pre.i.2);
let p2z = sz * p2.get_n(zi); let p2z = ray_pre.s.2 * p2.get_n(ray_pre.i.2);
let t_scaled = (e0 * p0z) + (e1 * p1z) + (e2 * p2z); let t_scaled = (e0 * p0z) + (e1 * p1z) + (e2 * p2z);
// Check if the hitpoint t is within ray min/max t. // Check if the hitpoint t is within ray min/max t.
if (det > 0.0 && (t_scaled <= 0.0 || t_scaled > (ray.max_t * det))) if (det > 0.0 && (t_scaled <= 0.0 || t_scaled > (ray_max_t * det)))
|| (det < 0.0 && (t_scaled >= 0.0 || t_scaled < (ray.max_t * det))) || (det < 0.0 && (t_scaled >= 0.0 || t_scaled < (ray_max_t * det)))
{ {
return None; return None;
} }

View File

@ -8,12 +8,14 @@ use crate::{
boundable::Boundable, boundable::Boundable,
lerp::lerp_slice, lerp::lerp_slice,
math::{cross, dot, Matrix4x4, Normal, Point}, math::{cross, dot, Matrix4x4, Normal, Point},
ray::{AccelRay, Ray}, ray::{RayBatch, RayStack},
shading::SurfaceShader, shading::SurfaceShader,
}; };
use super::{triangle, Surface, SurfaceIntersection, SurfaceIntersectionData}; use super::{triangle, Surface, SurfaceIntersection, SurfaceIntersectionData};
const MAX_LEAF_TRIANGLE_COUNT: usize = 3;
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub struct TriangleMesh<'a> { pub struct TriangleMesh<'a> {
time_sample_count: usize, time_sample_count: usize,
@ -93,7 +95,7 @@ impl<'a> TriangleMesh<'a> {
}; };
// Build BVH // Build BVH
let accel = BVH4::from_objects(arena, &mut indices[..], 3, |tri| { let accel = BVH4::from_objects(arena, &mut indices[..], MAX_LEAF_TRIANGLE_COUNT, |tri| {
&bounds &bounds
[(tri.3 as usize * time_sample_count)..((tri.3 as usize + 1) * time_sample_count)] [(tri.3 as usize * time_sample_count)..((tri.3 as usize + 1) * time_sample_count)]
}); });
@ -117,8 +119,8 @@ impl<'a> Boundable for TriangleMesh<'a> {
impl<'a> Surface for TriangleMesh<'a> { impl<'a> Surface for TriangleMesh<'a> {
fn intersect_rays( fn intersect_rays(
&self, &self,
accel_rays: &mut [AccelRay], rays: &mut RayBatch,
wrays: &[Ray], ray_stack: &mut RayStack,
isects: &mut [SurfaceIntersection], isects: &mut [SurfaceIntersection],
shader: &SurfaceShader, shader: &SurfaceShader,
space: &[Matrix4x4], space: &[Matrix4x4],
@ -131,35 +133,66 @@ impl<'a> Surface for TriangleMesh<'a> {
}; };
self.accel self.accel
.traverse(&mut accel_rays[..], self.indices, |tri_indices, rs| { .traverse(rays, ray_stack, |idx_range, rays, ray_stack| {
let tri_count = idx_range.end - idx_range.start;
// Build the triangle cache if we can!
let is_cached = ray_stack.ray_count_in_next_task() >= tri_count
&& self.time_sample_count == 1
&& space.len() <= 1;
let mut tri_cache = [unsafe { std::mem::uninitialized() }; MAX_LEAF_TRIANGLE_COUNT];
if is_cached {
for tri_idx in idx_range.clone() {
let i = tri_idx - idx_range.start;
let tri_indices = self.indices[tri_idx];
// For static triangles with static transforms, cache them. // For static triangles with static transforms, cache them.
let is_cached = self.time_sample_count == 1 && space.len() <= 1; tri_cache[i] = (
let mut tri = if is_cached {
let tri = (
self.vertices[tri_indices.0 as usize], self.vertices[tri_indices.0 as usize],
self.vertices[tri_indices.1 as usize], self.vertices[tri_indices.1 as usize],
self.vertices[tri_indices.2 as usize], self.vertices[tri_indices.2 as usize],
); );
if space.is_empty() { if !space.is_empty() {
tri tri_cache[i].0 = tri_cache[i].0 * static_mat_space;
} else { tri_cache[i].1 = tri_cache[i].1 * static_mat_space;
( tri_cache[i].2 = tri_cache[i].2 * static_mat_space;
tri.0 * static_mat_space,
tri.1 * static_mat_space,
tri.2 * static_mat_space,
)
} }
}
}
// Test each ray against the triangles.
ray_stack.do_next_task(|ray_idx| {
let ray_idx = ray_idx as usize;
if rays.is_done(ray_idx) {
return;
}
let ray_time = rays.time(ray_idx);
// Calculate the ray space, if necessary.
let mat_space = if space.len() > 1 {
// Per-ray transform, for motion blur
lerp_slice(space, ray_time).inverse()
} else { } else {
unsafe { std::mem::uninitialized() } static_mat_space
}; };
// Test each ray against the current triangle. // Iterate through the triangles and test the ray against them.
for r in rs { let mut non_shadow_hit = false;
let wr = &wrays[r.id as usize]; let mut hit_tri = unsafe { std::mem::uninitialized() };
let mut hit_tri_indices = unsafe { std::mem::uninitialized() };
let mut hit_tri_data = unsafe { std::mem::uninitialized() };
let ray_pre = triangle::RayTriPrecompute::new(rays.dir(ray_idx));
for tri_idx in idx_range.clone() {
let tri_indices = self.indices[tri_idx];
// Get triangle if necessary // Get triangle if necessary
if !is_cached { let tri = if is_cached {
tri = if self.time_sample_count == 1 { let i = tri_idx - idx_range.start;
tri_cache[i]
} else {
let mut tri = if self.time_sample_count == 1 {
// No deformation motion blur, so fast-path it. // No deformation motion blur, so fast-path it.
( (
self.vertices[tri_indices.0 as usize], self.vertices[tri_indices.0 as usize],
@ -178,65 +211,69 @@ impl<'a> Surface for TriangleMesh<'a> {
* self.time_sample_count) * self.time_sample_count)
..((tri_indices.2 as usize + 1) * self.time_sample_count)]; ..((tri_indices.2 as usize + 1) * self.time_sample_count)];
let p0 = lerp_slice(p0_slice, wr.time); let p0 = lerp_slice(p0_slice, ray_time);
let p1 = lerp_slice(p1_slice, wr.time); let p1 = lerp_slice(p1_slice, ray_time);
let p2 = lerp_slice(p2_slice, wr.time); let p2 = lerp_slice(p2_slice, ray_time);
(p0, p1, p2) (p0, p1, p2)
}; };
if !space.is_empty() {
tri.0 = tri.0 * mat_space;
tri.1 = tri.1 * mat_space;
tri.2 = tri.2 * mat_space;
} }
// Transform triangle if necessary, and get transform space. tri
let mat_space = if !space.is_empty() {
if space.len() > 1 {
// Per-ray transform, for motion blur
let mat_space = lerp_slice(space, wr.time).inverse();
tri = (tri.0 * mat_space, tri.1 * mat_space, tri.2 * mat_space);
mat_space
} else {
// Same transform for all rays
if !is_cached {
tri = (
tri.0 * static_mat_space,
tri.1 * static_mat_space,
tri.2 * static_mat_space,
);
}
static_mat_space
}
} else {
// No transforms
Matrix4x4::new()
}; };
// Test ray against triangle // Test ray against triangle
if let Some((t, b0, b1, b2)) = triangle::intersect_ray(wr, tri) { if let Some((t, b0, b1, b2)) = triangle::intersect_ray(
if t < r.max_t { rays.orig(ray_idx),
if r.is_occlusion() { ray_pre,
isects[r.id as usize] = SurfaceIntersection::Occlude; rays.max_t(ray_idx),
r.mark_done(); tri,
) {
if rays.is_occlusion(ray_idx) {
isects[ray_idx] = SurfaceIntersection::Occlude;
rays.mark_done(ray_idx);
break;
} else { } else {
non_shadow_hit = true;
rays.set_max_t(ray_idx, t);
hit_tri = tri;
hit_tri_indices = tri_indices;
hit_tri_data = (t, b0, b1, b2);
}
}
}
// Calculate intersection data if necessary.
if non_shadow_hit {
let (t, b0, b1, b2) = hit_tri_data;
// Calculate intersection point and error magnitudes // Calculate intersection point and error magnitudes
let (pos, pos_err) = triangle::surface_point(tri, (b0, b1, b2)); let (pos, pos_err) = triangle::surface_point(hit_tri, (b0, b1, b2));
// Calculate geometric surface normal // Calculate geometric surface normal
let geo_normal = cross(tri.0 - tri.1, tri.0 - tri.2).into_normal(); let geo_normal =
cross(hit_tri.0 - hit_tri.1, hit_tri.0 - hit_tri.2).into_normal();
// Calculate interpolated surface normal, if any // Calculate interpolated surface normal, if any
let shading_normal = if let Some(normals) = self.normals { let shading_normal = if let Some(normals) = self.normals {
let n0_slice = &normals[(tri_indices.0 as usize let n0_slice = &normals[(hit_tri_indices.0 as usize
* self.time_sample_count) * self.time_sample_count)
..((tri_indices.0 as usize + 1) * self.time_sample_count)]; ..((hit_tri_indices.0 as usize + 1) * self.time_sample_count)];
let n1_slice = &normals[(tri_indices.1 as usize let n1_slice = &normals[(hit_tri_indices.1 as usize
* self.time_sample_count) * self.time_sample_count)
..((tri_indices.1 as usize + 1) * self.time_sample_count)]; ..((hit_tri_indices.1 as usize + 1) * self.time_sample_count)];
let n2_slice = &normals[(tri_indices.2 as usize let n2_slice = &normals[(hit_tri_indices.2 as usize
* self.time_sample_count) * self.time_sample_count)
..((tri_indices.2 as usize + 1) * self.time_sample_count)]; ..((hit_tri_indices.2 as usize + 1) * self.time_sample_count)];
let n0 = lerp_slice(n0_slice, wr.time).normalized(); let n0 = lerp_slice(n0_slice, ray_time).normalized();
let n1 = lerp_slice(n1_slice, wr.time).normalized(); let n1 = lerp_slice(n1_slice, ray_time).normalized();
let n2 = lerp_slice(n2_slice, wr.time).normalized(); let n2 = lerp_slice(n2_slice, ray_time).normalized();
let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space; let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space;
if dot(s_nor, geo_normal) >= 0.0 { if dot(s_nor, geo_normal) >= 0.0 {
@ -249,7 +286,7 @@ impl<'a> Surface for TriangleMesh<'a> {
}; };
let intersection_data = SurfaceIntersectionData { let intersection_data = SurfaceIntersectionData {
incoming: wr.dir, incoming: rays.dir(ray_idx),
t: t, t: t,
pos: pos, pos: pos,
pos_err: pos_err, pos_err: pos_err,
@ -260,15 +297,13 @@ impl<'a> Surface for TriangleMesh<'a> {
}; };
// Fill in intersection data // Fill in intersection data
isects[r.id as usize] = SurfaceIntersection::Hit { isects[ray_idx] = SurfaceIntersection::Hit {
intersection_data: intersection_data, intersection_data: intersection_data,
closure: shader.shade(&intersection_data, wr.time), closure: shader.shade(&intersection_data, ray_time),
}; };
r.max_t = t;
}
}
}
} }
}); });
ray_stack.pop_task();
});
} }
} }

View File

@ -1,10 +1,11 @@
use std::iter; use std::iter;
use crate::{ use crate::{
algorithm::partition, accel::ray_code,
color::{rec709_to_xyz, Color}, color::{rec709_to_xyz, Color},
lerp::lerp_slice, lerp::lerp_slice,
ray::{AccelRay, Ray}, math::Matrix4x4,
ray::{RayBatch, RayStack},
scene::{Assembly, InstanceType, Object}, scene::{Assembly, InstanceType, Object},
shading::{SimpleSurfaceShader, SurfaceShader}, shading::{SimpleSurfaceShader, SurfaceShader},
surface::SurfaceIntersection, surface::SurfaceIntersection,
@ -12,14 +13,16 @@ use crate::{
}; };
pub struct Tracer<'a> { pub struct Tracer<'a> {
rays: Vec<AccelRay>, ray_trace_count: u64,
ray_stack: RayStack,
inner: TracerInner<'a>, inner: TracerInner<'a>,
} }
impl<'a> Tracer<'a> { impl<'a> Tracer<'a> {
pub fn from_assembly(assembly: &'a Assembly) -> Tracer<'a> { pub fn from_assembly(assembly: &'a Assembly) -> Tracer<'a> {
Tracer { Tracer {
rays: Vec::new(), ray_trace_count: 0,
ray_stack: RayStack::new(),
inner: TracerInner { inner: TracerInner {
root: assembly, root: assembly,
xform_stack: TransformStack::new(), xform_stack: TransformStack::new(),
@ -28,17 +31,13 @@ impl<'a> Tracer<'a> {
} }
} }
pub fn trace<'b>(&'b mut self, wrays: &[Ray]) -> &'b [SurfaceIntersection] { pub fn trace<'b>(&'b mut self, rays: &mut RayBatch) -> &'b [SurfaceIntersection] {
self.rays.clear(); self.ray_trace_count += rays.len() as u64;
self.rays.reserve(wrays.len()); self.inner.trace(rays, &mut self.ray_stack)
let mut ids = 0..(wrays.len() as u32); }
self.rays.extend(
wrays
.iter()
.map(|wr| AccelRay::new(wr, ids.next().unwrap())),
);
self.inner.trace(wrays, &mut self.rays[..]) pub fn rays_traced(&self) -> u64 {
self.ray_trace_count
} }
} }
@ -49,16 +48,37 @@ struct TracerInner<'a> {
} }
impl<'a> TracerInner<'a> { impl<'a> TracerInner<'a> {
fn trace<'b>(&'b mut self, wrays: &[Ray], rays: &mut [AccelRay]) -> &'b [SurfaceIntersection] { fn trace<'b>(
&'b mut self,
rays: &mut RayBatch,
ray_stack: &mut RayStack,
) -> &'b [SurfaceIntersection] {
ray_stack.clear();
// Ready the isects // Ready the isects
self.isects.clear(); self.isects.clear();
self.isects.reserve(wrays.len()); self.isects.reserve(rays.len());
self.isects self.isects
.extend(iter::repeat(SurfaceIntersection::Miss).take(wrays.len())); .extend(iter::repeat(SurfaceIntersection::Miss).take(rays.len()));
let mut ray_sets = split_rays_by_direction(&mut rays[..]); // Prep the accel part of the rays.
for ray_set in ray_sets.iter_mut().filter(|ray_set| !ray_set.is_empty()) { {
self.trace_assembly(self.root, wrays, ray_set); let ident = Matrix4x4::new();
for i in 0..rays.len() {
rays.update_local(i, &ident);
}
}
// Divide the rays into 8 different lanes by direction.
ray_stack.ensure_lane_count(8);
for i in 0..rays.len() {
ray_stack.push_ray_index(i, ray_code(rays.dir(i)));
}
ray_stack.push_lanes_to_tasks(&[0, 1, 2, 3, 4, 5, 6, 7]);
// Trace each of the 8 lanes separately.
while !ray_stack.is_empty() {
self.trace_assembly(self.root, rays, ray_stack);
} }
&self.isects &self.isects
@ -67,82 +87,43 @@ impl<'a> TracerInner<'a> {
fn trace_assembly<'b>( fn trace_assembly<'b>(
&'b mut self, &'b mut self,
assembly: &Assembly, assembly: &Assembly,
wrays: &[Ray], rays: &mut RayBatch,
accel_rays: &mut [AccelRay], ray_stack: &mut RayStack,
) { ) {
assembly assembly
.object_accel .object_accel
.traverse(&mut accel_rays[..], &assembly.instances[..], |inst, rs| { .traverse(rays, ray_stack, |idx_range, rays, ray_stack| {
let inst = &assembly.instances[idx_range.start];
// Transform rays if needed // Transform rays if needed
if let Some((xstart, xend)) = inst.transform_indices { if let Some((xstart, xend)) = inst.transform_indices {
// Push transforms to stack // Push transforms to stack
self.xform_stack.push(&assembly.xforms[xstart..xend]); self.xform_stack.push(&assembly.xforms[xstart..xend]);
// Do transforms // Do transforms
// TODO: re-divide rays based on direction (maybe?).
let xforms = self.xform_stack.top(); let xforms = self.xform_stack.top();
for ray in &mut rs[..] { ray_stack.do_next_task(|ray_idx| {
let id = ray.id; let t = rays.time(ray_idx);
let t = ray.time; rays.update_local(ray_idx, &lerp_slice(xforms, t));
ray.update_from_xformed_world_ray( });
&wrays[id as usize], ray_stack.duplicate_next_task();
&lerp_slice(xforms, t),
);
}
} }
// Trace rays // Trace rays
{
// This is kind of weird looking, but what we're doing here is
// splitting the rays up based on direction if they were
// transformed, and not splitting them up if they weren't
// transformed.
// But to keep the actual tracing code in one place (DRY),
// we map both cases to an array slice that contains slices of
// ray arrays. Gah... that's confusing even when explained.
// TODO: do this in a way that's less confusing. Probably split
// the tracing code out into a trace_instance() method or
// something.
let mut tmp = if inst.transform_indices.is_some() {
split_rays_by_direction(rs)
} else {
[
&mut rs[..],
&mut [],
&mut [],
&mut [],
&mut [],
&mut [],
&mut [],
&mut [],
]
};
let ray_sets = if inst.transform_indices.is_some() {
&mut tmp[..]
} else {
&mut tmp[..1]
};
// Loop through the split ray slices and trace them
for ray_set in ray_sets.iter_mut().filter(|ray_set| !ray_set.is_empty()) {
match inst.instance_type { match inst.instance_type {
InstanceType::Object => { InstanceType::Object => {
self.trace_object( self.trace_object(
&assembly.objects[inst.data_index], &assembly.objects[inst.data_index],
inst.surface_shader_index inst.surface_shader_index
.map(|i| assembly.surface_shaders[i]), .map(|i| assembly.surface_shaders[i]),
wrays, rays,
ray_set, ray_stack,
); );
} }
InstanceType::Assembly => { InstanceType::Assembly => {
self.trace_assembly( self.trace_assembly(&assembly.assemblies[inst.data_index], rays, ray_stack);
&assembly.assemblies[inst.data_index],
wrays,
ray_set,
);
}
}
} }
} }
@ -154,19 +135,15 @@ impl<'a> TracerInner<'a> {
// Undo transforms // Undo transforms
let xforms = self.xform_stack.top(); let xforms = self.xform_stack.top();
if !xforms.is_empty() { if !xforms.is_empty() {
for ray in &mut rs[..] { ray_stack.pop_do_next_task(|ray_idx| {
let id = ray.id; let t = rays.time(ray_idx);
let t = ray.time; rays.update_local(ray_idx, &lerp_slice(xforms, t));
ray.update_from_xformed_world_ray( });
&wrays[id as usize],
&lerp_slice(xforms, t),
);
}
} else { } else {
for ray in &mut rs[..] { let ident = Matrix4x4::new();
let id = ray.id; ray_stack.pop_do_next_task(|ray_idx| {
ray.update_from_world_ray(&wrays[id as usize]); rays.update_local(ray_idx, &ident);
} });
} }
} }
}); });
@ -176,8 +153,8 @@ impl<'a> TracerInner<'a> {
&'b mut self, &'b mut self,
obj: &Object, obj: &Object,
surface_shader: Option<&SurfaceShader>, surface_shader: Option<&SurfaceShader>,
wrays: &[Ray], rays: &mut RayBatch,
rays: &mut [AccelRay], ray_stack: &mut RayStack,
) { ) {
match *obj { match *obj {
Object::Surface(surface) => { Object::Surface(surface) => {
@ -188,7 +165,7 @@ impl<'a> TracerInner<'a> {
surface.intersect_rays( surface.intersect_rays(
rays, rays,
wrays, ray_stack,
&mut self.isects, &mut self.isects,
shader, shader,
self.xform_stack.top(), self.xform_stack.top(),
@ -203,7 +180,7 @@ impl<'a> TracerInner<'a> {
surface.intersect_rays( surface.intersect_rays(
rays, rays,
wrays, ray_stack,
&mut self.isects, &mut self.isects,
&bogus_shader, &bogus_shader,
self.xform_stack.top(), self.xform_stack.top(),
@ -212,27 +189,3 @@ impl<'a> TracerInner<'a> {
} }
} }
} }
/// Reorders `rays` in place into eight contiguous groups keyed on the signs
/// of the x, y and z components of each ray's `dir_inv`, and returns one
/// mutable sub-slice per group.
///
/// The slice is first partitioned on `dir_inv.x() >= 0.0`, each half is then
/// partitioned on `dir_inv.y() >= 0.0`, and each quarter on
/// `dir_inv.z() >= 0.0`.  The seven resulting boundaries `s1..s7` delimit the
/// eight returned sub-slices, any of which may be empty.
// NOTE(review): `partition` is defined elsewhere.  The offsets added to its
// results below imply that it returns a split index relative to the slice it
// was handed; which side of the split holds the elements that satisfy the
// predicate is not visible from here -- confirm against `algorithm::partition`.
fn split_rays_by_direction(rays: &mut [AccelRay]) -> [&mut [AccelRay]; 8] {
// | | | | | | | | |
// s1 s2 s3 s4 s5 s6 s7
// Split on x first: s4 is the midpoint boundary of the whole slice.
let s4 = partition(&mut rays[..], |r| r.dir_inv.x() >= 0.0);
// Split each x-half on y.  Results for the upper half are offset by s4 so
// that every boundary is an index into `rays` itself.
let s2 = partition(&mut rays[..s4], |r| r.dir_inv.y() >= 0.0);
let s6 = s4 + partition(&mut rays[s4..], |r| r.dir_inv.y() >= 0.0);
// Split each of the four xy-quarters on z, again offsetting by the start of
// the quarter being partitioned.
let s1 = partition(&mut rays[..s2], |r| r.dir_inv.z() >= 0.0);
let s3 = s2 + partition(&mut rays[s2..s4], |r| r.dir_inv.z() >= 0.0);
let s5 = s4 + partition(&mut rays[s4..s6], |r| r.dir_inv.z() >= 0.0);
let s7 = s6 + partition(&mut rays[s6..], |r| r.dir_inv.z() >= 0.0);
// Peel the sub-slices off from the back.  Each `split_at_mut` consumes the
// previous `rest`, so the eight mutable borrows never overlap.
let (rest, rs7) = rays.split_at_mut(s7);
let (rest, rs6) = rest.split_at_mut(s6);
let (rest, rs5) = rest.split_at_mut(s5);
let (rest, rs4) = rest.split_at_mut(s4);
let (rest, rs3) = rest.split_at_mut(s3);
let (rest, rs2) = rest.split_at_mut(s2);
let (rs0, rs1) = rest.split_at_mut(s1);
[rs0, rs1, rs2, rs3, rs4, rs5, rs6, rs7]
}

View File

@ -620,6 +620,29 @@ mod x86_64_sse {
} }
impl Bool4 { impl Bool4 {
/// Creates a `Bool4` from four individual bools.
///
/// `a` is stored in lane 0 and `d` in lane 3.  A `true` lane has every bit
/// set and a `false` lane has every bit cleared, which is the representation
/// the SSE comparison instructions produce.  Storing the float `1.0` for
/// `true` instead would leave the low-order byte of the lane zero, so code
/// that inspects individual bytes or uses the value as a bit mask would see
/// such a lane as `false`.
#[inline(always)]
pub fn new(a: bool, b: bool, c: bool, d: bool) -> Bool4 {
    use std::arch::x86_64::{_mm_castsi128_ps, _mm_set_epi32};

    // `-(x as i32)` maps `true` to -1 (all bits set) and `false` to 0.
    // `_mm_set_epi32` takes its arguments from the highest lane down to the
    // lowest, hence the reversed order.
    Bool4 {
        // SAFETY: both intrinsics only require SSE2, which is part of the
        // x86_64 baseline, and neither touches memory.
        data: unsafe {
            _mm_castsi128_ps(_mm_set_epi32(
                -(d as i32),
                -(c as i32),
                -(b as i32),
                -(a as i32),
            ))
        },
    }
}
/// Creates a `Bool4` whose four elements are all `false`, i.e. with every
/// bit of the underlying vector cleared.
#[inline(always)]
pub fn new_false() -> Bool4 {
    use std::arch::x86_64::_mm_setzero_ps;

    // SAFETY: `_mm_setzero_ps` only requires SSE, which is always available
    // on x86_64, and does not access memory.
    let all_zero = unsafe { _mm_setzero_ps() };
    Bool4 { data: all_zero }
}
/// Returns the value of the nth element. /// Returns the value of the nth element.
#[inline(always)] #[inline(always)]
pub fn get_n(&self, n: usize) -> bool { pub fn get_n(&self, n: usize) -> bool {
@ -637,24 +660,34 @@ mod x86_64_sse {
self.get_n(0) self.get_n(0)
} }
/// Returns the value of the 1th element. /// Returns the value of the 1st element.
#[inline(always)] #[inline(always)]
pub fn get_1(&self) -> bool { pub fn get_1(&self) -> bool {
self.get_n(1) self.get_n(1)
} }
/// Returns the value of the 2th element. /// Returns the value of the 2nd element.
#[inline(always)] #[inline(always)]
pub fn get_2(&self) -> bool { pub fn get_2(&self) -> bool {
self.get_n(2) self.get_n(2)
} }
/// Returns the value of the 3th element. /// Returns the value of the 3rd element.
#[inline(always)] #[inline(always)]
pub fn get_3(&self) -> bool { pub fn get_3(&self) -> bool {
self.get_n(3) self.get_n(3)
} }
/// Returns `true` only when every one of the four bools is false.
///
/// Equivalent to negating the `OR` of all four elements: a single true
/// element makes this return `false`.  The check is done on the raw 128
/// bits, so any set bit in any lane counts as true.
#[inline(always)]
pub fn is_all_false(&self) -> bool {
    // SAFETY: `__m128` and `u128` are both plain 16-byte values, and every
    // bit pattern is a valid `u128`.
    let bits = unsafe { std::mem::transmute::<__m128, u128>(self.data) };
    bits == 0
}
#[inline] #[inline]
pub fn to_bitmask(&self) -> u8 { pub fn to_bitmask(&self) -> u8 {
let a = unsafe { *(&self.data as *const __m128 as *const u8).offset(0) }; let a = unsafe { *(&self.data as *const __m128 as *const u8).offset(0) };
@ -1236,21 +1269,25 @@ mod fallback {
det det
} }
/// Essentially a tuple of four bools, which will use SIMD operations /// Essentially a tuple of four bools.
/// where possible on a platform.
#[cfg(feature = "simd_perf")]
#[derive(Debug, Copy, Clone)]
pub struct Bool4 {
data: bool32fx4,
}
#[cfg(not(feature = "simd_perf"))]
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
pub struct Bool4 { pub struct Bool4 {
data: [bool; 4], data: [bool; 4],
} }
impl Bool4 { impl Bool4 {
/// Creates a `Bool4` holding `a`, `b`, `c` and `d` as elements 0 through 3.
#[inline(always)]
pub fn new(a: bool, b: bool, c: bool, d: bool) -> Bool4 {
    let data = [a, b, c, d];
    Bool4 { data }
}
/// Creates a `Bool4` whose four elements are all `false`.
#[inline(always)]
pub fn new_false() -> Bool4 {
    Bool4 { data: [false; 4] }
}
/// Returns the value of the nth element. /// Returns the value of the nth element.
#[inline(always)] #[inline(always)]
pub fn get_n(self, n: usize) -> bool { pub fn get_n(self, n: usize) -> bool {
@ -1285,6 +1322,15 @@ mod fallback {
self.get_n(3) self.get_n(3)
} }
/// Returns `true` only when every one of the four bools is false.
///
/// Equivalent to negating the `OR` of all four elements: a single true
/// element makes this return `false`.
#[inline(always)]
pub fn is_all_false(&self) -> bool {
    let any_set = self.data.iter().any(|&flag| flag);
    !any_set
}
#[inline] #[inline]
pub fn to_bitmask(self) -> u8 { pub fn to_bitmask(self) -> u8 {
(self.get_0() as u8) (self.get_0() as u8)
@ -1565,4 +1611,10 @@ mod tests {
assert_eq!(r, 0b00001010); assert_eq!(r, 0b00001010);
} }
/// `is_all_false` must hold for an all-false value and fail as soon as any
/// single lane is true, whichever lane that is.
#[test]
fn bool4_is_all_false() {
    // Both ways of building an all-false value agree.
    assert!(Bool4::new(false, false, false, false).is_all_false());
    assert!(Bool4::new_false().is_all_false());

    // One true element in each lane in turn.
    assert!(!Bool4::new(true, false, false, false).is_all_false());
    assert!(!Bool4::new(false, true, false, false).is_all_false());
    assert!(!Bool4::new(false, false, true, false).is_all_false());
    assert!(!Bool4::new(false, false, false, true).is_all_false());

    // Everything true.
    assert!(!Bool4::new(true, true, true, true).is_all_false());
}
} }