Improved ray batch data layout.

Gives a small performance boost.
This commit is contained in:
Nathan Vegdahl 2019-06-25 18:49:10 +09:00
parent 5dd8eb919b
commit 50f9fd851b

View File

@ -20,31 +20,37 @@ pub struct Ray {
pub max_t: f32, pub max_t: f32,
} }
/// A batch of rays, stored in SoA layout. /// The hot (frequently accessed) parts of ray data.
#[derive(Debug, Copy, Clone)]
struct RayHot {
orig_local: Point, // Local-space ray origin
dir_inv_local: Vector, // Local-space 1.0/ray direction
max_t: f32,
time: f32,
flags: FlagType,
}
/// The cold (infrequently accessed) parts of ray data.
#[derive(Debug, Copy, Clone)]
struct RayCold {
orig: Point, // World-space ray origin
dir: Vector, // World-space ray direction
wavelength: f32,
}
/// A batch of rays, separated into hot and cold parts.
#[derive(Debug)] #[derive(Debug)]
pub struct RayBatch { pub struct RayBatch {
orig_world: Vec<Point>, hot: Vec<RayHot>,
dir_world: Vec<Vector>, cold: Vec<RayCold>,
orig_accel: Vec<Point>,
dir_inv_accel: Vec<Vector>,
max_t: Vec<f32>,
time: Vec<f32>,
wavelength: Vec<f32>,
flags: Vec<FlagType>,
} }
impl RayBatch { impl RayBatch {
/// Creates a new empty ray batch. /// Creates a new empty ray batch.
pub fn new() -> RayBatch { pub fn new() -> RayBatch {
RayBatch { RayBatch {
orig_world: Vec::new(), hot: Vec::new(),
dir_world: Vec::new(), cold: Vec::new(),
orig_accel: Vec::new(),
dir_inv_accel: Vec::new(),
max_t: Vec::new(),
time: Vec::new(),
wavelength: Vec::new(),
flags: Vec::new(),
} }
} }
@ -52,87 +58,60 @@ impl RayBatch {
/// `n` rays. /// `n` rays.
pub fn with_capacity(n: usize) -> RayBatch { pub fn with_capacity(n: usize) -> RayBatch {
RayBatch { RayBatch {
orig_world: Vec::with_capacity(n), hot: Vec::with_capacity(n),
dir_world: Vec::with_capacity(n), cold: Vec::with_capacity(n),
orig_accel: Vec::with_capacity(n),
dir_inv_accel: Vec::with_capacity(n),
max_t: Vec::with_capacity(n),
time: Vec::with_capacity(n),
wavelength: Vec::with_capacity(n),
flags: Vec::with_capacity(n),
} }
} }
pub fn push(&mut self, ray: Ray, is_occlusion: bool) { pub fn push(&mut self, ray: Ray, is_occlusion: bool) {
self.orig_world.push(ray.orig); self.hot.push(RayHot {
self.dir_world.push(ray.dir); orig_local: ray.orig, // Bogus, to place-hold.
self.orig_accel.push(ray.orig); // Bogus, to place-hold. dir_inv_local: ray.dir, // Bogus, to place-hold.
self.dir_inv_accel.push(ray.dir); // Bogus, to place-hold. max_t: ray.max_t,
self.time.push(ray.time); time: ray.time,
self.wavelength.push(ray.wavelength); flags: if is_occlusion { OCCLUSION_FLAG } else { 0 },
if is_occlusion { });
self.max_t.push(1.0); self.cold.push(RayCold {
self.flags.push(OCCLUSION_FLAG); orig: ray.orig,
} else { dir: ray.dir,
self.max_t.push(std::f32::INFINITY); wavelength: ray.wavelength,
self.flags.push(0); });
}
} }
pub fn swap(&mut self, a: usize, b: usize) { pub fn swap(&mut self, a: usize, b: usize) {
if a != b { self.hot.swap(a, b);
self.orig_world.swap(a, b); self.cold.swap(a, b);
self.dir_world.swap(a, b);
self.orig_accel.swap(a, b);
self.dir_inv_accel.swap(a, b);
self.max_t.swap(a, b);
self.time.swap(a, b);
self.wavelength.swap(a, b);
self.flags.swap(a, b);
}
} }
pub fn set_from_ray(&mut self, ray: &Ray, is_shadow: bool, idx: usize) { pub fn set_from_ray(&mut self, ray: &Ray, is_occlusion: bool, idx: usize) {
self.orig_world[idx] = ray.orig; self.hot[idx].orig_local = ray.orig;
self.dir_world[idx] = ray.dir; self.hot[idx].dir_inv_local = Vector {
self.orig_accel[idx] = ray.orig;
self.dir_inv_accel[idx] = Vector {
co: Float4::splat(1.0) / ray.dir.co, co: Float4::splat(1.0) / ray.dir.co,
}; };
self.max_t[idx] = ray.max_t; self.hot[idx].max_t = ray.max_t;
self.time[idx] = ray.time; self.hot[idx].time = ray.time;
self.wavelength[idx] = ray.wavelength; self.hot[idx].flags = if is_occlusion { OCCLUSION_FLAG } else { 0 };
self.time[idx] = ray.time;
self.flags[idx] = if is_shadow { OCCLUSION_FLAG } else { 0 }; self.cold[idx].orig = ray.orig;
self.cold[idx].dir = ray.dir;
self.cold[idx].wavelength = ray.wavelength;
} }
pub fn truncate(&mut self, len: usize) { pub fn truncate(&mut self, len: usize) {
self.orig_world.truncate(len); self.hot.truncate(len);
self.dir_world.truncate(len); self.cold.truncate(len);
self.orig_accel.truncate(len);
self.dir_inv_accel.truncate(len);
self.max_t.truncate(len);
self.time.truncate(len);
self.wavelength.truncate(len);
self.flags.truncate(len);
} }
/// Clear all rays, settings the size of the batch back to zero. /// Clear all rays, settings the size of the batch back to zero.
/// ///
/// Capacity is maintained. /// Capacity is maintained.
pub fn clear(&mut self) { pub fn clear(&mut self) {
self.orig_world.clear(); self.hot.clear();
self.dir_world.clear(); self.cold.clear();
self.orig_accel.clear();
self.dir_inv_accel.clear();
self.max_t.clear();
self.time.clear();
self.wavelength.clear();
self.flags.clear();
} }
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.orig_world.len() self.hot.len()
} }
/// Updates the accel data of the given ray (at index `idx`) with the /// Updates the accel data of the given ray (at index `idx`) with the
@ -141,9 +120,9 @@ impl RayBatch {
/// This should be called when entering (and exiting) traversal of a /// This should be called when entering (and exiting) traversal of a
/// new transform space. /// new transform space.
pub fn update_local(&mut self, idx: usize, xform: &Matrix4x4) { pub fn update_local(&mut self, idx: usize, xform: &Matrix4x4) {
self.orig_accel[idx] = self.orig_world[idx] * *xform; self.hot[idx].orig_local = self.cold[idx].orig * *xform;
self.dir_inv_accel[idx] = Vector { self.hot[idx].dir_inv_local = Vector {
co: Float4::splat(1.0) / (self.dir_world[idx] * *xform).co, co: Float4::splat(1.0) / (self.cold[idx].dir * *xform).co,
}; };
} }
@ -152,66 +131,66 @@ impl RayBatch {
#[inline(always)] #[inline(always)]
pub fn orig(&self, idx: usize) -> Point { pub fn orig(&self, idx: usize) -> Point {
self.orig_world[idx] self.cold[idx].orig
} }
#[inline(always)] #[inline(always)]
pub fn dir(&self, idx: usize) -> Vector { pub fn dir(&self, idx: usize) -> Vector {
self.dir_world[idx] self.cold[idx].dir
} }
#[inline(always)] #[inline(always)]
pub fn orig_local(&self, idx: usize) -> Point { pub fn orig_local(&self, idx: usize) -> Point {
self.orig_accel[idx] self.hot[idx].orig_local
} }
#[inline(always)] #[inline(always)]
pub fn dir_inv_local(&self, idx: usize) -> Vector { pub fn dir_inv_local(&self, idx: usize) -> Vector {
self.dir_inv_accel[idx] self.hot[idx].dir_inv_local
} }
#[inline(always)] #[inline(always)]
pub fn time(&self, idx: usize) -> f32 { pub fn time(&self, idx: usize) -> f32 {
self.time[idx] self.hot[idx].time
} }
#[inline(always)] #[inline(always)]
pub fn max_t(&self, idx: usize) -> f32 { pub fn max_t(&self, idx: usize) -> f32 {
self.max_t[idx] self.hot[idx].max_t
} }
#[inline(always)] #[inline(always)]
pub fn set_max_t(&mut self, idx: usize, new_max_t: f32) { pub fn set_max_t(&mut self, idx: usize, new_max_t: f32) {
self.max_t[idx] = new_max_t; self.hot[idx].max_t = new_max_t;
} }
#[inline(always)] #[inline(always)]
pub fn wavelength(&self, idx: usize) -> f32 { pub fn wavelength(&self, idx: usize) -> f32 {
self.wavelength[idx] self.cold[idx].wavelength
} }
/// Returns whether the given ray (at index `idx`) is an occlusion ray. /// Returns whether the given ray (at index `idx`) is an occlusion ray.
#[inline(always)] #[inline(always)]
pub fn is_occlusion(&self, idx: usize) -> bool { pub fn is_occlusion(&self, idx: usize) -> bool {
(self.flags[idx] & OCCLUSION_FLAG) != 0 (self.hot[idx].flags & OCCLUSION_FLAG) != 0
} }
/// Returns whether the given ray (at index `idx`) has finished traversal. /// Returns whether the given ray (at index `idx`) has finished traversal.
#[inline(always)] #[inline(always)]
pub fn is_done(&self, idx: usize) -> bool { pub fn is_done(&self, idx: usize) -> bool {
(self.flags[idx] & DONE_FLAG) != 0 (self.hot[idx].flags & DONE_FLAG) != 0
} }
/// Marks the given ray (at index `idx`) as an occlusion ray. /// Marks the given ray (at index `idx`) as an occlusion ray.
#[inline(always)] #[inline(always)]
pub fn mark_occlusion(&mut self, idx: usize) { pub fn mark_occlusion(&mut self, idx: usize) {
self.flags[idx] |= OCCLUSION_FLAG self.hot[idx].flags |= OCCLUSION_FLAG
} }
/// Marks the given ray (at index `idx`) as having finished traversal. /// Marks the given ray (at index `idx`) as having finished traversal.
#[inline(always)] #[inline(always)]
pub fn mark_done(&mut self, idx: usize) { pub fn mark_done(&mut self, idx: usize) {
self.flags[idx] |= DONE_FLAG self.hot[idx].flags |= DONE_FLAG
} }
} }