diff --git a/Cargo.lock b/Cargo.lock index efa4183..a16b557 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -177,12 +177,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "glam" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "411e0584defa447c328f25c756ba3d0685727ecc126b46c3c1176001141cd4b6" - [[package]] name = "half" version = "1.7.1" @@ -339,7 +333,6 @@ dependencies = [ "copy_in_place", "crossbeam", "fastapprox", - "glam", "half", "halton", "kioku", @@ -604,7 +597,7 @@ checksum = "26e3528b09b1f1b1e152342a4462d1e80d568dc5623a0772252a6e584a53d550" name = "spectral_upsampling" version = "0.1.0" dependencies = [ - "glam", + "rmath", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index cfafd46..3ef777e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,6 @@ png_encode_mini = "0.1.2" rustc-serialize = "0.3" scoped_threadpool = "0.1" time = "0.1" -glam = "0.15" fastapprox = "0.3" # Local crate dependencies diff --git a/psychoblend/assembly.py b/psychoblend/assembly.py index aecb954..73df05f 100644 --- a/psychoblend/assembly.py +++ b/psychoblend/assembly.py @@ -314,7 +314,7 @@ class Instance: w.indent() w.write("Data [$%s]\n" % self.data_name) for mat in self.time_xforms: - w.write("Transform [%s]\n" % mat2str(mat.inverted())) + w.write("Transform [%s]\n" % mat2str(mat)) for ms in self.ob.material_slots: if ms != None: w.write("SurfaceShaderBind [$%s]\n" % escape_name(ms.material.name)) diff --git a/psychoblend/world.py b/psychoblend/world.py index c886fa4..bcaa645 100644 --- a/psychoblend/world.py +++ b/psychoblend/world.py @@ -82,7 +82,7 @@ class Camera: mat = self.ob.matrix_world.copy() matz = Matrix() matz[2][2] = -1 - self.xforms += [mat * matz] + self.xforms += [(mat * matz).inverted()] def export(self, render_engine, w): render_engine.update_stats("", "Psychopath: Exporting %s" % self.ob.name) diff --git a/src/accel/bvh4.rs b/src/accel/bvh4.rs index 3b78077..3c3b169 100644 --- a/src/accel/bvh4.rs +++ b/src/accel/bvh4.rs @@ -6,7 +6,7 @@ use std::mem::{transmute, MaybeUninit}; -use glam::BVec4A; +use rmath::wide4::Bool4; use kioku::Arena; @@ -123,12 +123,12 @@ impl<'a> BVH4<'a> { traversal_code, } => { node_tests += ray_stack.ray_count_in_next_task() as u64; - let mut all_hits = BVec4A::default(); + let mut all_hits = Bool4::new_false(); // Ray testing ray_stack.pop_do_next_task_and_push_rays(children.len(), |ray_idx| { if rays.is_done(ray_idx) { - BVec4A::default() + Bool4::new_false() } else { let hits = if bounds.len() == 1 { bounds[0].intersect_ray( diff --git a/src/bbox.rs b/src/bbox.rs index f4a2ab6..68e0c4a 100644 --- a/src/bbox.rs +++ b/src/bbox.rs @@ -7,7 +7,7 @@ use std::{ use crate::{ lerp::{lerp, lerp_slice, Lerp}, - math::{fast_minf32, Point, Transform, Vector}, + math::{fast_minf32, Point, Vector, Xform, XformFull}, }; const BBOX_MAXT_ADJUST: f32 = 1.000_000_24; @@ -41,12 +41,12 @@ impl BBox { // Returns whether the given ray intersects with the bbox. pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> bool { // Calculate slab intersections - let t1 = (self.min.co - orig.co) * dir_inv.co; - let t2 = (self.max.co - orig.co) * dir_inv.co; + let t1 = (self.min.0 - orig.0) * dir_inv.0; + let t2 = (self.max.0 - orig.0) * dir_inv.0; // Find the far and near intersection - let far_t = t1.max(t2).extend(std::f32::INFINITY); - let near_t = t1.min(t2).extend(0.0); + let far_t = t1.max(t2).set_d(std::f32::INFINITY); + let near_t = t1.min(t2).set_d(0.0); let far_hit_t = fast_minf32(far_t.min_element() * BBOX_MAXT_ADJUST, max_t); let near_hit_t = near_t.max_element(); @@ -54,8 +54,10 @@ impl BBox { near_hit_t <= far_hit_t } - // Creates a new BBox transformed into a different space. - pub fn transformed(&self, xform: Transform) -> BBox { + // Creates a new BBox transformed from its local space to the + // given space. + #[must_use] + pub fn xform(&self, xform: &XformFull) -> BBox { // BBox corners let vs = [ Point::new(self.min.x(), self.min.y(), self.min.z()), @@ -71,7 +73,7 @@ impl BBox { // Transform BBox corners and make new bbox let mut b = BBox::new(); for v in &vs { - let v = *v * xform; + let v = v.xform(&xform); b.min = v.min(b.min); b.max = v.max(b.max); } @@ -103,12 +105,8 @@ impl BitOr for BBox { fn bitor(self, rhs: BBox) -> BBox { BBox::from_points( - Point { - co: self.min.co.min(rhs.min.co), - }, - Point { - co: self.max.co.max(rhs.max.co), - }, + Point(self.min.0.min(rhs.min.0)), + Point(self.max.0.max(rhs.max.0)), ) } } @@ -124,14 +122,7 @@ impl BitOr for BBox { type Output = BBox; fn bitor(self, rhs: Point) -> BBox { - BBox::from_points( - Point { - co: self.min.co.min(rhs.co), - }, - Point { - co: self.max.co.max(rhs.co), - }, - ) + BBox::from_points(Point(self.min.0.min(rhs.0)), Point(self.max.0.max(rhs.0))) } } @@ -150,7 +141,11 @@ impl Lerp for BBox { } } -pub fn transform_bbox_slice_from(bbs_in: &[BBox], xforms: &[Transform], bbs_out: &mut Vec) { +pub fn transform_bbox_slice_from( + bbs_in: &[BBox], + xforms: &[Xform], + bbs_out: &mut Vec, +) -> Result<(), ()> { bbs_out.clear(); // Transform the bounding boxes @@ -158,17 +153,19 @@ pub fn transform_bbox_slice_from(bbs_in: &[BBox], xforms: &[Transform], bbs_out: bbs_out.extend_from_slice(bbs_in); } else if bbs_in.len() == xforms.len() { for (bb, xf) in Iterator::zip(bbs_in.iter(), xforms.iter()) { - bbs_out.push(bb.transformed(xf.inverse())); + bbs_out.push(bb.xform(&xf.into_full().ok_or(())?)); } } else if bbs_in.len() > xforms.len() { let s = (bbs_in.len() - 1) as f32; for (i, bb) in bbs_in.iter().enumerate() { - bbs_out.push(bb.transformed(lerp_slice(xforms, i as f32 / s).inverse())); + bbs_out.push(bb.xform(&lerp_slice(xforms, i as f32 / s).into_full().ok_or(())?)); } } else if bbs_in.len() < xforms.len() { let s = (xforms.len() - 1) as f32; for (i, xf) in xforms.iter().enumerate() { - bbs_out.push(lerp_slice(bbs_in, i as f32 / s).transformed(xf.inverse())); + bbs_out.push(lerp_slice(bbs_in, i as f32 / s).xform(&xf.into_full().ok_or(())?)); } } + + Ok(()) } diff --git a/src/bbox4.rs b/src/bbox4.rs index 9464388..c0e5861 100644 --- a/src/bbox4.rs +++ b/src/bbox4.rs @@ -9,16 +9,16 @@ use crate::{ math::{Point, Vector}, }; -use glam::{BVec4A, Vec4}; +use rmath::wide4::{Bool4, Float4}; const BBOX_MAXT_ADJUST: f32 = 1.000_000_24; /// A SIMD set of 4 3D axis-aligned bounding boxes. #[derive(Debug, Copy, Clone)] pub struct BBox4 { - pub x: (Vec4, Vec4), // (min, max) - pub y: (Vec4, Vec4), // (min, max) - pub z: (Vec4, Vec4), // (min, max) + pub x: (Float4, Float4), // (min, max) + pub y: (Float4, Float4), // (min, max) + pub z: (Float4, Float4), // (min, max) } impl BBox4 { @@ -26,16 +26,16 @@ impl BBox4 { pub fn new() -> BBox4 { BBox4 { x: ( - Vec4::splat(std::f32::INFINITY), - Vec4::splat(std::f32::NEG_INFINITY), + Float4::splat(std::f32::INFINITY), + Float4::splat(std::f32::NEG_INFINITY), ), y: ( - Vec4::splat(std::f32::INFINITY), - Vec4::splat(std::f32::NEG_INFINITY), + Float4::splat(std::f32::INFINITY), + Float4::splat(std::f32::NEG_INFINITY), ), z: ( - Vec4::splat(std::f32::INFINITY), - Vec4::splat(std::f32::NEG_INFINITY), + Float4::splat(std::f32::INFINITY), + Float4::splat(std::f32::NEG_INFINITY), ), } } @@ -45,30 +45,30 @@ impl BBox4 { pub fn from_bboxes(b1: BBox, b2: BBox, b3: BBox, b4: BBox) -> BBox4 { BBox4 { x: ( - Vec4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()), - Vec4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x()), + Float4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()), + Float4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x()), ), y: ( - Vec4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()), - Vec4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y()), + Float4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()), + Float4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y()), ), z: ( - Vec4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()), - Vec4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z()), + Float4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()), + Float4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z()), ), } } // Returns whether the given ray intersects with the bboxes. - pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> BVec4A { + pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> Bool4 { // Get the ray data into SIMD format. - let ro_x = Vec4::splat(orig.co[0]); - let ro_y = Vec4::splat(orig.co[1]); - let ro_z = Vec4::splat(orig.co[2]); - let rdi_x = Vec4::splat(dir_inv.co[0]); - let rdi_y = Vec4::splat(dir_inv.co[1]); - let rdi_z = Vec4::splat(dir_inv.co[2]); - let max_t = Vec4::splat(max_t); + let ro_x = orig.0.aaaa(); + let ro_y = orig.0.bbbb(); + let ro_z = orig.0.cccc(); + let rdi_x = dir_inv.0.aaaa(); + let rdi_y = dir_inv.0.bbbb(); + let rdi_z = dir_inv.0.cccc(); + let max_t = Float4::splat(max_t); // Slab tests let t1_x = (self.x.0 - ro_x) * rdi_x; @@ -87,10 +87,11 @@ impl BBox4 { let t_near_z = t1_z.min(t2_z); // Calculate over-all far t hit. - let far_t = (t_far_x.min(t_far_y.min(t_far_z)) * Vec4::splat(BBOX_MAXT_ADJUST)).min(max_t); + let far_t = + (t_far_x.min(t_far_y.min(t_far_z)) * Float4::splat(BBOX_MAXT_ADJUST)).min(max_t); // Calculate over-all near t hit. - let near_t = t_near_x.max(t_near_y).max(t_near_z.max(Vec4::splat(0.0))); + let near_t = t_near_x.max(t_near_y).max(t_near_z.max(Float4::splat(0.0))); // Hit results near_t.cmplt(far_t) diff --git a/src/camera.rs b/src/camera.rs index 788e207..4f65a7f 100644 --- a/src/camera.rs +++ b/src/camera.rs @@ -4,14 +4,14 @@ use kioku::Arena; use crate::{ lerp::lerp_slice, - math::{Point, Transform, Vector}, + math::{Point, Vector, Xform}, ray::Ray, sampling::square_to_circle, }; #[derive(Copy, Clone, Debug)] pub struct Camera<'a> { - transforms: &'a [Transform], + transforms: &'a [Xform], fovs: &'a [f32], tfovs: &'a [f32], aperture_radii: &'a [f32], @@ -21,7 +21,7 @@ pub struct Camera<'a> { impl<'a> Camera<'a> { pub fn new( arena: &'a Arena, - transforms: &[Transform], + transforms: &[Xform], fovs: &[f32], mut aperture_radii: &[f32], mut focus_distances: &[f32], @@ -73,7 +73,7 @@ impl<'a> Camera<'a> { pub fn generate_ray(&self, x: f32, y: f32, time: f32, wavelength: f32, u: f32, v: f32) -> Ray { // Get time-interpolated camera settings - let transform = lerp_slice(self.transforms, time); + let transform = lerp_slice(self.transforms, time).into_full_fast().unwrap(); let tfov = lerp_slice(self.tfovs, time); let aperture_radius = lerp_slice(self.aperture_radii, time); let focus_distance = lerp_slice(self.focus_distances, time); @@ -93,8 +93,8 @@ impl<'a> Camera<'a> { .normalized(); Ray { - orig: orig * transform, - dir: dir * transform, + orig: orig.xform_inv_fast(&transform), + dir: dir.xform_inv_fast(&transform), time: time, wavelength: wavelength, max_t: std::f32::INFINITY, diff --git a/src/color.rs b/src/color.rs index 3b1ec50..9e68b70 100644 --- a/src/color.rs +++ b/src/color.rs @@ -1,11 +1,11 @@ use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign}; +use crate::math::Float4; pub use color::{ rec709_e_to_xyz, rec709_to_xyz, xyz_to_aces_ap0, xyz_to_aces_ap0_e, xyz_to_rec709, xyz_to_rec709_e, }; use compact::fluv::fluv32; -use glam::Vec4; use half::f16; use spectral_upsampling::meng::{spectrum_xyz_to_p_4, EQUAL_ENERGY_REFLECTANCE}; @@ -31,10 +31,10 @@ fn nth_wavelength(hero_wavelength: f32, n: usize) -> f32 { } } -/// Returns all wavelengths of a hero wavelength set as a Vec4 +/// Returns all wavelengths of a hero wavelength set as a Float4 #[inline(always)] -fn wavelengths(hero_wavelength: f32) -> Vec4 { - Vec4::new( +fn wavelengths(hero_wavelength: f32) -> Float4 { + Float4::new( nth_wavelength(hero_wavelength, 0), nth_wavelength(hero_wavelength, 1), nth_wavelength(hero_wavelength, 2), @@ -94,7 +94,7 @@ impl Color { } => { SpectralSample::from_parts( // TODO: make this SIMD - Vec4::new( + Float4::new( plancks_law(temperature, wls[0]) * factor, plancks_law(temperature, wls[1]) * factor, plancks_law(temperature, wls[2]) * factor, @@ -109,7 +109,7 @@ impl Color { } => { SpectralSample::from_parts( // TODO: make this SIMD - Vec4::new( + Float4::new( plancks_law_normalized(temperature, wls[0]) * factor, plancks_law_normalized(temperature, wls[1]) * factor, plancks_law_normalized(temperature, wls[2]) * factor, @@ -386,7 +386,7 @@ fn plancks_law_normalized(temperature: f32, wavelength: f32) -> f32 { #[derive(Copy, Clone, Debug)] pub struct SpectralSample { - pub e: Vec4, + pub e: Float4, hero_wavelength: f32, } @@ -394,7 +394,7 @@ impl SpectralSample { pub fn new(wavelength: f32) -> SpectralSample { debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX); SpectralSample { - e: Vec4::splat(0.0), + e: Float4::splat(0.0), hero_wavelength: wavelength, } } @@ -403,12 +403,12 @@ impl SpectralSample { pub fn from_value(value: f32, wavelength: f32) -> SpectralSample { debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX); SpectralSample { - e: Vec4::splat(value), + e: Float4::splat(value), hero_wavelength: wavelength, } } - pub fn from_parts(e: Vec4, wavelength: f32) -> SpectralSample { + pub fn from_parts(e: Float4, wavelength: f32) -> SpectralSample { debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX); SpectralSample { e: e, @@ -599,8 +599,8 @@ impl DivAssign for XYZ { /// the method in the paper "Physically Meaningful Rendering using Tristimulus /// Colours" by Meng et al. #[inline(always)] -fn xyz_to_spectrum_4(xyz: (f32, f32, f32), wavelengths: Vec4) -> Vec4 { - spectrum_xyz_to_p_4(wavelengths, xyz) * Vec4::splat(1.0 / EQUAL_ENERGY_REFLECTANCE) +fn xyz_to_spectrum_4(xyz: (f32, f32, f32), wavelengths: Float4) -> Float4 { + spectrum_xyz_to_p_4(wavelengths, xyz) * Float4::splat(1.0 / EQUAL_ENERGY_REFLECTANCE) // aces_to_spectrum_p4(wavelengths, xyz_to_aces_ap0_e(xyz)) } diff --git a/src/lerp.rs b/src/lerp.rs index 65a6479..8ffea7e 100644 --- a/src/lerp.rs +++ b/src/lerp.rs @@ -1,6 +1,6 @@ #![allow(dead_code)] -use math3d::{Normal, Point, Transform, Vector}; +use rmath::{wide4::Float4, Normal, Point, Vector, Xform}; /// Trait for allowing a type to be linearly interpolated. pub trait Lerp: Copy { @@ -100,36 +100,34 @@ impl Lerp for [T; 4] { } } -impl Lerp for glam::Vec4 { - fn lerp(self, other: glam::Vec4, alpha: f32) -> glam::Vec4 { +impl Lerp for Float4 { + fn lerp(self, other: Self, alpha: f32) -> Self { (self * (1.0 - alpha)) + (other * alpha) } } -impl Lerp for Transform { - fn lerp(self, other: Transform, alpha: f32) -> Transform { +impl Lerp for Xform { + fn lerp(self, other: Self, alpha: f32) -> Self { (self * (1.0 - alpha)) + (other * alpha) } } impl Lerp for Normal { - fn lerp(self, other: Normal, alpha: f32) -> Normal { + fn lerp(self, other: Self, alpha: f32) -> Self { (self * (1.0 - alpha)) + (other * alpha) } } impl Lerp for Point { - fn lerp(self, other: Point, alpha: f32) -> Point { - let s = self; - let o = other; - Point { - co: (s.co * (1.0 - alpha)) + (o.co * alpha), - } + fn lerp(self, other: Self, alpha: f32) -> Self { + let a = self.0; + let b = other.0; + Point((a * (1.0 - alpha)) + (b * alpha)) } } impl Lerp for Vector { - fn lerp(self, other: Vector, alpha: f32) -> Vector { + fn lerp(self, other: Self, alpha: f32) -> Self { (self * (1.0 - alpha)) + (other * alpha) } } diff --git a/src/light/mod.rs b/src/light/mod.rs index a45c567..817e91d 100644 --- a/src/light/mod.rs +++ b/src/light/mod.rs @@ -6,7 +6,7 @@ use std::fmt::Debug; use crate::{ color::SpectralSample, - math::{Normal, Point, Transform, Vector}, + math::{Normal, Point, Vector, XformFull}, surface::Surface, }; @@ -34,7 +34,7 @@ pub trait SurfaceLight: Surface { /// - The pdf of the sample. fn sample_from_point( &self, - space: &Transform, + space: &XformFull, arr: Point, u: f32, v: f32, diff --git a/src/light/rectangle_light.rs b/src/light/rectangle_light.rs index 5460dfc..ab183c1 100644 --- a/src/light/rectangle_light.rs +++ b/src/light/rectangle_light.rs @@ -5,7 +5,7 @@ use crate::{ boundable::Boundable, color::{Color, SpectralSample}, lerp::lerp_slice, - math::{cross, dot, Normal, Point, Transform, Vector}, + math::{cross, dot, Normal, Point, Vector, Xform, XformFull}, ray::{RayBatch, RayStack}, sampling::{ spherical_triangle_solid_angle, triangle_surface_area, uniform_sample_spherical_triangle, @@ -51,7 +51,7 @@ impl<'a> RectangleLight<'a> { // more efficiently by inlining it there. fn sample_pdf( &self, - space: &Transform, + space: &XformFull, arr: Point, sample_dir: Vector, hit_point: Point, @@ -64,11 +64,10 @@ impl<'a> RectangleLight<'a> { let dim = lerp_slice(self.dimensions, time); // Get the four corners of the rectangle, transformed into world space - let space_inv = space.inverse(); - let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0) * space_inv; - let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0) * space_inv; - let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0) * space_inv; - let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv; + let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0).xform(space); + let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0).xform(space); + let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0).xform(space); + let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0).xform(space); // Get the four corners of the rectangle, projected on to the unit // sphere centered around arr. @@ -82,7 +81,7 @@ impl<'a> RectangleLight<'a> { let area_2 = spherical_triangle_solid_angle(sp4, sp1, sp3); // World-space surface normal - let normal = Normal::new(0.0, 0.0, 1.0) * space_inv; + let normal = Normal::new(0.0, 0.0, 1.0).xform(space); // PDF if (area_1 + area_2) < SIMPLE_SAMPLING_THRESHOLD { @@ -97,7 +96,7 @@ impl<'a> RectangleLight<'a> { // fn outgoing( // &self, - // space: &Transform, + // space: &XformFull, // dir: Vector, // u: f32, // v: f32, @@ -120,7 +119,7 @@ impl<'a> RectangleLight<'a> { impl<'a> SurfaceLight for RectangleLight<'a> { fn sample_from_point( &self, - space: &Transform, + space: &XformFull, arr: Point, u: f32, v: f32, @@ -135,11 +134,10 @@ impl<'a> SurfaceLight for RectangleLight<'a> { let surface_area_inv: f64 = 1.0 / surface_area; // Get the four corners of the rectangle, transformed into world space - let space_inv = space.inverse(); - let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0) * space_inv; - let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0) * space_inv; - let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0) * space_inv; - let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv; + let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0).xform(space); + let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0).xform(space); + let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0).xform(space); + let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0).xform(space); // Get the four corners of the rectangle relative to arr. let lp1 = p1 - arr; @@ -158,7 +156,7 @@ impl<'a> SurfaceLight for RectangleLight<'a> { let area_2 = spherical_triangle_solid_angle(sp4, sp1, sp3); // Calculate world-space surface normal - let normal = Normal::new(0.0, 0.0, 1.0) * space_inv; + let normal = Normal::new(0.0, 0.0, 1.0).xform(space); if (area_1 + area_2) < SIMPLE_SAMPLING_THRESHOLD { // Simple sampling for more distant lights @@ -215,18 +213,16 @@ impl<'a> SurfaceLight for RectangleLight<'a> { }; // Project shadow_vec back onto the light's surface - let arr_local = arr * *space; - let shadow_vec_local = shadow_vec * *space; + let arr_local = arr.xform_inv(space); + let shadow_vec_local = shadow_vec.xform_inv(space); let shadow_vec_local = shadow_vec_local * (-arr_local.z() / shadow_vec_local.z()); let mut sample_point_local = arr_local + shadow_vec_local; { let x = sample_point_local.x().max(dim.0 * -0.5).min(dim.0 * 0.5); let y = sample_point_local.y().max(dim.1 * -0.5).min(dim.1 * 0.5); - sample_point_local.set_x(x); - sample_point_local.set_y(y); - sample_point_local.set_z(0.0); + sample_point_local = Point::new(x, y, 0.0); } - let sample_point = sample_point_local * space_inv; + let sample_point = sample_point_local.xform(space); let point_err = 0.0001; // TODO: this is a hack, do properly. // Calculate pdf and light energy @@ -261,7 +257,7 @@ impl<'a> Surface for RectangleLight<'a> { ray_stack: &mut RayStack, isects: &mut [SurfaceIntersection], shader: &dyn SurfaceShader, - space: &[Transform], + space: &[Xform], ) { let _ = shader; // Silence 'unused' warning @@ -275,13 +271,17 @@ impl<'a> Surface for RectangleLight<'a> { let dim = lerp_slice(self.dimensions, time); let xform = lerp_slice(space, time); - let space_inv = xform.inverse(); + let space = if let Some(xform) = xform.into_full() { + xform + } else { + return; + }; // Get the four corners of the rectangle, transformed into world space - let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0) * space_inv; - let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0) * space_inv; - let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0) * space_inv; - let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv; + let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0).xform(&space); + let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0).xform(&space); + let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0).xform(&space); + let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0).xform(&space); // Test against two triangles that make up the light let ray_pre = triangle::RayTriPrecompute::new(dir); @@ -302,9 +302,9 @@ impl<'a> Surface for RectangleLight<'a> { pos_err: pos_err, nor: normal, nor_g: normal, - local_space: xform, + local_space: space, sample_pdf: self.sample_pdf( - &xform, + &space, orig, dir, pos, diff --git a/src/light/sphere_light.rs b/src/light/sphere_light.rs index 03ea40a..a7ef6ab 100644 --- a/src/light/sphere_light.rs +++ b/src/light/sphere_light.rs @@ -7,7 +7,7 @@ use crate::{ boundable::Boundable, color::{Color, SpectralSample}, lerp::lerp_slice, - math::{coordinate_system_from_vector, dot, Normal, Point, Transform, Vector}, + math::{coordinate_system_from_vector, dot, Normal, Point, Vector, Xform, XformFull}, ray::{RayBatch, RayStack}, sampling::{uniform_sample_cone, uniform_sample_cone_pdf, uniform_sample_sphere}, shading::surface_closure::SurfaceClosure, @@ -50,7 +50,7 @@ impl<'a> SphereLight<'a> { // more efficiently by inlining it there. fn sample_pdf( &self, - space: &Transform, + space: &XformFull, arr: Point, sample_dir: Vector, sample_u: f32, @@ -61,7 +61,7 @@ impl<'a> SphereLight<'a> { // We're not using these, silence warnings let _ = (sample_dir, sample_u, sample_v, wavelength); - let arr = arr * *space; + let arr = arr.xform_inv(space); let pos = Point::new(0.0, 0.0, 0.0); let radius: f64 = lerp_slice(self.radii, time) as f64; @@ -84,7 +84,7 @@ impl<'a> SphereLight<'a> { impl<'a> SurfaceLight for SphereLight<'a> { fn sample_from_point( &self, - space: &Transform, + space: &XformFull, arr: Point, u: f32, v: f32, @@ -92,12 +92,9 @@ impl<'a> SurfaceLight for SphereLight<'a> { time: f32, ) -> (SpectralSample, (Point, Normal, f32), f32) { // TODO: track fp error due to transforms - let arr = arr * *space; + let arr = arr.xform_inv(space); let pos = Point::new(0.0, 0.0, 0.0); - // Precalculate local->world space transform matrix - let inv_space = space.inverse(); - // Calculate time interpolated values let radius: f64 = lerp_slice(self.radii, time) as f64; let col = lerp_slice(self.colors, time); @@ -115,7 +112,7 @@ impl<'a> SurfaceLight for SphereLight<'a> { // TODO: do this properly. This is a total hack. let sample_point_err = { let v = Vector::new(radius as f32, radius as f32, radius as f32); - let v2 = v * inv_space; + let v2 = v.xform(space); v2.length() * SAMPLE_POINT_FUDGE }; @@ -159,8 +156,8 @@ impl<'a> SurfaceLight for SphereLight<'a> { let normal = (arr + sample_vec).into_vector().normalized(); let point = normal * radius as f32; ( - point.into_point() * inv_space, - normal.into_normal() * inv_space, + point.into_point().xform(space), + normal.into_normal().xform(space), ) }; let pdf = uniform_sample_cone_pdf(cos_theta_max); @@ -177,8 +174,8 @@ impl<'a> SurfaceLight for SphereLight<'a> { let normal = (arr + sample_vec).into_vector().normalized(); let point = normal * radius as f32; ( - point.into_point() * inv_space, - normal.into_normal() * inv_space, + point.into_point().xform(space), + normal.into_normal().xform(space), ) }; let pdf = 1.0 / (4.0 * PI_64); @@ -210,7 +207,7 @@ impl<'a> Surface for SphereLight<'a> { ray_stack: &mut RayStack, isects: &mut [SurfaceIntersection], shader: &dyn SurfaceShader, - space: &[Transform], + space: &[Xform], ) { let _ = shader; // Silence 'unused' warning @@ -218,14 +215,18 @@ impl<'a> Surface for SphereLight<'a> { let time = rays.time(ray_idx); // Get the transform space - let xform = lerp_slice(space, time); + let xform = if let Some(xform) = lerp_slice(space, time).into_full() { + xform + } else { + return; + }; // Get the radius of the sphere at the ray's time let radius = lerp_slice(self.radii, time); // Radius of the sphere // Get the ray origin and direction in local space let orig = rays.orig_local(ray_idx).into_vector(); - let dir = rays.dir(ray_idx) * xform; + let dir = rays.dir(ray_idx).xform_inv(&xform); // Code adapted to Rust from https://github.com/Tecla/Rayito // Ray-sphere intersection can result in either zero, one or two points @@ -286,18 +287,16 @@ impl<'a> Surface for SphereLight<'a> { isects[ray_idx] = SurfaceIntersection::Occlude; rays.mark_done(ray_idx); } else { - let inv_xform = xform.inverse(); - // Position is calculated from the local-space ray and t, and then // re-projected onto the surface of the sphere. let t_pos = orig + (dir * t); let unit_pos = t_pos.normalized(); - let pos = (unit_pos * radius * inv_xform).into_point(); + let pos = (unit_pos * radius).xform(&xform).into_point(); // TODO: proper error bounds. let pos_err = 0.001; - let normal = unit_pos.into_normal() * inv_xform; + let normal = unit_pos.into_normal().xform(&xform); let intersection_data = SurfaceIntersectionData { incoming: rays.dir(ray_idx), diff --git a/src/math.rs b/src/math.rs index fec2f06..b29f6b1 100644 --- a/src/math.rs +++ b/src/math.rs @@ -2,7 +2,9 @@ use std::f32; -pub use math3d::{cross, dot, CrossProduct, DotProduct, Normal, Point, Transform, Vector}; +pub use rmath::{ + cross, dot, wide4::Float4, CrossProduct, DotProduct, Normal, Point, Vector, Xform, XformFull, +}; /// Clamps a value between a min and max. pub fn clamp(v: T, lower: T, upper: T) -> T { diff --git a/src/parse/psy.rs b/src/parse/psy.rs index 4d2f6f5..b6ea518 100644 --- a/src/parse/psy.rs +++ b/src/parse/psy.rs @@ -10,7 +10,7 @@ use crate::{ camera::Camera, color::{rec709_e_to_xyz, Color}, light::WorldLightSource, - math::Transform, + math::Xform, renderer::Renderer, scene::Scene, scene::World, @@ -553,17 +553,17 @@ fn parse_world<'a>(arena: &'a Arena, tree: &'a DataTree) -> Result, Ps } } -pub fn parse_matrix(contents: &str) -> Result { +pub fn parse_matrix(contents: &str) -> Result { if let IResult::Ok((leftover, ns)) = all_consuming(tuple(( ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, )))(contents) { if leftover.is_empty() { - return Ok(Transform::new_from_values( + return Ok(Xform::new( // We throw away the last row, since it's not necessarily affine. // TODO: is there a more correct way to handle this? - ns.0, ns.4, ns.8, ns.12, ns.1, ns.5, ns.9, ns.13, ns.2, ns.6, ns.10, ns.14, + ns.0, ns.1, ns.2, ns.4, ns.5, ns.6, ns.8, ns.9, ns.10, ns.12, ns.13, ns.14, )); } } diff --git a/src/ray.rs b/src/ray.rs index daf29ab..035cdf2 100644 --- a/src/ray.rs +++ b/src/ray.rs @@ -1,8 +1,8 @@ #![allow(dead_code)] -use glam::BVec4A; +use rmath::wide4::Bool4; -use crate::math::{Point, Transform, Vector}; +use crate::math::{Point, Vector, XformFull}; type RayIndexType = u16; type FlagType = u8; @@ -85,9 +85,7 @@ impl RayBatch { pub fn set_from_ray(&mut self, ray: &Ray, is_occlusion: bool, idx: usize) { self.hot[idx].orig_local = ray.orig; - self.hot[idx].dir_inv_local = Vector { - co: ray.dir.co.recip(), - }; + self.hot[idx].dir_inv_local = Vector(ray.dir.0.recip()); self.hot[idx].max_t = ray.max_t; self.hot[idx].time = ray.time; self.hot[idx].flags = if is_occlusion { OCCLUSION_FLAG } else { 0 }; @@ -115,15 +113,13 @@ impl RayBatch { } /// Updates the accel data of the given ray (at index `idx`) with the - /// given world-to-local-space transform matrix. + /// given transform. /// /// This should be called when entering (and exiting) traversal of a /// new transform space. - pub fn update_local(&mut self, idx: usize, xform: &Transform) { - self.hot[idx].orig_local = self.cold[idx].orig * *xform; - self.hot[idx].dir_inv_local = Vector { - co: (self.cold[idx].dir * *xform).co.recip(), - }; + pub fn update_local(&mut self, idx: usize, xform: &XformFull) { + self.hot[idx].orig_local = self.cold[idx].orig.xform_inv(xform); + self.hot[idx].dir_inv_local = Vector((self.cold[idx].dir.xform_inv(xform)).0.recip()); } //========================================================== @@ -349,7 +345,7 @@ impl RayStack { /// indicated lanes. pub fn pop_do_next_task_and_push_rays(&mut self, output_lane_count: usize, mut handle_ray: F) where - F: FnMut(usize) -> BVec4A, + F: FnMut(usize) -> Bool4, { // Pop the task and do necessary bookkeeping. let task = self.tasks.pop().unwrap(); diff --git a/src/renderer.rs b/src/renderer.rs index 9cbd1e6..8325173 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -9,8 +9,6 @@ use std::{ use crossbeam::sync::MsQueue; use scoped_threadpool::Pool; -use glam::Vec4; - use crate::{ accel::ACCEL_NODE_RAY_TESTS, color::{map_0_1_to_wavelength, SpectralSample, XYZ}, @@ -18,7 +16,7 @@ use crate::{ hash::hash_u32, hilbert, image::Image, - math::{probit, upper_power_of_two}, + math::{probit, upper_power_of_two, Float4}, mis::power_heuristic, ray::{Ray, RayBatch}, scene::{Scene, SceneLightSample}, @@ -379,12 +377,12 @@ pub struct LightPath { wavelength: f32, next_bounce_ray: Option, - next_attenuation_fac: Vec4, + next_attenuation_fac: Float4, closure_sample_pdf: f32, - light_attenuation: Vec4, - pending_color_addition: Vec4, - color: Vec4, + light_attenuation: Float4, + pending_color_addition: Float4, + color: Float4, } #[allow(clippy::new_ret_no_self)] @@ -412,12 +410,12 @@ impl LightPath { wavelength: wavelength, next_bounce_ray: None, - next_attenuation_fac: Vec4::splat(1.0), + next_attenuation_fac: Float4::splat(1.0), closure_sample_pdf: 1.0, - light_attenuation: Vec4::splat(1.0), - pending_color_addition: Vec4::splat(0.0), - color: Vec4::splat(0.0), + light_attenuation: Float4::splat(1.0), + pending_color_addition: Float4::splat(0.0), + color: Float4::splat(0.0), }, scene.camera.generate_ray( image_plane_co.0, diff --git a/src/scene/assembly.rs b/src/scene/assembly.rs index e9d8ce9..90a296d 100644 --- a/src/scene/assembly.rs +++ b/src/scene/assembly.rs @@ -10,7 +10,7 @@ use crate::{ color::SpectralSample, lerp::lerp_slice, light::SurfaceLight, - math::{Normal, Point, Transform}, + math::{Normal, Point, Xform, XformFull}, shading::SurfaceShader, surface::{Surface, SurfaceIntersection}, transform_stack::TransformStack, @@ -21,7 +21,7 @@ pub struct Assembly<'a> { // Instance list pub instances: &'a [Instance], pub light_instances: &'a [Instance], - pub xforms: &'a [Transform], + pub xforms: &'a [Xform], // Surface shader list pub surface_shaders: &'a [&'a dyn SurfaceShader], @@ -58,15 +58,20 @@ impl<'a> Assembly<'a> { } = *intr { let sel_xform = if !xform_stack.top().is_empty() { - lerp_slice(xform_stack.top(), time) + if let Some(xform) = lerp_slice(xform_stack.top(), time).into_full() { + xform + } else { + return None; + } } else { - Transform::new() + XformFull::identity() }; + if let Some((light_i, sel_pdf, whittled_n)) = self.light_accel.select( - idata.incoming * sel_xform, - idata.pos * sel_xform, - idata.nor * sel_xform, - idata.nor_g * sel_xform, + idata.incoming.xform_inv(&sel_xform), + idata.pos.xform_inv(&sel_xform), + idata.nor.xform_inv(&sel_xform), + idata.nor_g.xform_inv(&sel_xform), &closure, time, n, @@ -76,12 +81,12 @@ impl<'a> Assembly<'a> { InstanceType::Object => { match self.objects[inst.data_index] { Object::SurfaceLight(light) => { - // Get the world-to-object space transform of the light + // Get the transform of the light. let xform = if let Some((a, b)) = inst.transform_indices { let pxforms = xform_stack.top(); let xform = lerp_slice(&self.xforms[a..b], time); if !pxforms.is_empty() { - lerp_slice(pxforms, time) * xform + lerp_slice(pxforms, time).compose(&xform) } else { xform } @@ -90,15 +95,20 @@ impl<'a> Assembly<'a> { if !pxforms.is_empty() { lerp_slice(pxforms, time) } else { - Transform::new() + Xform::identity() } - }; + } + .into_full(); // Sample the light - let (color, sample_geo, pdf) = light.sample_from_point( - &xform, idata.pos, uvw.0, uvw.1, wavelength, time, - ); - return Some((color, sample_geo, pdf, sel_pdf)); + if let Some(xform) = xform { + let (color, sample_geo, pdf) = light.sample_from_point( + &xform, idata.pos, uvw.0, uvw.1, wavelength, time, + ); + return Some((color, sample_geo, pdf, sel_pdf)); + } else { + return None; + } } _ => unimplemented!(), @@ -106,7 +116,7 @@ impl<'a> Assembly<'a> { } InstanceType::Assembly => { - // Push the world-to-object space transforms of the assembly onto + // Push the transform of the assembly onto // the transform stack. if let Some((a, b)) = inst.transform_indices { xform_stack.push(&self.xforms[a..b]); @@ -152,7 +162,7 @@ pub struct AssemblyBuilder<'a> { // Instance list instances: Vec, - xforms: Vec, + xforms: Vec, // Shader list surface_shaders: Vec<&'a dyn SurfaceShader>, @@ -224,7 +234,7 @@ impl<'a> AssemblyBuilder<'a> { &mut self, name: &str, surface_shader_name: Option<&str>, - xforms: Option<&[Transform]>, + xforms: Option<&[Xform]>, ) { // Make sure name exists if !self.name_exists(name) { @@ -380,7 +390,7 @@ impl<'a> AssemblyBuilder<'a> { // Transform the bounding boxes, if necessary if let Some((xstart, xend)) = inst.transform_indices { let xf = &self.xforms[xstart..xend]; - transform_bbox_slice_from(&bbs, xf, &mut bbs2); + transform_bbox_slice_from(&bbs, xf, &mut bbs2).unwrap(); } else { bbs2.clear(); bbs2.extend(bbs); diff --git a/src/shading/surface_closure.rs b/src/shading/surface_closure.rs index b1f3741..6e0aa16 100644 --- a/src/shading/surface_closure.rs +++ b/src/shading/surface_closure.rs @@ -2,12 +2,10 @@ use std::f32::consts::PI as PI_32; -use glam::Vec4; - use crate::{ color::{Color, SpectralSample}, lerp::{lerp, Lerp}, - math::{clamp, dot, zup_to_vec, Normal, Vector}, + math::{clamp, dot, zup_to_vec, Float4, Normal, Vector}, sampling::cosine_sample_hemisphere, }; @@ -512,7 +510,7 @@ mod ggx_closure { rev_fresnel, ); - SpectralSample::from_parts(Vec4::new(c0, c1, c2, c3), wavelength) + SpectralSample::from_parts(Float4::new(c0, c1, c2, c3), wavelength) }; // Calculate everything else diff --git a/src/surface/micropoly_batch.rs b/src/surface/micropoly_batch.rs index ccb2029..54de7a4 100644 --- a/src/surface/micropoly_batch.rs +++ b/src/surface/micropoly_batch.rs @@ -9,7 +9,7 @@ use crate::{ bbox::BBox, boundable::Boundable, lerp::lerp_slice, - math::{cross, dot, Normal, Point, Transform}, + math::{cross, dot, Normal, Point, Xform, XformFull}, ray::{RayBatch, RayStack}, shading::SurfaceClosure, }; @@ -150,13 +150,17 @@ impl<'a> MicropolyBatch<'a> { rays: &mut RayBatch, ray_stack: &mut RayStack, isects: &mut [SurfaceIntersection], - space: &[Transform], + space: &[Xform], ) { // Precalculate transform for non-motion blur cases let static_mat_space = if space.len() == 1 { - lerp_slice(space, 0.0).inverse() + if let Some(xform) = space[0].into_full() { + xform + } else { + return; + } } else { - Transform::new() + XformFull::identity() }; self.accel @@ -182,11 +186,11 @@ impl<'a> MicropolyBatch<'a> { ); if !space.is_empty() { (*tri_cache[i].as_mut_ptr()).0 = - (*tri_cache[i].as_mut_ptr()).0 * static_mat_space; + (*tri_cache[i].as_mut_ptr()).0.xform(&static_mat_space); (*tri_cache[i].as_mut_ptr()).1 = - (*tri_cache[i].as_mut_ptr()).1 * static_mat_space; + (*tri_cache[i].as_mut_ptr()).1.xform(&static_mat_space); (*tri_cache[i].as_mut_ptr()).2 = - (*tri_cache[i].as_mut_ptr()).2 * static_mat_space; + (*tri_cache[i].as_mut_ptr()).2.xform(&static_mat_space); } } } @@ -205,7 +209,11 @@ impl<'a> MicropolyBatch<'a> { // Calculate the ray space, if necessary. let mat_space = if space.len() > 1 { // Per-ray transform, for motion blur - lerp_slice(space, ray_time).inverse() + if let Some(xform) = lerp_slice(space, ray_time).into_full() { + xform + } else { + return; + } } else { static_mat_space }; @@ -251,9 +259,9 @@ impl<'a> MicropolyBatch<'a> { }; if !space.is_empty() { - tri.0 = tri.0 * mat_space; - tri.1 = tri.1 * mat_space; - tri.2 = tri.2 * mat_space; + tri.0 = tri.0.xform(&mat_space); + tri.1 = tri.1.xform(&mat_space); + tri.2 = tri.2.xform(&mat_space); } tri @@ -311,7 +319,7 @@ impl<'a> MicropolyBatch<'a> { let n1 = lerp_slice(n1_slice, ray_time).normalized(); let n2 = lerp_slice(n2_slice, ray_time).normalized(); - let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space; + let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)).xform(&mat_space); if dot(s_nor, geo_normal) >= 0.0 { s_nor } else { diff --git a/src/surface/mod.rs b/src/surface/mod.rs index a718f4d..6b20132 100644 --- a/src/surface/mod.rs +++ b/src/surface/mod.rs @@ -10,7 +10,7 @@ use std::fmt::Debug; use crate::{ boundable::Boundable, - math::{Normal, Point, Transform, Vector}, + math::{Normal, Point, Vector, Xform, XformFull}, ray::{RayBatch, RayStack}, shading::surface_closure::SurfaceClosure, shading::SurfaceShader, @@ -25,7 +25,7 @@ pub trait Surface: Boundable + Debug + Sync { ray_stack: &mut RayStack, isects: &mut [SurfaceIntersection], shader: &dyn SurfaceShader, - space: &[Transform], + space: &[Xform], ); } @@ -80,13 +80,13 @@ pub enum SurfaceIntersection { #[derive(Debug, Copy, Clone)] pub struct SurfaceIntersectionData { - pub incoming: Vector, // Direction of the incoming ray - pub pos: Point, // Position of the intersection + pub incoming: Vector, // Direction of the incoming ray. + pub pos: Point, // Position of the intersection. pub pos_err: f32, // Error magnitude of the intersection position. Imagine // a cube centered around `pos` with dimensions of `2 * pos_err`. - pub nor: Normal, // Shading normal - pub nor_g: Normal, // True geometric normal - pub local_space: Transform, // Matrix from global space to local space - pub t: f32, // Ray t-value at the intersection point - pub sample_pdf: f32, // The PDF of getting this point by explicitly sampling the surface + pub nor: Normal, // Shading normal. + pub nor_g: Normal, // True geometric normal. + pub local_space: XformFull, // Matrix from local to world space. + pub t: f32, // Ray t-value at the intersection point. + pub sample_pdf: f32, // The PDF of getting this point by explicitly sampling the surface. } diff --git a/src/surface/triangle.rs b/src/surface/triangle.rs index 5e0f60f..25dd17a 100644 --- a/src/surface/triangle.rs +++ b/src/surface/triangle.rs @@ -162,7 +162,7 @@ pub fn surface_point(tri: (Point, Point, Point), bary: (f32, f32, f32)) -> (Poin + (tri.1.into_vector().abs() * bary.1) + (tri.2.into_vector().abs() * bary.2)) * fp_gamma(7)) - .co + .0 .max_element(); (pos, pos_err) diff --git a/src/surface/triangle_mesh.rs b/src/surface/triangle_mesh.rs index 6b16ab6..ae82dac 100644 --- a/src/surface/triangle_mesh.rs +++ b/src/surface/triangle_mesh.rs @@ -7,7 +7,7 @@ use crate::{ bbox::BBox, boundable::Boundable, lerp::lerp_slice, - math::{cross, dot, Normal, Point, Transform}, + math::{cross, dot, Normal, Point, Xform, XformFull}, ray::{RayBatch, RayStack}, shading::SurfaceShader, }; @@ -128,13 +128,17 @@ impl<'a> Surface for TriangleMesh<'a> { ray_stack: &mut RayStack, isects: &mut [SurfaceIntersection], shader: &dyn SurfaceShader, - space: &[Transform], + space: &[Xform], ) { // Precalculate transform for non-motion blur cases let static_mat_space = if space.len() == 1 { - lerp_slice(space, 0.0).inverse() + if let Some(xform) = lerp_slice(space, 0.0).into_full() { + xform + } else { + return; + } } else { - Transform::new() + XformFull::identity() }; self.accel @@ -160,11 +164,11 @@ impl<'a> Surface for TriangleMesh<'a> { ); if !space.is_empty() { (*tri_cache[i].as_mut_ptr()).0 = - (*tri_cache[i].as_mut_ptr()).0 * static_mat_space; + (*tri_cache[i].as_mut_ptr()).0.xform(&static_mat_space); (*tri_cache[i].as_mut_ptr()).1 = - (*tri_cache[i].as_mut_ptr()).1 * static_mat_space; + (*tri_cache[i].as_mut_ptr()).1.xform(&static_mat_space); (*tri_cache[i].as_mut_ptr()).2 = - (*tri_cache[i].as_mut_ptr()).2 * static_mat_space; + (*tri_cache[i].as_mut_ptr()).2.xform(&static_mat_space); } } } @@ -183,7 +187,11 @@ impl<'a> Surface for TriangleMesh<'a> { // Calculate the ray space, if necessary. let mat_space = if space.len() > 1 { // Per-ray transform, for motion blur - lerp_slice(space, ray_time).inverse() + if let Some(xform) = lerp_slice(space, ray_time).into_full() { + xform + } else { + return; + } } else { static_mat_space }; @@ -229,9 +237,9 @@ impl<'a> Surface for TriangleMesh<'a> { }; if !space.is_empty() { - tri.0 = tri.0 * mat_space; - tri.1 = tri.1 * mat_space; - tri.2 = tri.2 * mat_space; + tri.0 = tri.0.xform(&mat_space); + tri.1 = tri.1.xform(&mat_space); + tri.2 = tri.2.xform(&mat_space); } tri @@ -289,7 +297,7 @@ impl<'a> Surface for TriangleMesh<'a> { let n1 = lerp_slice(n1_slice, ray_time).normalized(); let n2 = lerp_slice(n2_slice, ray_time).normalized(); - let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space; + let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)).xform(&mat_space); if dot(s_nor, geo_normal) >= 0.0 { s_nor } else { diff --git a/src/tracer.rs b/src/tracer.rs index 8e85db6..88833d3 100644 --- a/src/tracer.rs +++ b/src/tracer.rs @@ -4,7 +4,7 @@ use crate::{ accel::ray_code, color::{rec709_to_xyz, Color}, lerp::lerp_slice, - math::Transform, + math::XformFull, ray::{RayBatch, RayStack}, scene::{Assembly, InstanceType, Object}, shading::{SimpleSurfaceShader, SurfaceShader}, @@ -63,7 +63,7 @@ impl<'a> TracerInner<'a> { // Prep the accel part of the rays. { - let ident = Transform::new(); + let ident = XformFull::identity(); for i in 0..rays.len() { rays.update_local(i, &ident); } @@ -105,7 +105,15 @@ impl<'a> TracerInner<'a> { let xforms = self.xform_stack.top(); ray_stack.do_next_task(|ray_idx| { let t = rays.time(ray_idx); - rays.update_local(ray_idx, &lerp_slice(xforms, t)); + rays.update_local( + ray_idx, + &if let Some(xform) = lerp_slice(xforms, t).into_full() { + xform + } else { + // TODO: filter out ray instead. + XformFull::identity() + }, + ); }); ray_stack.duplicate_next_task(); } @@ -137,10 +145,18 @@ impl<'a> TracerInner<'a> { if !xforms.is_empty() { ray_stack.pop_do_next_task(|ray_idx| { let t = rays.time(ray_idx); - rays.update_local(ray_idx, &lerp_slice(xforms, t)); + rays.update_local( + ray_idx, + &if let Some(xform) = lerp_slice(xforms, t).into_full() { + xform + } else { + // TODO: filter out ray instead. + XformFull::identity() + }, + ); }); } else { - let ident = Transform::new(); + let ident = XformFull::identity(); ray_stack.pop_do_next_task(|ray_idx| { rays.update_local(ray_idx, &ident); }); diff --git a/src/transform_stack.rs b/src/transform_stack.rs index 8219799..ce5356d 100644 --- a/src/transform_stack.rs +++ b/src/transform_stack.rs @@ -3,10 +3,10 @@ use std::{ mem::{transmute, MaybeUninit}, }; -use crate::{algorithm::merge_slices_to, math::Transform}; +use crate::{algorithm::merge_slices_to, math::Xform}; pub struct TransformStack { - stack: Vec>, + stack: Vec>, stack_indices: Vec, } @@ -30,11 +30,11 @@ impl TransformStack { self.stack_indices.push(0); } - pub fn push(&mut self, xforms: &[Transform]) { + pub fn push(&mut self, xforms: &[Xform]) { assert!(!xforms.is_empty()); if self.stack.is_empty() { - let xforms: &[MaybeUninit] = unsafe { transmute(xforms) }; + let xforms: &[MaybeUninit] = unsafe { transmute(xforms) }; self.stack.extend(xforms); } else { let sil = self.stack_indices.len(); @@ -54,7 +54,7 @@ impl TransformStack { unsafe { transmute(&xfs1[i1..i2]) }, xforms, xfs2, - |xf1, xf2| *xf1 * *xf2, + |xf1, xf2| xf2.compose(xf1), ); } @@ -73,7 +73,7 @@ impl TransformStack { self.stack_indices.pop(); } - pub fn top(&self) -> &[Transform] { + pub fn top(&self) -> &[Xform] { let sil = self.stack_indices.len(); let i1 = self.stack_indices[sil - 2]; let i2 = self.stack_indices[sil - 1]; diff --git a/sub_crates/rmath/src/normal.rs b/sub_crates/rmath/src/normal.rs index 950ac38..64160a8 100644 --- a/sub_crates/rmath/src/normal.rs +++ b/sub_crates/rmath/src/normal.rs @@ -56,6 +56,16 @@ impl Normal { self.0.c() } + #[inline(always)] + pub fn get_n(self, i: usize) -> f32 { + match i { + 0 => self.x(), + 1 => self.y(), + 2 => self.z(), + _ => panic!("Out of bounds index into 3D vector."), + } + } + #[inline(always)] #[must_use] pub fn set_x(self, x: f32) -> Self { diff --git a/sub_crates/rmath/src/point.rs b/sub_crates/rmath/src/point.rs index 669335b..f260d74 100644 --- a/sub_crates/rmath/src/point.rs +++ b/sub_crates/rmath/src/point.rs @@ -47,6 +47,16 @@ impl Point { self.0.c() } + #[inline(always)] + pub fn get_n(self, i: usize) -> f32 { + match i { + 0 => self.x(), + 1 => self.y(), + 2 => self.z(), + _ => panic!("Out of bounds index into 3D vector."), + } + } + #[inline(always)] #[must_use] pub fn set_x(self, x: f32) -> Self { diff --git a/sub_crates/rmath/src/vector.rs b/sub_crates/rmath/src/vector.rs index 8cdf424..21491a2 100644 --- a/sub_crates/rmath/src/vector.rs +++ b/sub_crates/rmath/src/vector.rs @@ -37,6 +37,11 @@ impl Vector { Self(self.0 / self.length()) } + #[inline(always)] + pub fn abs(self) -> Self { + Self(self.0.abs()) + } + #[inline(always)] pub fn into_point(self) -> Point { Point(self.0) @@ -62,6 +67,16 @@ impl Vector { self.0.c() } + #[inline(always)] + pub fn get_n(self, i: usize) -> f32 { + match i { + 0 => self.x(), + 1 => self.y(), + 2 => self.z(), + _ => panic!("Out of bounds index into 3D vector."), + } + } + #[inline(always)] #[must_use] pub fn set_x(self, x: f32) -> Self { diff --git a/sub_crates/rmath/src/wide4.rs b/sub_crates/rmath/src/wide4.rs index 6398d41..4e449e2 100644 --- a/sub_crates/rmath/src/wide4.rs +++ b/sub_crates/rmath/src/wide4.rs @@ -9,7 +9,7 @@ use crate::{difference_of_products, two_prod, two_sum}; pub use fallback::{Bool4, Float4}; mod fallback { - use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Not, Sub}; + use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, Mul, Neg, Not, Sub}; use crate::FMulAdd; @@ -65,28 +65,44 @@ mod fallback { ]) } - // /// Horizontal minimum. - // #[inline(always)] - // pub fn hmin(self) -> f32 { - // let a = self.0[0].min(self.0[1]); - // let b = self.0[2].min(self.0[3]); - // a.min(b) - // } + /// Horizontal minimum. + #[inline(always)] + pub fn min_element(self) -> f32 { + let a = self.0[0].min(self.0[1]); + let b = self.0[2].min(self.0[3]); + a.min(b) + } - // /// Horizontal maximum. - // #[inline(always)] - // pub fn hmax(self) -> f32 { - // let a = self.0[0].max(self.0[1]); - // let b = self.0[2].max(self.0[3]); - // a.max(b) - // } + /// Horizontal maximum. + #[inline(always)] + pub fn max_element(self) -> f32 { + let a = self.0[0].max(self.0[1]); + let b = self.0[2].max(self.0[3]); + a.max(b) + } + + /// 1.0 / self + #[inline(always)] + pub fn recip(self) -> Self { + Float4::splat(1.0) / self + } + + #[inline(always)] + pub fn abs(self) -> Self { + Float4::new( + self.a().abs(), + self.b().abs(), + self.c().abs(), + self.d().abs(), + ) + } //----------------------------------------------------- // Comparisons. /// Less than. #[inline(always)] - pub fn lt(self, rhs: Self) -> Bool4 { + pub fn cmplt(self, rhs: Self) -> Bool4 { Bool4([ self.0[0] < rhs.0[0], self.0[1] < rhs.0[1], @@ -97,7 +113,7 @@ mod fallback { /// Less than or equal. #[inline(always)] - pub fn lte(self, rhs: Self) -> Bool4 { + pub fn cmplte(self, rhs: Self) -> Bool4 { Bool4([ self.0[0] <= rhs.0[0], self.0[1] <= rhs.0[1], @@ -108,7 +124,7 @@ mod fallback { /// Greater than. #[inline(always)] - pub fn gt(self, rhs: Self) -> Bool4 { + pub fn cmpgt(self, rhs: Self) -> Bool4 { Bool4([ self.0[0] > rhs.0[0], self.0[1] > rhs.0[1], @@ -119,7 +135,7 @@ mod fallback { /// Greater than or equal. #[inline(always)] - pub fn gte(self, rhs: Self) -> Bool4 { + pub fn cmpgte(self, rhs: Self) -> Bool4 { Bool4([ self.0[0] >= rhs.0[0], self.0[1] >= rhs.0[1], @@ -130,7 +146,7 @@ mod fallback { /// Equal. #[inline(always)] - pub fn eq(self, rhs: Self) -> Bool4 { + pub fn cmpeq(self, rhs: Self) -> Bool4 { Bool4([ self.0[0] == rhs.0[0], self.0[1] == rhs.0[1], @@ -232,6 +248,15 @@ mod fallback { } } + impl Index for Float4 { + type Output = f32; + + #[inline(always)] + fn index(&self, idx: usize) -> &f32 { + &self.0[idx] + } + } + impl Add for Float4 { type Output = Self; @@ -339,18 +364,34 @@ mod fallback { pub struct Bool4([bool; 4]); impl Bool4 { + #[inline(always)] + pub fn new_false() -> Self { + Self([false, false, false, false]) + } + #[inline(always)] pub fn to_bools(self) -> [bool; 4] { self.0 } + /// Note: `a` goes to the least significant bit. #[inline(always)] - pub fn to_bitmask(self) -> u8 { + pub fn bitmask(self) -> u8 { self.0[0] as u8 | ((self.0[1] as u8) << 1) | ((self.0[2] as u8) << 2) | ((self.0[3] as u8) << 3) } + + #[inline(always)] + pub fn any(self) -> bool { + self.0[0] | &self.0[1] | self.0[2] | self.0[3] + } + + #[inline(always)] + pub fn all(self) -> bool { + self.0[0] & &self.0[1] & self.0[2] & self.0[3] + } } impl BitAnd for Bool4 { @@ -698,6 +739,12 @@ impl Float4 { } } +impl From for (f32, f32, f32, f32) { + fn from(v: Float4) -> (f32, f32, f32, f32) { + (v.a(), v.b(), v.c(), v.d()) + } +} + impl AddAssign for Float4 { #[inline(always)] fn add_assign(&mut self, rhs: Self) { @@ -743,7 +790,7 @@ impl DivAssign for Float4 { impl PartialEq for Float4 { #[inline(always)] fn eq(&self, rhs: &Self) -> bool { - Self::eq(*self, *rhs).to_bitmask() == 0b1111 + self.cmpeq(*rhs).bitmask() == 0b1111 } } diff --git a/sub_crates/rmath/src/xform.rs b/sub_crates/rmath/src/xform.rs index 1e476ec..a6479c1 100644 --- a/sub_crates/rmath/src/xform.rs +++ b/sub_crates/rmath/src/xform.rs @@ -184,6 +184,24 @@ pub struct XformFull { pub t: Float4, // Forward translation. } +impl XformFull { + pub fn identity() -> Self { + Self { + m: [ + Float4::new(1.0, 0.0, 0.0, 0.0), + Float4::new(0.0, 1.0, 0.0, 0.0), + Float4::new(0.0, 0.0, 1.0, 0.0), + ], + m_inv: [ + Float4::new(1.0, 0.0, 0.0, 0.0), + Float4::new(0.0, 1.0, 0.0, 0.0), + Float4::new(0.0, 0.0, 1.0, 0.0), + ], + t: Float4::splat(0.0), + } + } +} + //------------------------------------------------------------- #[cfg(test)] diff --git a/sub_crates/spectral_upsampling/Cargo.toml b/sub_crates/spectral_upsampling/Cargo.toml index 88be212..939bcf1 100644 --- a/sub_crates/spectral_upsampling/Cargo.toml +++ b/sub_crates/spectral_upsampling/Cargo.toml @@ -10,4 +10,4 @@ name = "spectral_upsampling" path = "src/lib.rs" [dependencies] -glam = "0.15" \ No newline at end of file +rmath = { path = "../rmath" } \ No newline at end of file diff --git a/sub_crates/spectral_upsampling/src/jakob.rs b/sub_crates/spectral_upsampling/src/jakob.rs index 9a8e07c..9288dd0 100644 --- a/sub_crates/spectral_upsampling/src/jakob.rs +++ b/sub_crates/spectral_upsampling/src/jakob.rs @@ -6,7 +6,7 @@ /// The provides similar color matching as full Jakob, at the expense of /// somewhat lower quality spectrums, and the inability to precalculate /// the coefficents for even more efficient evaluation later on. -use glam::Vec4; +use rmath::wide4::Float4; /// How many polynomial coefficients? const RGB2SPEC_N_COEFFS: usize = 3; @@ -15,7 +15,7 @@ const RGB2SPEC_N_COEFFS: usize = 3; include!(concat!(env!("OUT_DIR"), "/jakob_table_inc.rs")); #[inline] -pub fn rec709_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 { +pub fn rec709_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 { small_rgb_to_spectrum_p4( REC709_TABLE, REC709_TABLE_RES, @@ -26,7 +26,7 @@ pub fn rec709_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 { } #[inline] -pub fn rec2020_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 { +pub fn rec2020_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 { small_rgb_to_spectrum_p4( REC2020_TABLE, REC2020_TABLE_RES, @@ -37,7 +37,7 @@ pub fn rec2020_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 { } #[inline] -pub fn aces_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 { +pub fn aces_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 { small_rgb_to_spectrum_p4( ACES_TABLE, ACES_TABLE_RES, @@ -56,9 +56,9 @@ fn small_rgb_to_spectrum_p4( table: &[[(f32, f32, f32); 2]], table_res: usize, table_mid_value: f32, - lambdas: Vec4, + lambdas: Float4, rgb: (f32, f32, f32), -) -> Vec4 { +) -> Float4 { // Determine largest RGB component, and calculate the other two // components scaled for lookups. let (i, max_val, x, y) = if rgb.0 > rgb.1 && rgb.0 > rgb.2 { @@ -71,7 +71,7 @@ fn small_rgb_to_spectrum_p4( if max_val == 0.0 { // If max_val is zero, just return zero. This avoids NaN's from // divide by zero. This is also correct, since it's black. - return Vec4::splat(0.0); + return Float4::splat(0.0); } let x = x * 63.0 / max_val; let y = y * 63.0 / max_val; @@ -91,20 +91,20 @@ fn small_rgb_to_spectrum_p4( // Convert to SIMD format for faster interpolation. let a0 = [ - Vec4::new(a0[0].0, a0[0].1, a0[0].2, 0.0), - Vec4::new(a0[1].0, a0[1].1, a0[1].2, 0.0), + Float4::new(a0[0].0, a0[0].1, a0[0].2, 0.0), + Float4::new(a0[1].0, a0[1].1, a0[1].2, 0.0), ]; let a1 = [ - Vec4::new(a1[0].0, a1[0].1, a1[0].2, 0.0), - Vec4::new(a1[1].0, a1[1].1, a1[1].2, 0.0), + Float4::new(a1[0].0, a1[0].1, a1[0].2, 0.0), + Float4::new(a1[1].0, a1[1].1, a1[1].2, 0.0), ]; let a2 = [ - Vec4::new(a2[0].0, a2[0].1, a2[0].2, 0.0), - Vec4::new(a2[1].0, a2[1].1, a2[1].2, 0.0), + Float4::new(a2[0].0, a2[0].1, a2[0].2, 0.0), + Float4::new(a2[1].0, a2[1].1, a2[1].2, 0.0), ]; let a3 = [ - Vec4::new(a3[0].0, a3[0].1, a3[0].2, 0.0), - Vec4::new(a3[1].0, a3[1].1, a3[1].2, 0.0), + Float4::new(a3[0].0, a3[0].1, a3[0].2, 0.0), + Float4::new(a3[1].0, a3[1].1, a3[1].2, 0.0), ]; // Do interpolation. @@ -133,22 +133,22 @@ fn small_rgb_to_spectrum_p4( // Coefficient -> eval functions #[inline(always)] -fn rgb2spec_fma_4(a: Vec4, b: Vec4, c: Vec4) -> Vec4 { +fn rgb2spec_fma_4(a: Float4, b: Float4, c: Float4) -> Float4 { (a * b) + c } -fn rgb2spec_eval_4(coeff: [f32; RGB2SPEC_N_COEFFS], lambda: Vec4) -> Vec4 { - let co0 = Vec4::splat(coeff[0]); - let co1 = Vec4::splat(coeff[1]); - let co2 = Vec4::splat(coeff[2]); +fn rgb2spec_eval_4(coeff: [f32; RGB2SPEC_N_COEFFS], lambda: Float4) -> Float4 { + let co0 = Float4::splat(coeff[0]); + let co1 = Float4::splat(coeff[1]); + let co2 = Float4::splat(coeff[2]); let x = rgb2spec_fma_4(rgb2spec_fma_4(co0, lambda, co1), lambda, co2); let y = { // TODO: replace this with a SIMD sqrt op. - let (x, y, z, w) = rgb2spec_fma_4(x, x, Vec4::splat(1.0)).into(); - Vec4::new(x.sqrt(), y.sqrt(), z.sqrt(), w.sqrt()).recip() + let (x, y, z, w) = rgb2spec_fma_4(x, x, Float4::splat(1.0)).into(); + Float4::new(x.sqrt(), y.sqrt(), z.sqrt(), w.sqrt()).recip() }; - rgb2spec_fma_4(Vec4::splat(0.5) * x, y, Vec4::splat(0.5)) + rgb2spec_fma_4(Float4::splat(0.5) * x, y, Float4::splat(0.5)) } diff --git a/sub_crates/spectral_upsampling/src/meng.rs b/sub_crates/spectral_upsampling/src/meng.rs index f3ad3e7..d75c89d 100644 --- a/sub_crates/spectral_upsampling/src/meng.rs +++ b/sub_crates/spectral_upsampling/src/meng.rs @@ -6,7 +6,7 @@ use std::f32; -use glam::Vec4; +use rmath::wide4::Float4; mod meng_spectra_tables; @@ -174,7 +174,7 @@ pub fn spectrum_xyz_to_p(lambda: f32, xyz: (f32, f32, f32)) -> f32 { /// /// Works on 4 wavelengths at once via SIMD. #[inline] -pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 { +pub fn spectrum_xyz_to_p_4(lambdas: Float4, xyz: (f32, f32, f32)) -> Float4 { assert!(lambdas.min_element() >= SPECTRUM_SAMPLE_MIN); assert!(lambdas.max_element() <= SPECTRUM_SAMPLE_MAX); @@ -184,7 +184,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 { if norm < f32::MAX { norm } else { - return Vec4::splat(0.0); + return Float4::splat(0.0); } }; @@ -197,7 +197,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 { || uv.1 < 0.0 || uv.1 >= SPECTRUM_GRID_HEIGHT as f32 { - return Vec4::splat(0.0); + return Float4::splat(0.0); } let uvi = (uv.0 as i32, uv.1 as i32); @@ -214,11 +214,11 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 { // If the cell has no points, nothing we can do, so return 0.0 if num == 0 { - return Vec4::splat(0.0); + return Float4::splat(0.0); } // Normalize lambda to spectrum table index range. - let sb: Vec4 = (lambdas - Vec4::splat(SPECTRUM_SAMPLE_MIN)) + let sb: Float4 = (lambdas - Float4::splat(SPECTRUM_SAMPLE_MIN)) / (SPECTRUM_SAMPLE_MAX - SPECTRUM_SAMPLE_MIN) * (SPECTRUM_NUM_SAMPLES as f32 - 1.0); debug_assert!(sb.min_element() >= 0.0); @@ -226,7 +226,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 { // Get the spectral values for the vertices of the grid cell. // TODO: use integer SIMD intrinsics to make this part faster. - let mut p = [Vec4::splat(0.0); 6]; + let mut p = [Float4::splat(0.0); 6]; let sb0: [i32; 4] = [sb[0] as i32, sb[1] as i32, sb[2] as i32, sb[3] as i32]; assert!(sb0[0].max(sb0[1]).max(sb0[2].max(sb0[3])) < SPECTRUM_NUM_SAMPLES); let sb1: [i32; 4] = [ @@ -235,27 +235,27 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 { (sb[2] as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1), (sb[3] as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1), ]; - let sbf = sb - Vec4::new(sb0[0] as f32, sb0[1] as f32, sb0[2] as f32, sb0[3] as f32); + let sbf = sb - Float4::new(sb0[0] as f32, sb0[1] as f32, sb0[2] as f32, sb0[3] as f32); for i in 0..(num as usize) { debug_assert!(idx[i] >= 0); let spectrum = &SPECTRUM_DATA_POINTS[idx[i] as usize].spectrum; - let p0 = Vec4::new( + let p0 = Float4::new( spectrum[sb0[0] as usize], spectrum[sb0[1] as usize], spectrum[sb0[2] as usize], spectrum[sb0[3] as usize], ); - let p1 = Vec4::new( + let p1 = Float4::new( spectrum[sb1[0] as usize], spectrum[sb1[1] as usize], spectrum[sb1[2] as usize], spectrum[sb1[3] as usize], ); - p[i] = p0 * (Vec4::splat(1.0) - sbf) + p1 * sbf; + p[i] = p0 * (Float4::splat(1.0) - sbf) + p1 * sbf; } // Linearly interpolate the spectral power of the cell vertices. - let mut interpolated_p = Vec4::splat(0.0); + let mut interpolated_p = Float4::splat(0.0); if inside { // Fast path for normal inner quads: let uv2 = (uv.0 - uvi.0 as f32, uv.1 - uvi.1 as f32);