diff --git a/Cargo.lock b/Cargo.lock index 7e4b4ef..0323c91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,14 @@ dependencies = [ "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "approx" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "atty" version = "0.2.11" @@ -110,10 +118,6 @@ name = "crossbeam" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "float4" -version = "0.1.0" - [[package]] name = "fnv" version = "1.0.6" @@ -124,6 +128,14 @@ name = "fuchsia-cprng" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "glam" +version = "0.7.1" +source = "git+https://github.com/bitshifter/glam-rs.git?rev=0f314f99#0f314f990710ff9357e5896de2b55ec82fe88e0d" +dependencies = [ + "approx 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "half" version = "1.3.0" @@ -147,7 +159,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" name = "math3d" version = "0.1.0" dependencies = [ - "float4 0.1.0", + "approx 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "glam 0.7.1 (git+https://github.com/bitshifter/glam-rs.git?rev=0f314f99)", ] [[package]] @@ -246,7 +259,7 @@ dependencies = [ "color 0.1.0", "copy_in_place 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "float4 0.1.0", + "glam 0.7.1 (git+https://github.com/bitshifter/glam-rs.git?rev=0f314f99)", "half 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "halton 0.1.0", "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -449,7 +462,7 @@ version = "0.1.0" name = "spectral_upsampling" version = "0.1.0" dependencies = [ - "float4 0.1.0", + "glam 0.7.1 (git+https://github.com/bitshifter/glam-rs.git?rev=0f314f99)", ] [[package]] @@ -551,6 +564,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [metadata] "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +"checksum approx 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f0e60b75072ecd4168020818c0107f2857bb6c4e64252d8d3983f6263b40a5c3" "checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" "checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf" "checksum base64 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "489d6c0ed21b11d038c31b6ceccca973e65d73ba3bd8ecb9a2babf5546164643" @@ -567,6 +581,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum crossbeam 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "24ce9782d4d5c53674646a6a4c1863a21a8fc0cb649b3c94dfc16e45071dea19" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" +"checksum glam 0.7.1 (git+https://github.com/bitshifter/glam-rs.git?rev=0f314f99)" = "" "checksum half 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9353c2a89d550b58fa0061d8ed8d002a7d8cdf2494eb0e432859bd3a9e543836" "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" "checksum libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)" = "42914d39aad277d9e176efbdad68acb1d5443ab65afe0e0e4f0d49352a950880" diff --git a/Cargo.toml b/Cargo.toml index 14ee2ac..f2fe96e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,6 @@ members = [ "sub_crates/bvh_order", "sub_crates/color", - "sub_crates/float4", "sub_crates/halton", "sub_crates/math3d", "sub_crates/mem_arena", @@ -36,6 +35,7 @@ png_encode_mini = "0.1.2" rustc-serialize = "0.3" scoped_threadpool = "0.1" time = "0.1" +glam = {git="https://github.com/bitshifter/glam-rs.git", rev="0f314f99", default-features=false, features=["approx"]} # Local crate dependencies [dependencies.bvh_order] @@ -44,9 +44,6 @@ path = "sub_crates/bvh_order" [dependencies.color] path = "sub_crates/color" -[dependencies.float4] -path = "sub_crates/float4" - [dependencies.halton] path = "sub_crates/halton" diff --git a/src/accel/bvh4.rs b/src/accel/bvh4.rs index 92fce91..d87d22c 100644 --- a/src/accel/bvh4.rs +++ b/src/accel/bvh4.rs @@ -6,6 +6,8 @@ use std::mem::{transmute, MaybeUninit}; +use glam::Vec4Mask; + use mem_arena::MemArena; use crate::{ @@ -23,7 +25,6 @@ use super::{ }; use bvh_order::{calc_traversal_code, SplitAxes, TRAVERSAL_TABLE}; -use float4::Bool4; pub fn ray_code(dir: Vector) -> usize { let ray_sign_is_neg = [dir.x() < 0.0, dir.y() < 0.0, dir.z() < 0.0]; @@ -122,12 +123,12 @@ impl<'a> BVH4<'a> { traversal_code, } => { node_tests += ray_stack.ray_count_in_next_task() as u64; - let mut all_hits = Bool4::new_false(); + let mut all_hits = Vec4Mask::default(); // Ray testing ray_stack.pop_do_next_task_and_push_rays(children.len(), |ray_idx| { if rays.is_done(ray_idx) { - Bool4::new_false() + Vec4Mask::default() } else { let hits = if bounds.len() == 1 { bounds[0].intersect_ray( @@ -148,7 +149,7 @@ impl<'a> BVH4<'a> { }); // If there were any intersections, create tasks. - if !all_hits.is_all_false() { + if all_hits.any() { let order_code = traversal_table[traversal_code as usize]; let mut lane_count = 0; let mut i = children.len() as u8; diff --git a/src/bbox.rs b/src/bbox.rs index a4a43bb..bd573ac 100644 --- a/src/bbox.rs +++ b/src/bbox.rs @@ -45,12 +45,12 @@ impl BBox { let t2 = (self.max.co - orig.co) * dir_inv.co; // Find the far and near intersection - let mut far_t = t1.v_max(t2); - let mut near_t = t1.v_min(t2); - far_t.set_3(std::f32::INFINITY); - near_t.set_3(0.0); - let far_hit_t = fast_minf32(far_t.h_min() * BBOX_MAXT_ADJUST, max_t); - let near_hit_t = near_t.h_max(); + let mut far_t = t1.max(t2); + let mut near_t = t1.min(t2); + far_t.set_w(std::f32::INFINITY); + near_t.set_w(0.0); + let far_hit_t = fast_minf32(far_t.min_element() * BBOX_MAXT_ADJUST, max_t); + let near_hit_t = near_t.max_element(); // Did we hit? near_hit_t <= far_hit_t @@ -106,10 +106,10 @@ impl BitOr for BBox { fn bitor(self, rhs: BBox) -> BBox { BBox::from_points( Point { - co: self.min.co.v_min(rhs.min.co), + co: self.min.co.min(rhs.min.co), }, Point { - co: self.max.co.v_max(rhs.max.co), + co: self.max.co.max(rhs.max.co), }, ) } @@ -128,10 +128,10 @@ impl BitOr for BBox { fn bitor(self, rhs: Point) -> BBox { BBox::from_points( Point { - co: self.min.co.v_min(rhs.co), + co: self.min.co.min(rhs.co), }, Point { - co: self.max.co.v_max(rhs.co), + co: self.max.co.max(rhs.co), }, ) } diff --git a/src/bbox4.rs b/src/bbox4.rs index 71793a4..07cb456 100644 --- a/src/bbox4.rs +++ b/src/bbox4.rs @@ -9,16 +9,16 @@ use crate::{ math::{Point, Vector}, }; -use float4::{Bool4, Float4}; +use glam::{Vec4, Vec4Mask}; const BBOX_MAXT_ADJUST: f32 = 1.00000024; /// A SIMD set of 4 3D axis-aligned bounding boxes. #[derive(Debug, Copy, Clone)] pub struct BBox4 { - pub x: (Float4, Float4), // (min, max) - pub y: (Float4, Float4), // (min, max) - pub z: (Float4, Float4), // (min, max) + pub x: (Vec4, Vec4), // (min, max) + pub y: (Vec4, Vec4), // (min, max) + pub z: (Vec4, Vec4), // (min, max) } impl BBox4 { @@ -26,16 +26,16 @@ impl BBox4 { pub fn new() -> BBox4 { BBox4 { x: ( - Float4::splat(std::f32::INFINITY), - Float4::splat(std::f32::NEG_INFINITY), + Vec4::splat(std::f32::INFINITY), + Vec4::splat(std::f32::NEG_INFINITY), ), y: ( - Float4::splat(std::f32::INFINITY), - Float4::splat(std::f32::NEG_INFINITY), + Vec4::splat(std::f32::INFINITY), + Vec4::splat(std::f32::NEG_INFINITY), ), z: ( - Float4::splat(std::f32::INFINITY), - Float4::splat(std::f32::NEG_INFINITY), + Vec4::splat(std::f32::INFINITY), + Vec4::splat(std::f32::NEG_INFINITY), ), } } @@ -45,30 +45,30 @@ impl BBox4 { pub fn from_bboxes(b1: BBox, b2: BBox, b3: BBox, b4: BBox) -> BBox4 { BBox4 { x: ( - Float4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()), - Float4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x()), + Vec4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()), + Vec4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x()), ), y: ( - Float4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()), - Float4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y()), + Vec4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()), + Vec4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y()), ), z: ( - Float4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()), - Float4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z()), + Vec4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()), + Vec4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z()), ), } } // Returns whether the given ray intersects with the bboxes. - pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> Bool4 { + pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> Vec4Mask { // Get the ray data into SIMD format. - let ro_x = orig.co.all_0(); - let ro_y = orig.co.all_1(); - let ro_z = orig.co.all_2(); - let rdi_x = dir_inv.co.all_0(); - let rdi_y = dir_inv.co.all_1(); - let rdi_z = dir_inv.co.all_2(); - let max_t = Float4::splat(max_t); + let ro_x = Vec4::splat(orig.co.x()); + let ro_y = Vec4::splat(orig.co.y()); + let ro_z = Vec4::splat(orig.co.z()); + let rdi_x = Vec4::splat(dir_inv.co.x()); + let rdi_y = Vec4::splat(dir_inv.co.y()); + let rdi_z = Vec4::splat(dir_inv.co.z()); + let max_t = Vec4::splat(max_t); // Slab tests let t1_x = (self.x.0 - ro_x) * rdi_x; @@ -79,24 +79,21 @@ impl BBox4 { let t2_z = (self.z.1 - ro_z) * rdi_z; // Get the far and near t hits for each axis. - let t_far_x = t1_x.v_max(t2_x); - let t_far_y = t1_y.v_max(t2_y); - let t_far_z = t1_z.v_max(t2_z); - let t_near_x = t1_x.v_min(t2_x); - let t_near_y = t1_y.v_min(t2_y); - let t_near_z = t1_z.v_min(t2_z); + let t_far_x = t1_x.max(t2_x); + let t_far_y = t1_y.max(t2_y); + let t_far_z = t1_z.max(t2_z); + let t_near_x = t1_x.min(t2_x); + let t_near_y = t1_y.min(t2_y); + let t_near_z = t1_z.min(t2_z); // Calculate over-all far t hit. - let far_t = - (t_far_x.v_min(t_far_y.v_min(t_far_z)) * Float4::splat(BBOX_MAXT_ADJUST)).v_min(max_t); + let far_t = (t_far_x.min(t_far_y.min(t_far_z)) * Vec4::splat(BBOX_MAXT_ADJUST)).min(max_t); // Calculate over-all near t hit. - let near_t = t_near_x - .v_max(t_near_y) - .v_max(t_near_z.v_max(Float4::splat(0.0))); + let near_t = t_near_x.max(t_near_y).max(t_near_z.max(Vec4::splat(0.0))); // Hit results - near_t.lt(far_t) + near_t.cmplt(far_t) } } @@ -106,9 +103,9 @@ impl BitOr for BBox4 { fn bitor(self, rhs: BBox4) -> BBox4 { BBox4 { - x: (self.x.0.v_min(rhs.x.0), self.x.1.v_max(rhs.x.1)), - y: (self.y.0.v_min(rhs.y.0), self.y.1.v_max(rhs.y.1)), - z: (self.z.0.v_min(rhs.z.0), self.z.1.v_max(rhs.z.1)), + x: (self.x.0.min(rhs.x.0), self.x.1.max(rhs.x.1)), + y: (self.y.0.min(rhs.y.0), self.y.1.max(rhs.y.1)), + z: (self.z.0.min(rhs.z.0), self.z.1.max(rhs.z.1)), } } } diff --git a/src/color.rs b/src/color.rs index 1e25e36..891a465 100644 --- a/src/color.rs +++ b/src/color.rs @@ -4,7 +4,7 @@ pub use color::{ rec709_e_to_xyz, rec709_to_xyz, xyz_to_aces_ap0, xyz_to_aces_ap0_e, xyz_to_rec709, xyz_to_rec709_e, }; -use float4::Float4; +use glam::Vec4; use half::f16; use spectral_upsampling::meng::{spectrum_xyz_to_p_4, EQUAL_ENERGY_REFLECTANCE}; use trifloat::signed48; @@ -31,10 +31,10 @@ fn nth_wavelength(hero_wavelength: f32, n: usize) -> f32 { } } -/// Returns all wavelengths of a hero wavelength set as a Float4 +/// Returns all wavelengths of a hero wavelength set as a Vec4 #[inline(always)] -fn wavelengths(hero_wavelength: f32) -> Float4 { - Float4::new( +fn wavelengths(hero_wavelength: f32) -> Vec4 { + Vec4::new( nth_wavelength(hero_wavelength, 0), nth_wavelength(hero_wavelength, 1), nth_wavelength(hero_wavelength, 2), @@ -94,11 +94,11 @@ impl Color { } => { SpectralSample::from_parts( // TODO: make this SIMD - Float4::new( - plancks_law(temperature, wls.get_0()) * factor, - plancks_law(temperature, wls.get_1()) * factor, - plancks_law(temperature, wls.get_2()) * factor, - plancks_law(temperature, wls.get_3()) * factor, + Vec4::new( + plancks_law(temperature, wls.x()) * factor, + plancks_law(temperature, wls.y()) * factor, + plancks_law(temperature, wls.z()) * factor, + plancks_law(temperature, wls.w()) * factor, ), hero_wavelength, ) @@ -109,11 +109,11 @@ impl Color { } => { SpectralSample::from_parts( // TODO: make this SIMD - Float4::new( - plancks_law_normalized(temperature, wls.get_0()) * factor, - plancks_law_normalized(temperature, wls.get_1()) * factor, - plancks_law_normalized(temperature, wls.get_2()) * factor, - plancks_law_normalized(temperature, wls.get_3()) * factor, + Vec4::new( + plancks_law_normalized(temperature, wls.x()) * factor, + plancks_law_normalized(temperature, wls.y()) * factor, + plancks_law_normalized(temperature, wls.z()) * factor, + plancks_law_normalized(temperature, wls.w()) * factor, ), hero_wavelength, ) @@ -388,7 +388,7 @@ fn plancks_law_normalized(temperature: f32, wavelength: f32) -> f32 { #[derive(Copy, Clone, Debug)] pub struct SpectralSample { - pub e: Float4, + pub e: Vec4, hero_wavelength: f32, } @@ -396,7 +396,7 @@ impl SpectralSample { pub fn new(wavelength: f32) -> SpectralSample { debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX); SpectralSample { - e: Float4::splat(0.0), + e: Vec4::splat(0.0), hero_wavelength: wavelength, } } @@ -405,12 +405,12 @@ impl SpectralSample { pub fn from_value(value: f32, wavelength: f32) -> SpectralSample { debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX); SpectralSample { - e: Float4::splat(value), + e: Vec4::splat(value), hero_wavelength: wavelength, } } - pub fn from_parts(e: Float4, wavelength: f32) -> SpectralSample { + pub fn from_parts(e: Vec4, wavelength: f32) -> SpectralSample { debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX); SpectralSample { e: e, @@ -520,10 +520,10 @@ impl XYZ { } pub fn from_spectral_sample(ss: &SpectralSample) -> XYZ { - let xyz0 = XYZ::from_wavelength(ss.wl_n(0), ss.e.get_0()); - let xyz1 = XYZ::from_wavelength(ss.wl_n(1), ss.e.get_1()); - let xyz2 = XYZ::from_wavelength(ss.wl_n(2), ss.e.get_2()); - let xyz3 = XYZ::from_wavelength(ss.wl_n(3), ss.e.get_3()); + let xyz0 = XYZ::from_wavelength(ss.wl_n(0), ss.e.x()); + let xyz1 = XYZ::from_wavelength(ss.wl_n(1), ss.e.y()); + let xyz2 = XYZ::from_wavelength(ss.wl_n(2), ss.e.z()); + let xyz3 = XYZ::from_wavelength(ss.wl_n(3), ss.e.w()); (xyz0 + xyz1 + xyz2 + xyz3) * 0.75 } @@ -601,8 +601,8 @@ impl DivAssign for XYZ { /// the method in the paper "Physically Meaningful Rendering using Tristimulus /// Colours" by Meng et al. #[inline(always)] -fn xyz_to_spectrum_4(xyz: (f32, f32, f32), wavelengths: Float4) -> Float4 { - spectrum_xyz_to_p_4(wavelengths, xyz) * Float4::splat(1.0 / EQUAL_ENERGY_REFLECTANCE) +fn xyz_to_spectrum_4(xyz: (f32, f32, f32), wavelengths: Vec4) -> Vec4 { + spectrum_xyz_to_p_4(wavelengths, xyz) * Vec4::splat(1.0 / EQUAL_ENERGY_REFLECTANCE) // aces_to_spectrum_p4(wavelengths, xyz_to_aces_ap0_e(xyz)) } diff --git a/src/lerp.rs b/src/lerp.rs index fbfa659..0449c1d 100644 --- a/src/lerp.rs +++ b/src/lerp.rs @@ -73,23 +73,15 @@ impl Lerp for (T, T) { } } -impl Lerp for float4::Float4 { - fn lerp(self, other: float4::Float4, alpha: f32) -> float4::Float4 { +impl Lerp for glam::Vec4 { + fn lerp(self, other: glam::Vec4, alpha: f32) -> glam::Vec4 { (self * (1.0 - alpha)) + (other * alpha) } } impl Lerp for Matrix4x4 { fn lerp(self, other: Matrix4x4, alpha: f32) -> Matrix4x4 { - let alpha_minus = 1.0 - alpha; - Matrix4x4 { - values: [ - (self[0] * alpha_minus) + (other[0] * alpha), - (self[1] * alpha_minus) + (other[1] * alpha), - (self[2] * alpha_minus) + (other[2] * alpha), - (self[3] * alpha_minus) + (other[3] * alpha), - ], - } + (self * (1.0 - alpha)) + (other * alpha) } } diff --git a/src/ray.rs b/src/ray.rs index 7c2bc83..f2055ac 100644 --- a/src/ray.rs +++ b/src/ray.rs @@ -1,6 +1,6 @@ #![allow(dead_code)] -use float4::{Bool4, Float4}; +use glam::{Vec4, Vec4Mask}; use crate::math::{Matrix4x4, Point, Vector}; @@ -86,7 +86,7 @@ impl RayBatch { pub fn set_from_ray(&mut self, ray: &Ray, is_occlusion: bool, idx: usize) { self.hot[idx].orig_local = ray.orig; self.hot[idx].dir_inv_local = Vector { - co: Float4::splat(1.0) / ray.dir.co, + co: Vec4::splat(1.0) / ray.dir.co, }; self.hot[idx].max_t = ray.max_t; self.hot[idx].time = ray.time; @@ -122,7 +122,7 @@ impl RayBatch { pub fn update_local(&mut self, idx: usize, xform: &Matrix4x4) { self.hot[idx].orig_local = self.cold[idx].orig * *xform; self.hot[idx].dir_inv_local = Vector { - co: Float4::splat(1.0) / (self.cold[idx].dir * *xform).co, + co: Vec4::splat(1.0) / (self.cold[idx].dir * *xform).co, }; } @@ -349,7 +349,7 @@ impl RayStack { /// indicated lanes. pub fn pop_do_next_task_and_push_rays(&mut self, output_lane_count: usize, mut handle_ray: F) where - F: FnMut(usize) -> Bool4, + F: FnMut(usize) -> Vec4Mask, { // Pop the task and do necessary bookkeeping. let task = self.tasks.pop().unwrap(); @@ -372,9 +372,9 @@ impl RayStack { // Execute task. for i in task_range.0..task_range.1 { let ray_idx = *unsafe { self.lanes[task.lane].idxs.get_unchecked(i) }; - let push_mask = handle_ray(ray_idx as usize); + let push_mask = handle_ray(ray_idx as usize).bitmask(); for l in 0..output_lane_count { - if push_mask.get_n(l) { + if (push_mask & (1 << l)) != 0 { self.lanes[l as usize].idxs.push(ray_idx); } } diff --git a/src/renderer.rs b/src/renderer.rs index 50d3061..d956f26 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -9,7 +9,7 @@ use std::{ use crossbeam::sync::MsQueue; use scoped_threadpool::Pool; -use float4::Float4; +use glam::Vec4; use crate::{ accel::ACCEL_NODE_RAY_TESTS, @@ -374,12 +374,12 @@ pub struct LightPath { wavelength: f32, next_bounce_ray: Option, - next_attenuation_fac: Float4, + next_attenuation_fac: Vec4, closure_sample_pdf: f32, - light_attenuation: Float4, - pending_color_addition: Float4, - color: Float4, + light_attenuation: Vec4, + pending_color_addition: Vec4, + color: Vec4, } #[allow(clippy::new_ret_no_self)] @@ -405,12 +405,12 @@ impl LightPath { wavelength: wavelength, next_bounce_ray: None, - next_attenuation_fac: Float4::splat(1.0), + next_attenuation_fac: Vec4::splat(1.0), closure_sample_pdf: 1.0, - light_attenuation: Float4::splat(1.0), - pending_color_addition: Float4::splat(0.0), - color: Float4::splat(0.0), + light_attenuation: Vec4::splat(1.0), + pending_color_addition: Vec4::splat(0.0), + color: Vec4::splat(0.0), }, scene.camera.generate_ray( image_plane_co.0, @@ -565,7 +565,7 @@ impl LightPath { // If there's any possible contribution, set up for a // light ray. - if attenuation.e.h_max() <= 0.0 { + if attenuation.e.max_element() <= 0.0 { false } else { // Calculate and store the light that will be contributed @@ -599,7 +599,7 @@ impl LightPath { }; // Check if pdf is zero, to avoid NaN's. - if (pdf > 0.0) && (filter.e.h_max() > 0.0) { + if (pdf > 0.0) && (filter.e.max_element() > 0.0) { // Account for the additional light attenuation from // this bounce self.next_attenuation_fac = filter.e; diff --git a/src/shading/surface_closure.rs b/src/shading/surface_closure.rs index be14360..10713eb 100644 --- a/src/shading/surface_closure.rs +++ b/src/shading/surface_closure.rs @@ -2,7 +2,7 @@ use std::f32::consts::PI as PI_32; -use float4::Float4; +use glam::Vec4; use crate::{ color::{Color, SpectralSample}, @@ -492,27 +492,27 @@ mod ggx_closure { let spectrum_sample = col.to_spectral_sample(wavelength); let rev_fresnel = 1.0 - fresnel; let c0 = lerp( - schlick_fresnel_from_fac(spectrum_sample.e.get_0(), hb), - spectrum_sample.e.get_0(), + schlick_fresnel_from_fac(spectrum_sample.e.x(), hb), + spectrum_sample.e.x(), rev_fresnel, ); let c1 = lerp( - schlick_fresnel_from_fac(spectrum_sample.e.get_1(), hb), - spectrum_sample.e.get_1(), + schlick_fresnel_from_fac(spectrum_sample.e.y(), hb), + spectrum_sample.e.y(), rev_fresnel, ); let c2 = lerp( - schlick_fresnel_from_fac(spectrum_sample.e.get_2(), hb), - spectrum_sample.e.get_2(), + schlick_fresnel_from_fac(spectrum_sample.e.z(), hb), + spectrum_sample.e.z(), rev_fresnel, ); let c3 = lerp( - schlick_fresnel_from_fac(spectrum_sample.e.get_3(), hb), - spectrum_sample.e.get_3(), + schlick_fresnel_from_fac(spectrum_sample.e.w(), hb), + spectrum_sample.e.w(), rev_fresnel, ); - SpectralSample::from_parts(Float4::new(c0, c1, c2, c3), wavelength) + SpectralSample::from_parts(Vec4::new(c0, c1, c2, c3), wavelength) }; // Calculate everything else diff --git a/src/surface/triangle.rs b/src/surface/triangle.rs index 4aed3a3..5e0f60f 100644 --- a/src/surface/triangle.rs +++ b/src/surface/triangle.rs @@ -163,7 +163,7 @@ pub fn surface_point(tri: (Point, Point, Point), bary: (f32, f32, f32)) -> (Poin + (tri.2.into_vector().abs() * bary.2)) * fp_gamma(7)) .co - .h_max(); + .max_element(); (pos, pos_err) } diff --git a/sub_crates/float4/Cargo.toml b/sub_crates/float4/Cargo.toml deleted file mode 100644 index 3cc0324..0000000 --- a/sub_crates/float4/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "float4" -version = "0.1.0" -authors = ["Nathan Vegdahl "] -edition = "2018" -license = "MIT" - -[lib] -name = "float4" -path = "src/lib.rs" diff --git a/sub_crates/float4/src/lib.rs b/sub_crates/float4/src/lib.rs deleted file mode 100644 index 0f081b3..0000000 --- a/sub_crates/float4/src/lib.rs +++ /dev/null @@ -1,1620 +0,0 @@ -#![allow(dead_code)] - -/// Implementation of Float4 for x86_64 platforms with SSE support. -#[cfg(all(target_arch = "x86_64", target_feature = "sse"))] -mod x86_64_sse { - use std::{ - arch::x86_64::__m128, - cmp::PartialEq, - ops::{Add, AddAssign, BitAnd, BitOr, Div, DivAssign, Mul, MulAssign, Sub, SubAssign}, - }; - - #[derive(Debug, Copy, Clone)] - pub struct Float4 { - data: __m128, - } - - impl Float4 { - #[inline(always)] - pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 { - use std::arch::x86_64::_mm_set_ps; - Float4 { - data: unsafe { _mm_set_ps(d, c, b, a) }, - } - } - - #[inline(always)] - pub fn splat(n: f32) -> Float4 { - use std::arch::x86_64::_mm_set1_ps; - Float4 { - data: unsafe { _mm_set1_ps(n) }, - } - } - - #[inline] - pub fn h_sum(&self) -> f32 { - #[cfg(target_feature = "sse3")] - { - use std::arch::x86_64::{ - _mm_add_ps, _mm_add_ss, _mm_cvtss_f32, _mm_movehdup_ps, _mm_movehl_ps, - }; - unsafe { - let v = self.data; - let shuf = _mm_movehdup_ps(v); - let sums = _mm_add_ps(v, shuf); - let shuf = _mm_movehl_ps(shuf, sums); - let sums = _mm_add_ss(sums, shuf); - _mm_cvtss_f32(sums) - } - } - #[cfg(not(target_feature = "sse3"))] - { - use std::arch::x86_64::{ - _mm_add_ps, _mm_add_ss, _mm_cvtss_f32, _mm_movehl_ps, _mm_shuffle_ps, - }; - unsafe { - let v = self.data; - let shuf = _mm_shuffle_ps(v, v, (2 << 6) | (3 << 4) | 1); - let sums = _mm_add_ps(v, shuf); - let shuf = _mm_movehl_ps(shuf, sums); - let sums = _mm_add_ss(sums, shuf); - _mm_cvtss_f32(sums) - } - } - } - - #[inline] - pub fn h_product(&self) -> f32 { - (self.get_0() * self.get_1()) * (self.get_2() * self.get_3()) - } - - #[inline] - pub fn h_min(&self) -> f32 { - let n1 = if self.get_0() < self.get_1() { - self.get_0() - } else { - self.get_1() - }; - let n2 = if self.get_2() < self.get_3() { - self.get_2() - } else { - self.get_3() - }; - if n1 < n2 { - n1 - } else { - n2 - } - } - - #[inline] - pub fn h_max(&self) -> f32 { - let n1 = if self.get_0() > self.get_1() { - self.get_0() - } else { - self.get_1() - }; - let n2 = if self.get_2() > self.get_3() { - self.get_2() - } else { - self.get_3() - }; - if n1 > n2 { - n1 - } else { - n2 - } - } - - #[inline(always)] - pub fn v_min(&self, other: Float4) -> Float4 { - use std::arch::x86_64::_mm_min_ps; - Float4 { - data: unsafe { _mm_min_ps(self.data, other.data) }, - } - } - - #[inline(always)] - pub fn v_max(&self, other: Float4) -> Float4 { - use std::arch::x86_64::_mm_max_ps; - Float4 { - data: unsafe { _mm_max_ps(self.data, other.data) }, - } - } - - #[inline(always)] - pub fn lt(&self, other: Float4) -> Bool4 { - use std::arch::x86_64::_mm_cmplt_ps; - Bool4 { - data: unsafe { _mm_cmplt_ps(self.data, other.data) }, - } - } - - #[inline(always)] - pub fn lte(&self, other: Float4) -> Bool4 { - use std::arch::x86_64::_mm_cmple_ps; - Bool4 { - data: unsafe { _mm_cmple_ps(self.data, other.data) }, - } - } - - #[inline(always)] - pub fn gt(&self, other: Float4) -> Bool4 { - use std::arch::x86_64::_mm_cmpgt_ps; - Bool4 { - data: unsafe { _mm_cmpgt_ps(self.data, other.data) }, - } - } - - #[inline(always)] - pub fn gte(&self, other: Float4) -> Bool4 { - use std::arch::x86_64::_mm_cmpge_ps; - Bool4 { - data: unsafe { _mm_cmpge_ps(self.data, other.data) }, - } - } - - /// Set the nth element to the given value. - #[inline(always)] - pub fn set_n(&mut self, n: usize, v: f32) { - assert!( - n <= 3, - "Attempted to set element of Float4 outside of bounds." - ); - - unsafe { *(&mut self.data as *mut std::arch::x86_64::__m128 as *mut f32).add(n) = v } - } - - /// Set the 0th element to the given value. - #[inline(always)] - pub fn set_0(&mut self, v: f32) { - self.set_n(0, v); - } - - /// Set the 1th element to the given value. - #[inline(always)] - pub fn set_1(&mut self, v: f32) { - self.set_n(1, v); - } - - /// Set the 2th element to the given value. - #[inline(always)] - pub fn set_2(&mut self, v: f32) { - self.set_n(2, v); - } - - /// Set the 3th element to the given value. - #[inline(always)] - pub fn set_3(&mut self, v: f32) { - self.set_n(3, v); - } - - /// Returns the value of the nth element. - #[inline(always)] - pub fn get_n(&self, n: usize) -> f32 { - assert!( - n <= 3, - "Attempted to access element of Float4 outside of bounds." - ); - - unsafe { *(&self.data as *const std::arch::x86_64::__m128 as *const f32).add(n) } - } - - /// Returns the value of the 0th element. - #[inline(always)] - pub fn get_0(&self) -> f32 { - self.get_n(0) - } - - /// Returns the value of the 1th element. - #[inline(always)] - pub fn get_1(&self) -> f32 { - self.get_n(1) - } - - /// Returns the value of the 2th element. - #[inline(always)] - pub fn get_2(&self) -> f32 { - self.get_n(2) - } - - /// Returns the value of the 3th element. - #[inline(always)] - pub fn get_3(&self) -> f32 { - self.get_n(3) - } - - /// Returns a Float4 with all elements set to the value - /// of element 0. - #[inline(always)] - pub fn all_0(&self) -> Float4 { - use std::arch::x86_64::_mm_shuffle_ps; - Float4 { - data: unsafe { _mm_shuffle_ps(self.data, self.data, 0b00_00_00_00) }, - } - } - - /// Returns a Float4 with all elements set to the value - /// of element 1. - #[inline(always)] - pub fn all_1(&self) -> Float4 { - use std::arch::x86_64::_mm_shuffle_ps; - Float4 { - data: unsafe { _mm_shuffle_ps(self.data, self.data, 0b01_01_01_01) }, - } - } - - /// Returns a Float4 with all elements set to the value - /// of element 2. - #[inline(always)] - pub fn all_2(&self) -> Float4 { - use std::arch::x86_64::_mm_shuffle_ps; - Float4 { - data: unsafe { _mm_shuffle_ps(self.data, self.data, 0b10_10_10_10) }, - } - } - - /// Returns a Float4 with all elements set to the value - /// of element 3. - #[inline(always)] - pub fn all_3(&self) -> Float4 { - use std::arch::x86_64::_mm_shuffle_ps; - Float4 { - data: unsafe { _mm_shuffle_ps(self.data, self.data, 0b11_11_11_11) }, - } - } - - /// Returns the square roots of all elements. - #[inline(always)] - pub fn sqrt(&self) -> Float4 { - use std::arch::x86_64::_mm_sqrt_ps; - Float4 { - data: unsafe { _mm_sqrt_ps(self.data) }, - } - } - - /// Performs a fused multiply add. - /// - /// i.e. self * b + c - #[inline(always)] - pub fn fmadd(&self, b: Float4, c: Float4) -> Float4 { - #[cfg(target_feature = "fma")] - { - use std::arch::x86_64::_mm_fmadd_ps; - Float4 { - data: unsafe { _mm_fmadd_ps(self.data, b.data, c.data) }, - } - } - #[cfg(not(target_feature = "fma"))] - { - (*self * b) + c - } - } - } - - impl PartialEq for Float4 { - #[inline] - fn eq(&self, other: &Float4) -> bool { - self.get_0() == other.get_0() - && self.get_1() == other.get_1() - && self.get_2() == other.get_2() - && self.get_3() == other.get_3() - } - } - - impl Add for Float4 { - type Output = Float4; - - #[inline(always)] - fn add(self, other: Float4) -> Float4 { - use std::arch::x86_64::_mm_add_ps; - Float4 { - data: unsafe { _mm_add_ps(self.data, other.data) }, - } - } - } - - impl AddAssign for Float4 { - #[inline(always)] - fn add_assign(&mut self, rhs: Float4) { - *self = *self + rhs; - } - } - - impl Sub for Float4 { - type Output = Float4; - - #[inline(always)] - fn sub(self, other: Float4) -> Float4 { - use std::arch::x86_64::_mm_sub_ps; - Float4 { - data: unsafe { _mm_sub_ps(self.data, other.data) }, - } - } - } - - impl SubAssign for Float4 { - #[inline(always)] - fn sub_assign(&mut self, rhs: Float4) { - *self = *self - rhs; - } - } - - impl Mul for Float4 { - type Output = Float4; - - #[inline(always)] - fn mul(self, other: Float4) -> Float4 { - use std::arch::x86_64::_mm_mul_ps; - Float4 { - data: unsafe { _mm_mul_ps(self.data, other.data) }, - } - } - } - - impl Mul for Float4 { - type Output = Float4; - - #[inline(always)] - fn mul(self, other: f32) -> Float4 { - self * Float4::splat(other) - } - } - - impl MulAssign for Float4 { - #[inline(always)] - fn mul_assign(&mut self, rhs: Float4) { - *self = *self * rhs; - } - } - - impl MulAssign for Float4 { - #[inline(always)] - fn mul_assign(&mut self, rhs: f32) { - *self = *self * rhs; - } - } - - impl Div for Float4 { - type Output = Float4; - - #[inline(always)] - fn div(self, other: Float4) -> Float4 { - use std::arch::x86_64::_mm_div_ps; - Float4 { - data: unsafe { _mm_div_ps(self.data, other.data) }, - } - } - } - - impl Div for Float4 { - type Output = Float4; - - #[inline(always)] - fn div(self, other: f32) -> Float4 { - self / Float4::splat(other) - } - } - - impl DivAssign for Float4 { - #[inline(always)] - fn div_assign(&mut self, rhs: Float4) { - *self = *self / rhs; - } - } - - impl DivAssign for Float4 { - #[inline(always)] - fn div_assign(&mut self, rhs: f32) { - *self = *self / rhs; - } - } - - // Free functions for Float4 - - #[inline(always)] - pub fn v_min(a: Float4, b: Float4) -> Float4 { - a.v_min(b) - } - - #[inline(always)] - pub fn v_max(a: Float4, b: Float4) -> Float4 { - a.v_max(b) - } - - /// Transposes a 4x4 matrix in-place. - #[inline(always)] - pub fn transpose(matrix: &mut [Float4; 4]) { - use std::arch::x86_64::_MM_TRANSPOSE4_PS; - - // The weird &mut/*mut gymnastics below are to get around - // the borrow-checker. We know statically that these references - // are non-overlapping, so it's safe. - unsafe { - _MM_TRANSPOSE4_PS( - &mut *(&mut matrix[0].data as *mut __m128), - &mut *(&mut matrix[1].data as *mut __m128), - &mut *(&mut matrix[2].data as *mut __m128), - &mut *(&mut matrix[3].data as *mut __m128), - ) - }; - } - - /// Inverts a 4x4 matrix and returns the determinate. - #[inline(always)] - pub fn invert(matrix: &mut [Float4; 4]) -> f32 { - // Code pulled from "Streaming SIMD Extensions - Inverse of 4x4 Matrix" - // by Intel. - // ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf - // Ported to Rust. - - // TODO: once __m64 and accompanying intrinsics are stabilized, switch - // to using those, commented out in the code below. - use std::arch::x86_64::{ - _mm_add_ps, - _mm_add_ss, - _mm_cvtss_f32, - _mm_mul_ps, - _mm_mul_ss, - _mm_rcp_ss, - // _mm_loadh_pi, - // _mm_loadl_pi, - // _mm_storeh_pi, - // _mm_storel_pi, - _mm_set_ps, - _mm_shuffle_ps, - _mm_sub_ps, - _mm_sub_ss, - }; - use std::mem::transmute; - - let mut minor0: __m128; - let mut minor1: __m128; - let mut minor2: __m128; - let mut minor3: __m128; - let row0: __m128; - let mut row1: __m128; - let mut row2: __m128; - let mut row3: __m128; - let mut det: __m128; - let mut tmp1: __m128; - - unsafe { - // tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src)), (__m64*)(src+ 4)); - tmp1 = _mm_set_ps( - matrix[1].get_1(), - matrix[1].get_0(), - matrix[0].get_1(), - matrix[0].get_0(), - ); - - // row1 = _mm_loadh_pi(_mm_loadl_pi(row1, (__m64*)(src+8)), (__m64*)(src+12)); - row1 = _mm_set_ps( - matrix[3].get_1(), - matrix[3].get_0(), - matrix[2].get_1(), - matrix[2].get_0(), - ); - - row0 = _mm_shuffle_ps(tmp1, row1, 0x88); - row1 = _mm_shuffle_ps(row1, tmp1, 0xDD); - - // tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src+ 2)), (__m64*)(src+ 6)); - tmp1 = _mm_set_ps( - matrix[1].get_3(), - matrix[1].get_2(), - matrix[0].get_3(), - matrix[0].get_2(), - ); - - // row3 = _mm_loadh_pi(_mm_loadl_pi(row3, (__m64*)(src+10)), (__m64*)(src+14)); - row3 = _mm_set_ps( - matrix[3].get_3(), - matrix[3].get_2(), - matrix[2].get_3(), - matrix[2].get_2(), - ); - - row2 = _mm_shuffle_ps(tmp1, row3, 0x88); - row3 = _mm_shuffle_ps(row3, tmp1, 0xDD); - // ----------------------------------------------- - tmp1 = _mm_mul_ps(row2, row3); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); - minor0 = _mm_mul_ps(row1, tmp1); - minor1 = _mm_mul_ps(row0, tmp1); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); - minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0); - minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1); - minor1 = _mm_shuffle_ps(minor1, minor1, 0x4E); - // ----------------------------------------------- - tmp1 = _mm_mul_ps(row1, row2); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); - minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0); - minor3 = _mm_mul_ps(row0, tmp1); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); - minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1)); - minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3); - minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E); - // ----------------------------------------------- - tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); - row2 = _mm_shuffle_ps(row2, row2, 0x4E); - minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0); - minor2 = _mm_mul_ps(row0, tmp1); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); - minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1)); - minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2); - minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E); - // ----------------------------------------------- - tmp1 = _mm_mul_ps(row0, row1); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); - minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2); - minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); - minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2); - minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1)); - // ----------------------------------------------- - tmp1 = _mm_mul_ps(row0, row3); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); - minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1)); - minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); - minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1); - minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1)); - // ----------------------------------------------- - tmp1 = _mm_mul_ps(row0, row2); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); - minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1); - minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1)); - tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); - minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1)); - minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3); - // ----------------------------------------------- - det = _mm_mul_ps(row0, minor0); - det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det); - det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det); - tmp1 = _mm_rcp_ss(det); - det = _mm_sub_ss( - _mm_add_ss(tmp1, tmp1), - _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1)), - ); - det = _mm_shuffle_ps(det, det, 0x00); - - minor0 = _mm_mul_ps(det, minor0); - - // _mm_storel_pi((__m64*)(src), minor0); - // _mm_storeh_pi((__m64*)(src+2), minor0); - let minor0 = transmute::<__m128, [f32; 4]>(minor0); - matrix[0].data = _mm_set_ps(minor0[3], minor0[2], minor0[1], minor0[0]); - - minor1 = _mm_mul_ps(det, minor1); - - // _mm_storel_pi((__m64*)(src+4), minor1); - // _mm_storeh_pi((__m64*)(src+6), minor1); - let minor1 = transmute::<__m128, [f32; 4]>(minor1); - matrix[1].data = _mm_set_ps(minor1[3], minor1[2], minor1[1], minor1[0]); - - minor2 = _mm_mul_ps(det, minor2); - - // _mm_storel_pi((__m64*)(src+ 8), minor2); - // _mm_storeh_pi((__m64*)(src+10), minor2); - let minor2 = transmute::<__m128, [f32; 4]>(minor2); - matrix[2].data = _mm_set_ps(minor2[3], minor2[2], minor2[1], minor2[0]); - - minor3 = _mm_mul_ps(det, minor3); - - // _mm_storel_pi((__m64*)(src+12), minor3); - // _mm_storeh_pi((__m64*)(src+14), minor3); - let minor3 = transmute::<__m128, [f32; 4]>(minor3); - matrix[3].data = _mm_set_ps(minor3[3], minor3[2], minor3[1], minor3[0]); - - _mm_cvtss_f32(det) - } - } - - /// Essentially a tuple of four bools, which will use SIMD operations - /// where possible on a platform. - #[derive(Debug, Copy, Clone)] - pub struct Bool4 { - data: __m128, - } - - impl Bool4 { - #[inline(always)] - pub fn new(a: bool, b: bool, c: bool, d: bool) -> Bool4 { - use std::arch::x86_64::_mm_set_ps; - Bool4 { - data: unsafe { - _mm_set_ps( - if d { 1.0 } else { 0.0 }, - if c { 1.0 } else { 0.0 }, - if b { 1.0 } else { 0.0 }, - if a { 1.0 } else { 0.0 }, - ) - }, - } - } - - #[inline(always)] - pub fn new_false() -> Bool4 { - use std::arch::x86_64::_mm_set1_ps; - Bool4 { - data: unsafe { _mm_set1_ps(0.0) }, - } - } - - /// Returns the value of the nth element. - #[inline(always)] - pub fn get_n(&self, n: usize) -> bool { - assert!( - n <= 3, - "Attempted to access element of Bool4 outside of bounds." - ); - - 0 != unsafe { *(&self.data as *const std::arch::x86_64::__m128 as *const u32).add(n) } - } - - /// Returns the value of the 0th element. - #[inline(always)] - pub fn get_0(&self) -> bool { - self.get_n(0) - } - - /// Returns the value of the 1st element. - #[inline(always)] - pub fn get_1(&self) -> bool { - self.get_n(1) - } - - /// Returns the value of the 2nd element. - #[inline(always)] - pub fn get_2(&self) -> bool { - self.get_n(2) - } - - /// Returns the value of the 3rd element. - #[inline(always)] - pub fn get_3(&self) -> bool { - self.get_n(3) - } - - /// Returns whether all four bools are false. - /// - /// This is the `NOT` operation on the result of `OR`ing all the - /// contained bools. If even one bool is true, this returns false. - #[inline(always)] - pub fn is_all_false(&self) -> bool { - let a = unsafe { *(&self.data as *const __m128 as *const u128) }; - a == 0 - } - - #[inline] - pub fn to_bitmask(&self) -> u8 { - let a = unsafe { *(&self.data as *const __m128 as *const u8).offset(0) }; - let b = unsafe { *(&self.data as *const __m128 as *const u8).offset(4) }; - let c = unsafe { *(&self.data as *const __m128 as *const u8).offset(8) }; - let d = unsafe { *(&self.data as *const __m128 as *const u8).offset(12) }; - (a & 0b0000_0001) | (b & 0b0000_0010) | (c & 0b0000_0100) | (d & 0b0000_1000) - } - } - - impl BitAnd for Bool4 { - type Output = Bool4; - - #[inline(always)] - fn bitand(self, rhs: Bool4) -> Bool4 { - use std::arch::x86_64::_mm_and_ps; - Bool4 { - data: unsafe { _mm_and_ps(self.data, rhs.data) }, - } - } - } - - impl BitOr for Bool4 { - type Output = Bool4; - - #[inline(always)] - fn bitor(self, rhs: Bool4) -> Bool4 { - use std::arch::x86_64::_mm_or_ps; - Bool4 { - data: unsafe { _mm_or_ps(self.data, rhs.data) }, - } - } - } -} - -//=========================================================================== - -/// Implementation fo Float4 for any platform, foregoing any -/// platform-specific optimizations. -mod fallback { - use std::{ - cmp::PartialEq, - ops::{Add, AddAssign, BitAnd, BitOr, Div, DivAssign, Mul, MulAssign, Sub, SubAssign}, - }; - - #[derive(Debug, Copy, Clone)] - pub struct Float4 { - data: [f32; 4], - } - - impl Float4 { - #[inline(always)] - pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 { - Float4 { data: [a, b, c, d] } - } - - #[inline(always)] - pub fn splat(n: f32) -> Float4 { - Float4 { data: [n, n, n, n] } - } - - #[inline] - pub fn h_sum(&self) -> f32 { - (self.get_0() + self.get_1()) + (self.get_2() + self.get_3()) - } - - #[inline] - pub fn h_product(&self) -> f32 { - (self.get_0() * self.get_1()) * (self.get_2() * self.get_3()) - } - - #[inline] - pub fn h_min(&self) -> f32 { - let n1 = if self.get_0() < self.get_1() { - self.get_0() - } else { - self.get_1() - }; - let n2 = if self.get_2() < self.get_3() { - self.get_2() - } else { - self.get_3() - }; - if n1 < n2 { - n1 - } else { - n2 - } - } - - #[inline] - pub fn h_max(&self) -> f32 { - let n1 = if self.get_0() > self.get_1() { - self.get_0() - } else { - self.get_1() - }; - let n2 = if self.get_2() > self.get_3() { - self.get_2() - } else { - self.get_3() - }; - if n1 > n2 { - n1 - } else { - n2 - } - } - - #[inline(always)] - pub fn v_min(&self, other: Float4) -> Float4 { - Float4::new( - if self.get_0() < other.get_0() { - self.get_0() - } else { - other.get_0() - }, - if self.get_1() < other.get_1() { - self.get_1() - } else { - other.get_1() - }, - if self.get_2() < other.get_2() { - self.get_2() - } else { - other.get_2() - }, - if self.get_3() < other.get_3() { - self.get_3() - } else { - other.get_3() - }, - ) - } - - #[inline(always)] - pub fn v_max(&self, other: Float4) -> Float4 { - Float4::new( - if self.get_0() > other.get_0() { - self.get_0() - } else { - other.get_0() - }, - if self.get_1() > other.get_1() { - self.get_1() - } else { - other.get_1() - }, - if self.get_2() > other.get_2() { - self.get_2() - } else { - other.get_2() - }, - if self.get_3() > other.get_3() { - self.get_3() - } else { - other.get_3() - }, - ) - } - - #[inline(always)] - pub fn lt(&self, other: Float4) -> Bool4 { - Bool4 { - data: [ - self.data[0] < other.data[0], - self.data[1] < other.data[1], - self.data[2] < other.data[2], - self.data[3] < other.data[3], - ], - } - } - - #[inline(always)] - pub fn lte(&self, other: Float4) -> Bool4 { - Bool4 { - data: [ - self.data[0] <= other.data[0], - self.data[1] <= other.data[1], - self.data[2] <= other.data[2], - self.data[3] <= other.data[3], - ], - } - } - - #[inline(always)] - pub fn gt(&self, other: Float4) -> Bool4 { - Bool4 { - data: [ - self.data[0] > other.data[0], - self.data[1] > other.data[1], - self.data[2] > other.data[2], - self.data[3] > other.data[3], - ], - } - } - - #[inline(always)] - pub fn gte(&self, other: Float4) -> Bool4 { - Bool4 { - data: [ - self.data[0] >= other.data[0], - self.data[1] >= other.data[1], - self.data[2] >= other.data[2], - self.data[3] >= other.data[3], - ], - } - } - - /// Set the nth element to the given value. - #[inline(always)] - pub fn set_n(&mut self, n: usize, v: f32) { - assert!( - n <= 3, - "Attempted to set element of Float4 outside of bounds." - ); - unsafe { - *self.data.get_unchecked_mut(n) = v; - } - } - - /// Set the 0th element to the given value. - #[inline(always)] - pub fn set_0(&mut self, v: f32) { - self.set_n(0, v); - } - - /// Set the 1th element to the given value. - #[inline(always)] - pub fn set_1(&mut self, v: f32) { - self.set_n(1, v); - } - - /// Set the 2th element to the given value. - #[inline(always)] - pub fn set_2(&mut self, v: f32) { - self.set_n(2, v); - } - - /// Set the 3th element to the given value. - #[inline(always)] - pub fn set_3(&mut self, v: f32) { - self.set_n(3, v); - } - - /// Returns the value of the nth element. - #[inline(always)] - pub fn get_n(&self, n: usize) -> f32 { - assert!( - n <= 3, - "Attempted to access element of Float4 outside of bounds." - ); - unsafe { *self.data.get_unchecked(n) } - } - - /// Returns the value of the 0th element. - #[inline(always)] - pub fn get_0(&self) -> f32 { - self.get_n(0) - } - - /// Returns the value of the 1th element. - #[inline(always)] - pub fn get_1(&self) -> f32 { - self.get_n(1) - } - - /// Returns the value of the 2th element. - #[inline(always)] - pub fn get_2(&self) -> f32 { - self.get_n(2) - } - - /// Returns the value of the 3th element. - #[inline(always)] - pub fn get_3(&self) -> f32 { - self.get_n(3) - } - - /// Returns a Float4 with all elements set to the value - /// of element 0. - #[inline(always)] - pub fn all_0(&self) -> Float4 { - Float4 { - data: [self.data[0], self.data[0], self.data[0], self.data[0]], - } - } - - /// Returns a Float4 with all elements set to the value - /// of element 1. - #[inline(always)] - pub fn all_1(&self) -> Float4 { - Float4 { - data: [self.data[1], self.data[1], self.data[1], self.data[1]], - } - } - - /// Returns a Float4 with all elements set to the value - /// of element 2. - #[inline(always)] - pub fn all_2(&self) -> Float4 { - Float4 { - data: [self.data[2], self.data[2], self.data[2], self.data[2]], - } - } - - /// Returns a Float4 with all elements set to the value - /// of element 3. - #[inline(always)] - pub fn all_3(&self) -> Float4 { - Float4 { - data: [self.data[3], self.data[3], self.data[3], self.data[3]], - } - } - - /// Returns the square roots of all elements. - #[inline(always)] - pub fn sqrt(&self) -> Float4 { - Float4::new( - self.get_0().sqrt(), - self.get_1().sqrt(), - self.get_2().sqrt(), - self.get_3().sqrt(), - ) - } - - /// Performs a fused multiply add. - /// - /// i.e. self * b + c - #[inline(always)] - pub fn fmadd(&self, b: Float4, c: Float4) -> Float4 { - (*self * b) + c - } - } - - impl PartialEq for Float4 { - #[inline] - fn eq(&self, other: &Float4) -> bool { - self.get_0() == other.get_0() - && self.get_1() == other.get_1() - && self.get_2() == other.get_2() - && self.get_3() == other.get_3() - } - } - - impl Add for Float4 { - type Output = Float4; - - #[inline(always)] - fn add(self, other: Float4) -> Float4 { - Float4 { - data: [ - self.get_0() + other.get_0(), - self.get_1() + other.get_1(), - self.get_2() + other.get_2(), - self.get_3() + other.get_3(), - ], - } - } - } - - impl AddAssign for Float4 { - #[inline(always)] - fn add_assign(&mut self, rhs: Float4) { - *self = *self + rhs; - } - } - - impl Sub for Float4 { - type Output = Float4; - - #[inline(always)] - fn sub(self, other: Float4) -> Float4 { - Float4 { - data: [ - self.get_0() - other.get_0(), - self.get_1() - other.get_1(), - self.get_2() - other.get_2(), - self.get_3() - other.get_3(), - ], - } - } - } - - impl SubAssign for Float4 { - #[inline(always)] - fn sub_assign(&mut self, rhs: Float4) { - *self = *self - rhs; - } - } - - impl Mul for Float4 { - type Output = Float4; - - #[inline(always)] - fn mul(self, other: Float4) -> Float4 { - Float4 { - data: [ - self.get_0() * other.get_0(), - self.get_1() * other.get_1(), - self.get_2() * other.get_2(), - self.get_3() * other.get_3(), - ], - } - } - } - - impl Mul for Float4 { - type Output = Float4; - - #[inline(always)] - fn mul(self, other: f32) -> Float4 { - Float4 { - data: [ - self.get_0() * other, - self.get_1() * other, - self.get_2() * other, - self.get_3() * other, - ], - } - } - } - - impl MulAssign for Float4 { - #[inline(always)] - fn mul_assign(&mut self, rhs: Float4) { - *self = *self * rhs; - } - } - - impl MulAssign for Float4 { - #[inline(always)] - fn mul_assign(&mut self, rhs: f32) { - *self = *self * rhs; - } - } - - impl Div for Float4 { - type Output = Float4; - - #[inline(always)] - fn div(self, other: Float4) -> Float4 { - Float4 { - data: [ - self.get_0() / other.get_0(), - self.get_1() / other.get_1(), - self.get_2() / other.get_2(), - self.get_3() / other.get_3(), - ], - } - } - } - - impl Div for Float4 { - type Output = Float4; - - #[inline(always)] - fn div(self, other: f32) -> Float4 { - Float4 { - data: [ - self.get_0() / other, - self.get_1() / other, - self.get_2() / other, - self.get_3() / other, - ], - } - } - } - - impl DivAssign for Float4 { - #[inline(always)] - fn div_assign(&mut self, rhs: Float4) { - *self = *self / rhs; - } - } - - impl DivAssign for Float4 { - #[inline(always)] - fn div_assign(&mut self, rhs: f32) { - *self = *self / rhs; - } - } - - // Free functions for Float4 - #[inline(always)] - pub fn v_min(a: Float4, b: Float4) -> Float4 { - a.v_min(b) - } - - #[inline(always)] - pub fn v_max(a: Float4, b: Float4) -> Float4 { - a.v_max(b) - } - - /// Transposes a 4x4 matrix in-place - #[inline(always)] - pub fn transpose(matrix: &mut [Float4; 4]) { - let m = [ - Float4::new( - matrix[0].get_0(), - matrix[1].get_0(), - matrix[2].get_0(), - matrix[3].get_0(), - ), - Float4::new( - matrix[0].get_1(), - matrix[1].get_1(), - matrix[2].get_1(), - matrix[3].get_1(), - ), - Float4::new( - matrix[0].get_2(), - matrix[1].get_2(), - matrix[2].get_2(), - matrix[3].get_2(), - ), - Float4::new( - matrix[0].get_3(), - matrix[1].get_3(), - matrix[2].get_3(), - matrix[3].get_3(), - ), - ]; - - *matrix = m; - } - - /// Inverts a 4x4 matrix and returns the determinate. - #[inline(always)] - pub fn invert(matrix: &mut [Float4; 4]) -> f32 { - let m = *matrix; - - let s0 = (m[0].get_0() * m[1].get_1()) - (m[1].get_0() * m[0].get_1()); - let s1 = (m[0].get_0() * m[1].get_2()) - (m[1].get_0() * m[0].get_2()); - let s2 = (m[0].get_0() * m[1].get_3()) - (m[1].get_0() * m[0].get_3()); - let s3 = (m[0].get_1() * m[1].get_2()) - (m[1].get_1() * m[0].get_2()); - let s4 = (m[0].get_1() * m[1].get_3()) - (m[1].get_1() * m[0].get_3()); - let s5 = (m[0].get_2() * m[1].get_3()) - (m[1].get_2() * m[0].get_3()); - - let c5 = (m[2].get_2() * m[3].get_3()) - (m[3].get_2() * m[2].get_3()); - let c4 = (m[2].get_1() * m[3].get_3()) - (m[3].get_1() * m[2].get_3()); - let c3 = (m[2].get_1() * m[3].get_2()) - (m[3].get_1() * m[2].get_2()); - let c2 = (m[2].get_0() * m[3].get_3()) - (m[3].get_0() * m[2].get_3()); - let c1 = (m[2].get_0() * m[3].get_2()) - (m[3].get_0() * m[2].get_2()); - let c0 = (m[2].get_0() * m[3].get_1()) - (m[3].get_0() * m[2].get_1()); - - // We don't check for 0.0 determinant, as that is expected to be handled - // by the calling code. - let det = (s0 * c5) - (s1 * c4) + (s2 * c3) + (s3 * c2) - (s4 * c1) + (s5 * c0); - let invdet = 1.0 / det; - - *matrix = [ - Float4::new( - ((m[1].get_1() * c5) - (m[1].get_2() * c4) + (m[1].get_3() * c3)) * invdet, - ((-m[0].get_1() * c5) + (m[0].get_2() * c4) - (m[0].get_3() * c3)) * invdet, - ((m[3].get_1() * s5) - (m[3].get_2() * s4) + (m[3].get_3() * s3)) * invdet, - ((-m[2].get_1() * s5) + (m[2].get_2() * s4) - (m[2].get_3() * s3)) * invdet, - ), - Float4::new( - ((-m[1].get_0() * c5) + (m[1].get_2() * c2) - (m[1].get_3() * c1)) * invdet, - ((m[0].get_0() * c5) - (m[0].get_2() * c2) + (m[0].get_3() * c1)) * invdet, - ((-m[3].get_0() * s5) + (m[3].get_2() * s2) - (m[3].get_3() * s1)) * invdet, - ((m[2].get_0() * s5) - (m[2].get_2() * s2) + (m[2].get_3() * s1)) * invdet, - ), - Float4::new( - ((m[1].get_0() * c4) - (m[1].get_1() * c2) + (m[1].get_3() * c0)) * invdet, - ((-m[0].get_0() * c4) + (m[0].get_1() * c2) - (m[0].get_3() * c0)) * invdet, - ((m[3].get_0() * s4) - (m[3].get_1() * s2) + (m[3].get_3() * s0)) * invdet, - ((-m[2].get_0() * s4) + (m[2].get_1() * s2) - (m[2].get_3() * s0)) * invdet, - ), - Float4::new( - ((-m[1].get_0() * c3) + (m[1].get_1() * c1) - (m[1].get_2() * c0)) * invdet, - ((m[0].get_0() * c3) - (m[0].get_1() * c1) + (m[0].get_2() * c0)) * invdet, - ((-m[3].get_0() * s3) + (m[3].get_1() * s1) - (m[3].get_2() * s0)) * invdet, - ((m[2].get_0() * s3) - (m[2].get_1() * s1) + (m[2].get_2() * s0)) * invdet, - ), - ]; - - det - } - - /// Essentially a tuple of four bools. - #[derive(Debug, Copy, Clone)] - pub struct Bool4 { - data: [bool; 4], - } - - impl Bool4 { - #[inline(always)] - pub fn new(a: bool, b: bool, c: bool, d: bool) -> Bool4 { - Bool4 { data: [a, b, c, d] } - } - - #[inline(always)] - pub fn new_false() -> Bool4 { - Bool4 { - data: [false, false, false, false], - } - } - - /// Returns the value of the nth element. - #[inline(always)] - pub fn get_n(self, n: usize) -> bool { - assert!( - n <= 3, - "Attempted to access element of Bool4 outside of bounds." - ); - unsafe { *self.data.get_unchecked(n) } - } - - /// Returns the value of the 0th element. - #[inline(always)] - pub fn get_0(self) -> bool { - self.get_n(0) - } - - /// Returns the value of the 1th element. - #[inline(always)] - pub fn get_1(self) -> bool { - self.get_n(1) - } - - /// Returns the value of the 2th element. - #[inline(always)] - pub fn get_2(self) -> bool { - self.get_n(2) - } - - /// Returns the value of the 3th element. - #[inline(always)] - pub fn get_3(self) -> bool { - self.get_n(3) - } - - /// Returns whether all four bools are false. - /// - /// This is the `NOT` operation on the result of `OR`ing all the - /// contained bools. If even one bool is true, this returns false. - #[inline(always)] - pub fn is_all_false(&self) -> bool { - !(self.data[0] | self.data[1] | self.data[2] | self.data[3]) - } - - #[inline] - pub fn to_bitmask(self) -> u8 { - (self.get_0() as u8) - | ((self.get_1() as u8) << 1) - | ((self.get_2() as u8) << 2) - | ((self.get_3() as u8) << 3) - } - } - - impl BitAnd for Bool4 { - type Output = Bool4; - - #[inline(always)] - fn bitand(self, rhs: Bool4) -> Bool4 { - Bool4 { - data: [ - self.data[0] && rhs.data[0], - self.data[1] && rhs.data[1], - self.data[2] && rhs.data[2], - self.data[3] && rhs.data[3], - ], - } - } - } - - impl BitOr for Bool4 { - type Output = Bool4; - - #[inline(always)] - fn bitor(self, rhs: Bool4) -> Bool4 { - Bool4 { - data: [ - self.data[0] || rhs.data[0], - self.data[1] || rhs.data[1], - self.data[2] || rhs.data[2], - self.data[3] || rhs.data[3], - ], - } - } - } -} - -//=========================================================================== - -#[cfg(all(target_arch = "x86_64", target_feature = "sse"))] -pub use crate::x86_64_sse::{invert, transpose, v_max, v_min, Bool4, Float4}; - -#[cfg(not(all(target_arch = "x86_64", target_feature = "sse")))] -pub use fallback::{invert, transpose, v_max, v_min, Bool4, Float4}; - -//=========================================================================== - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn get() { - let f = Float4::new(1.0, 2.0, 3.0, 4.0); - - assert_eq!(f.get_0(), 1.0); - assert_eq!(f.get_1(), 2.0); - assert_eq!(f.get_2(), 3.0); - assert_eq!(f.get_3(), 4.0); - } - - #[test] - fn get_n() { - let f = Float4::new(1.0, 2.0, 3.0, 4.0); - - assert_eq!(f.get_n(0), 1.0); - assert_eq!(f.get_n(1), 2.0); - assert_eq!(f.get_n(2), 3.0); - assert_eq!(f.get_n(3), 4.0); - } - - #[test] - fn set() { - let mut f = Float4::new(1.0, 2.0, 3.0, 4.0); - f.set_0(5.0); - f.set_1(6.0); - f.set_2(7.0); - f.set_3(8.0); - - assert_eq!(f.get_0(), 5.0); - assert_eq!(f.get_1(), 6.0); - assert_eq!(f.get_2(), 7.0); - assert_eq!(f.get_3(), 8.0); - } - - #[test] - fn set_n() { - let mut f = Float4::new(1.0, 2.0, 3.0, 4.0); - f.set_n(0, 5.0); - f.set_n(1, 6.0); - f.set_n(2, 7.0); - f.set_n(3, 8.0); - - assert_eq!(f.get_0(), 5.0); - assert_eq!(f.get_1(), 6.0); - assert_eq!(f.get_2(), 7.0); - assert_eq!(f.get_3(), 8.0); - } - - #[test] - fn all() { - let f = Float4::new(1.0, 2.0, 3.0, 4.0); - - assert_eq!(f.all_0(), Float4::splat(1.0)); - assert_eq!(f.all_1(), Float4::splat(2.0)); - assert_eq!(f.all_2(), Float4::splat(3.0)); - assert_eq!(f.all_3(), Float4::splat(4.0)); - } - - #[test] - fn partial_eq_1() { - let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); - let f2 = Float4::new(1.0, 2.0, 3.0, 4.0); - - assert!(f1 == f2); - } - - #[test] - fn partial_eq_2() { - let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); - let f2 = Float4::new(1.0, 2.1, 3.0, 4.0); - - assert!(!(f1 == f2)); - } - - #[test] - fn h_sum() { - let f = Float4::new(1.0, 2.0, 3.0, 4.0); - assert_eq!(f.h_sum(), 10.0); - } - - #[test] - fn h_product() { - let f = Float4::new(1.0, 2.0, 3.0, 4.0); - assert_eq!(f.h_product(), 24.0); - } - - #[test] - fn h_min() { - let f = Float4::new(1.0, 2.0, 3.0, 4.0); - assert_eq!(f.h_min(), 1.0); - } - - #[test] - fn h_max() { - let f = Float4::new(1.0, 2.0, 3.0, 4.0); - assert_eq!(f.h_max(), 4.0); - } - - #[test] - fn add() { - let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); - let f2 = Float4::new(2.0, 3.0, 4.0, 5.0); - let f3 = Float4::new(3.0, 5.0, 7.0, 9.0); - - assert_eq!(f1 + f2, f3); - } - - #[test] - fn sub() { - let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); - let f2 = Float4::new(2.0, 3.0, 4.0, 5.0); - let f3 = Float4::new(-1.0, -1.0, -1.0, -1.0); - - assert_eq!(f1 - f2, f3); - } - - #[test] - fn mul_component() { - let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); - let f2 = Float4::new(2.0, 3.0, 4.0, 5.0); - let f3 = Float4::new(2.0, 6.0, 12.0, 20.0); - - assert_eq!(f1 * f2, f3); - } - - #[test] - fn mul_scalar() { - let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); - let v = 3.0; - let f2 = Float4::new(3.0, 6.0, 9.0, 12.0); - - assert_eq!(f1 * v, f2); - } - - #[test] - fn div_component() { - let f1 = Float4::new(1.0, 3.0, 3.0, 6.0); - let f2 = Float4::new(2.0, 2.0, 4.0, 8.0); - let f3 = Float4::new(0.5, 1.5, 0.75, 0.75); - - assert_eq!(f1 / f2, f3); - } - - #[test] - fn div_scalar() { - let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); - let v = 2.0; - let f2 = Float4::new(0.5, 1.0, 1.5, 2.0); - - assert_eq!(f1 / v, f2); - } - - #[test] - fn lt() { - let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); - let f2 = Float4::new(0.5, 2.0, 3.5, 2.0); - - let r = f1.lt(f2); - - assert_eq!(r.get_0(), false); - assert_eq!(r.get_1(), false); - assert_eq!(r.get_2(), true); - assert_eq!(r.get_3(), false); - } - - #[test] - fn gt() { - let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); - let f2 = Float4::new(0.5, 2.0, 3.5, 2.0); - - let r = f1.gt(f2); - - assert_eq!(r.get_0(), true); - assert_eq!(r.get_1(), false); - assert_eq!(r.get_2(), false); - assert_eq!(r.get_3(), true); - } - - #[test] - fn matrix_transpose() { - let mut m1 = [ - Float4::new(1.0, 2.0, 3.0, 4.0), - Float4::new(5.0, 6.0, 7.0, 8.0), - Float4::new(9.0, 10.0, 11.0, 12.0), - Float4::new(13.0, 14.0, 15.0, 16.0), - ]; - let m2 = [ - Float4::new(1.0, 5.0, 9.0, 13.0), - Float4::new(2.0, 6.0, 10.0, 14.0), - Float4::new(3.0, 7.0, 11.0, 15.0), - Float4::new(4.0, 8.0, 12.0, 16.0), - ]; - - transpose(&mut m1); - - assert_eq!(m1, m2); - } - - #[test] - fn bool4_bitmask_01() { - let f1 = Float4::new(0.0, 0.0, 0.0, 0.0); - let f2 = Float4::new(-1.0, -1.0, 1.0, -1.0); - let r = f1.lt(f2).to_bitmask(); - - assert_eq!(r, 0b00000100); - } - - #[test] - fn bool4_bitmask_02() { - let f1 = Float4::new(0.0, 0.0, 0.0, 0.0); - let f2 = Float4::new(1.0, -1.0, 1.0, -1.0); - let r = f1.lt(f2).to_bitmask(); - - assert_eq!(r, 0b00000101); - } - - #[test] - fn bool4_bitmask_03() { - let f1 = Float4::new(0.0, 0.0, 0.0, 0.0); - let f2 = Float4::new(-1.0, 1.0, -1.0, 1.0); - let r = f1.lt(f2).to_bitmask(); - - assert_eq!(r, 0b00001010); - } - - #[test] - fn bool4_is_all_false() { - assert_eq!(true, Bool4::new(false, false, false, false).is_all_false()); - assert_eq!(false, Bool4::new(false, false, true, false).is_all_false()); - } -} diff --git a/sub_crates/math3d/Cargo.toml b/sub_crates/math3d/Cargo.toml index 5547f6b..792c5b5 100644 --- a/sub_crates/math3d/Cargo.toml +++ b/sub_crates/math3d/Cargo.toml @@ -10,5 +10,6 @@ name = "math3d" path = "src/lib.rs" # Local crate dependencies -[dependencies.float4] -path = "../float4" \ No newline at end of file +[dependencies] +glam = {git="https://github.com/bitshifter/glam-rs.git", rev="0f314f99", default-features=false, features=["approx"]} +approx = "0.3" diff --git a/sub_crates/math3d/src/matrix.rs b/sub_crates/math3d/src/matrix.rs index 9b80c9c..e804064 100644 --- a/sub_crates/math3d/src/matrix.rs +++ b/sub_crates/math3d/src/matrix.rs @@ -1,29 +1,21 @@ #![allow(dead_code)] -use std::ops::{Index, IndexMut, Mul}; +use std::ops::{Add, Mul}; -use float4::{invert, transpose, Float4}; +use approx::RelativeEq; +use glam::{Mat4, Vec4}; use super::Point; /// A 4x4 matrix, used for transforms -#[derive(Debug, Copy, Clone)] -pub struct Matrix4x4 { - pub values: [Float4; 4], -} +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct Matrix4x4(pub Mat4); impl Matrix4x4 { /// Creates a new identity matrix #[inline] pub fn new() -> Matrix4x4 { - Matrix4x4 { - values: [ - Float4::new(1.0, 0.0, 0.0, 0.0), - Float4::new(0.0, 1.0, 0.0, 0.0), - Float4::new(0.0, 0.0, 1.0, 0.0), - Float4::new(0.0, 0.0, 0.0, 1.0), - ], - } + Matrix4x4(Mat4::identity()) } /// Creates a new matrix with the specified values: @@ -52,108 +44,37 @@ impl Matrix4x4 { o: f32, p: f32, ) -> Matrix4x4 { - Matrix4x4 { - values: [ - Float4::new(a, b, c, d), - Float4::new(e, f, g, h), - Float4::new(i, j, k, l), - Float4::new(m, n, o, p), - ], - } + Matrix4x4(Mat4::new( + Vec4::new(a, e, i, m), + Vec4::new(b, f, j, n), + Vec4::new(c, g, k, o), + Vec4::new(d, h, l, p), + )) } #[inline] pub fn from_location(loc: Point) -> Matrix4x4 { - Matrix4x4 { - values: [ - Float4::new(1.0, 0.0, 0.0, loc.x()), - Float4::new(0.0, 1.0, 0.0, loc.y()), - Float4::new(0.0, 0.0, 1.0, loc.z()), - Float4::new(0.0, 0.0, 0.0, 1.0), - ], - } + Matrix4x4(Mat4::from_translation(loc.co.truncate())) } /// Returns whether the matrices are approximately equal to each other. - /// Each corresponding element in the matrices cannot have a relative error - /// exceeding `epsilon`. + /// Each corresponding element in the matrices cannot have a relative + /// error exceeding epsilon. #[inline] pub fn aprx_eq(&self, other: Matrix4x4, epsilon: f32) -> bool { - let mut result = true; - - for y in 0..4 { - for x in 0..4 { - // All of this stuff is just an approximate comparison - // of floating point numbers. See: - // http://floating-point-gui.de/errors/comparison/ - // It might be worth breaking this out into a separate funcion, - // but I'm not entirely sure where to put it. - let a = self[y].get_n(x); - let b = other[y].get_n(x); - let aabs = a.abs(); - let babs = b.abs(); - let diff = (a - b).abs(); - if a == b { - } else if (aabs <= std::f32::EPSILON) || (babs <= std::f32::EPSILON) { - result = result && (diff < std::f32::EPSILON); - } else { - let rel = 2.0 * diff / (aabs + babs); - println!("{}", rel); - result = result && (rel < epsilon); - } - } - } - - result + self.0.relative_eq(&other.0, std::f32::EPSILON, epsilon) } /// Returns the transpose of the matrix #[inline] pub fn transposed(&self) -> Matrix4x4 { - let mut m = *self; - transpose(&mut m.values); - m + Matrix4x4(self.0.transpose()) } /// Returns the inverse of the Matrix #[inline] - #[allow(clippy::float_cmp)] pub fn inverse(&self) -> Matrix4x4 { - let mut m = *self; - let det = invert(&mut m.values); - debug_assert_ne!(det, 0.0); - m - } -} - -impl Index for Matrix4x4 { - type Output = Float4; - - #[inline(always)] - fn index(&self, _index: usize) -> &Float4 { - &self.values[_index] - } -} - -impl IndexMut for Matrix4x4 { - #[inline(always)] - fn index_mut(&mut self, _index: usize) -> &mut Float4 { - &mut self.values[_index] - } -} - -impl PartialEq for Matrix4x4 { - #[inline] - fn eq(&self, other: &Matrix4x4) -> bool { - let mut result = true; - - for y in 0..4 { - for x in 0..4 { - result = result && (self[y].get_n(x) == other[y].get_n(x)); - } - } - - result + Matrix4x4(self.0.inverse()) } } @@ -164,40 +85,32 @@ impl Default for Matrix4x4 { } /// Multiply two matrices together -impl Mul for Matrix4x4 { - type Output = Matrix4x4; +impl Mul for Matrix4x4 { + type Output = Self; #[inline] - fn mul(self, other: Matrix4x4) -> Matrix4x4 { - let m = self.transposed(); - Matrix4x4 { - values: [ - Float4::new( - (m[0] * other[0]).h_sum(), - (m[1] * other[0]).h_sum(), - (m[2] * other[0]).h_sum(), - (m[3] * other[0]).h_sum(), - ), - Float4::new( - (m[0] * other[1]).h_sum(), - (m[1] * other[1]).h_sum(), - (m[2] * other[1]).h_sum(), - (m[3] * other[1]).h_sum(), - ), - Float4::new( - (m[0] * other[2]).h_sum(), - (m[1] * other[2]).h_sum(), - (m[2] * other[2]).h_sum(), - (m[3] * other[2]).h_sum(), - ), - Float4::new( - (m[0] * other[3]).h_sum(), - (m[1] * other[3]).h_sum(), - (m[2] * other[3]).h_sum(), - (m[3] * other[3]).h_sum(), - ), - ], - } + fn mul(self, other: Self) -> Self { + Self(other.0.mul_mat4(&self.0)) + } +} + +/// Multiply a matrix by a f32 +impl Mul for Matrix4x4 { + type Output = Self; + + #[inline] + fn mul(self, other: f32) -> Self { + Self(self.0 * other) + } +} + +/// Add two matrices together +impl Add for Matrix4x4 { + type Output = Self; + + #[inline] + fn add(self, other: Self) -> Self { + Self(self.0 + other.0) } } @@ -218,22 +131,24 @@ mod tests { } #[test] - fn aproximate_equality_test() { + fn approximate_equality_test() { let a = Matrix4x4::new(); let b = Matrix4x4::new_from_values( - 1.001, 0.0, 0.0, 0.0, 0.0, 1.001, 0.0, 0.0, 0.0, 0.0, 1.001, 0.0, 0.0, 0.0, 0.0, 1.001, + 1.000001, 0.0, 0.0, 0.0, 0.0, 1.000001, 0.0, 0.0, 0.0, 0.0, 1.000001, 0.0, 0.0, 0.0, + 0.0, 1.000001, ); let c = Matrix4x4::new_from_values( - 1.003, 0.0, 0.0, 0.0, 0.0, 1.003, 0.0, 0.0, 0.0, 0.0, 1.003, 0.0, 0.0, 0.0, 0.0, 1.003, + 1.000003, 0.0, 0.0, 0.0, 0.0, 1.000003, 0.0, 0.0, 0.0, 0.0, 1.000003, 0.0, 0.0, 0.0, + 0.0, 1.000003, ); let d = Matrix4x4::new_from_values( - -1.001, 0.0, 0.0, 0.0, 0.0, -1.001, 0.0, 0.0, 0.0, 0.0, -1.001, 0.0, 0.0, 0.0, 0.0, - -1.001, + -1.000001, 0.0, 0.0, 0.0, 0.0, -1.000001, 0.0, 0.0, 0.0, 0.0, -1.000001, 0.0, 0.0, 0.0, + 0.0, -1.000001, ); - assert!(a.aprx_eq(b, 0.002)); - assert!(!a.aprx_eq(c, 0.002)); - assert!(!a.aprx_eq(d, 0.002)); + assert!(a.aprx_eq(b, 0.000001)); + assert!(!a.aprx_eq(c, 0.000001)); + assert!(!a.aprx_eq(d, 0.000001)); } #[test] @@ -260,7 +175,7 @@ mod tests { let b = a.inverse(); let c = Matrix4x4::new(); - assert!((a * b).aprx_eq(c, 0.00001)); + assert!((dbg!(a * b)).aprx_eq(dbg!(c), 0.0000001)); } #[test] diff --git a/sub_crates/math3d/src/normal.rs b/sub_crates/math3d/src/normal.rs index e1c9067..3a2fccd 100644 --- a/sub_crates/math3d/src/normal.rs +++ b/sub_crates/math3d/src/normal.rs @@ -5,42 +5,44 @@ use std::{ ops::{Add, Div, Mul, Neg, Sub}, }; -use float4::Float4; +use glam::Vec4; use super::{CrossProduct, DotProduct, Matrix4x4, Vector}; /// A surface normal in 3d homogeneous space. #[derive(Debug, Copy, Clone)] pub struct Normal { - pub co: Float4, + pub co: Vec4, } impl Normal { #[inline(always)] pub fn new(x: f32, y: f32, z: f32) -> Normal { Normal { - co: Float4::new(x, y, z, 0.0), + co: Vec4::new(x, y, z, 0.0), } } #[inline(always)] pub fn length(&self) -> f32 { - (self.co * self.co).h_sum().sqrt() + self.co.length() } #[inline(always)] pub fn length2(&self) -> f32 { - (self.co * self.co).h_sum() + self.co.length_squared() } #[inline(always)] pub fn normalized(&self) -> Normal { - *self / self.length() + Normal { + co: self.co.normalize(), + } } #[inline(always)] pub fn into_vector(self) -> Vector { - Vector::new(self.co.get_0(), self.co.get_1(), self.co.get_2()) + Vector { co: self.co } } #[inline(always)] @@ -55,32 +57,32 @@ impl Normal { #[inline(always)] pub fn x(&self) -> f32 { - self.co.get_0() + self.co.x() } #[inline(always)] pub fn y(&self) -> f32 { - self.co.get_1() + self.co.y() } #[inline(always)] pub fn z(&self) -> f32 { - self.co.get_2() + self.co.z() } #[inline(always)] pub fn set_x(&mut self, x: f32) { - self.co.set_0(x); + self.co.set_x(x); } #[inline(always)] pub fn set_y(&mut self, y: f32) { - self.co.set_1(y); + self.co.set_y(y); } #[inline(always)] pub fn set_z(&mut self, z: f32) { - self.co.set_2(z); + self.co.set_z(z); } } @@ -129,15 +131,10 @@ impl Mul for Normal { #[inline] fn mul(self, other: Matrix4x4) -> Normal { - let mat = other.inverse().transposed(); - Normal { - co: Float4::new( - (self.co * mat.values[0]).h_sum(), - (self.co * mat.values[1]).h_sum(), - (self.co * mat.values[2]).h_sum(), - 0.0, - ), - } + let mat = other.0.inverse().transpose(); + let mut co = mat.mul_vec4(self.co); + co.set_w(0.0); + Normal { co: co } } } @@ -164,7 +161,7 @@ impl Neg for Normal { impl DotProduct for Normal { #[inline(always)] fn dot(self, other: Normal) -> f32 { - (self.co * other.co).h_sum() + self.co.dot(other.co) } } @@ -172,12 +169,7 @@ impl CrossProduct for Normal { #[inline] fn cross(self, other: Normal) -> Normal { Normal { - co: Float4::new( - (self.co.get_1() * other.co.get_2()) - (self.co.get_2() * other.co.get_1()), - (self.co.get_2() * other.co.get_0()) - (self.co.get_0() * other.co.get_2()), - (self.co.get_0() * other.co.get_1()) - (self.co.get_1() * other.co.get_0()), - 0.0, - ), + co: self.co.truncate().cross(other.co.truncate()).extend(0.0), } } } @@ -186,6 +178,7 @@ impl CrossProduct for Normal { mod tests { use super::super::{CrossProduct, DotProduct, Matrix4x4}; use super::*; + use approx::UlpsEq; #[test] fn add() { @@ -220,8 +213,10 @@ mod tests { let m = Matrix4x4::new_from_values( 1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0, 13.0, 7.0, 15.0, 3.0, ); - let nm = Normal::new(-19.258825, 5.717648, -1.770588); - assert!(((n * m) - nm).length2() < 0.00001); + let mut nm = n * m; + nm.co.set_w(0.0); + let nm2 = Normal::new(-19.258825, 5.717648, -1.770588); + assert!(nm.co.ulps_eq(&nm2.co, 0.0, 4)); } #[test] diff --git a/sub_crates/math3d/src/point.rs b/sub_crates/math3d/src/point.rs index 075fb9c..998acc9 100644 --- a/sub_crates/math3d/src/point.rs +++ b/sub_crates/math3d/src/point.rs @@ -5,21 +5,21 @@ use std::{ ops::{Add, Mul, Sub}, }; -use float4::Float4; +use glam::Vec4; use super::{Matrix4x4, Vector}; /// A position in 3d homogeneous space. #[derive(Debug, Copy, Clone)] pub struct Point { - pub co: Float4, + pub co: Vec4, } impl Point { #[inline(always)] pub fn new(x: f32, y: f32, z: f32) -> Point { Point { - co: Float4::new(x, y, z, 1.0), + co: Vec4::new(x, y, z, 1.0), } } @@ -28,7 +28,7 @@ impl Point { #[inline(always)] pub fn norm(&self) -> Point { Point { - co: self.co / self.co.get_3(), + co: self.co / self.co.w(), } } @@ -38,7 +38,7 @@ impl Point { let n2 = other.norm(); Point { - co: n1.co.v_min(n2.co), + co: n1.co.min(n2.co), } } @@ -48,13 +48,15 @@ impl Point { let n2 = other.norm(); Point { - co: n1.co.v_max(n2.co), + co: n1.co.max(n2.co), } } #[inline(always)] pub fn into_vector(self) -> Vector { - Vector::new(self.co.get_0(), self.co.get_1(), self.co.get_2()) + let mut v = Vector { co: self.co }; + v.co.set_w(0.0); + v } #[inline(always)] @@ -69,32 +71,32 @@ impl Point { #[inline(always)] pub fn x(&self) -> f32 { - self.co.get_0() + self.co.x() } #[inline(always)] pub fn y(&self) -> f32 { - self.co.get_1() + self.co.y() } #[inline(always)] pub fn z(&self) -> f32 { - self.co.get_2() + self.co.z() } #[inline(always)] pub fn set_x(&mut self, x: f32) { - self.co.set_0(x); + self.co.set_x(x); } #[inline(always)] pub fn set_y(&mut self, y: f32) { - self.co.set_1(y); + self.co.set_y(y); } #[inline(always)] pub fn set_z(&mut self, z: f32) { - self.co.set_2(z); + self.co.set_z(z); } } @@ -144,12 +146,7 @@ impl Mul for Point { #[inline] fn mul(self, other: Matrix4x4) -> Point { Point { - co: Float4::new( - (self.co * other.values[0]).h_sum(), - (self.co * other.values[1]).h_sum(), - (self.co * other.values[2]).h_sum(), - (self.co * other.values[3]).h_sum(), - ), + co: other.0.mul_vec4(self.co), } } } @@ -163,7 +160,7 @@ mod tests { fn norm() { let mut p1 = Point::new(1.0, 2.0, 3.0); let p2 = Point::new(2.0, 4.0, 6.0); - p1.co.set_3(0.5); + p1.co.set_w(0.5); assert_eq!(p2, p1.norm()); } @@ -203,7 +200,7 @@ mod tests { 1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0, 2.0, 3.0, 1.0, 5.0, ); let mut pm = Point::new(15.5, 54.0, 70.0); - pm.co.set_3(18.5); + pm.co.set_w(18.5); assert_eq!(p * m, pm); } diff --git a/sub_crates/math3d/src/vector.rs b/sub_crates/math3d/src/vector.rs index 6c6f9c0..e584a09 100644 --- a/sub_crates/math3d/src/vector.rs +++ b/sub_crates/math3d/src/vector.rs @@ -5,37 +5,39 @@ use std::{ ops::{Add, Div, Mul, Neg, Sub}, }; -use float4::Float4; +use glam::Vec4; use super::{CrossProduct, DotProduct, Matrix4x4, Normal, Point}; /// A direction vector in 3d homogeneous space. #[derive(Debug, Copy, Clone)] pub struct Vector { - pub co: Float4, + pub co: Vec4, } impl Vector { #[inline(always)] pub fn new(x: f32, y: f32, z: f32) -> Vector { Vector { - co: Float4::new(x, y, z, 0.0), + co: Vec4::new(x, y, z, 0.0), } } #[inline(always)] pub fn length(&self) -> f32 { - (self.co * self.co).h_sum().sqrt() + self.co.length() } #[inline(always)] pub fn length2(&self) -> f32 { - (self.co * self.co).h_sum() + self.co.length_squared() } #[inline(always)] pub fn normalized(&self) -> Vector { - *self / self.length() + Vector { + co: self.co.normalize(), + } } #[inline(always)] @@ -65,32 +67,32 @@ impl Vector { #[inline(always)] pub fn x(&self) -> f32 { - self.co.get_0() + self.co.x() } #[inline(always)] pub fn y(&self) -> f32 { - self.co.get_1() + self.co.y() } #[inline(always)] pub fn z(&self) -> f32 { - self.co.get_2() + self.co.z() } #[inline(always)] pub fn set_x(&mut self, x: f32) { - self.co.set_0(x); + self.co.set_x(x); } #[inline(always)] pub fn set_y(&mut self, y: f32) { - self.co.set_1(y); + self.co.set_y(y); } #[inline(always)] pub fn set_z(&mut self, z: f32) { - self.co.set_2(z); + self.co.set_z(z); } } @@ -140,12 +142,7 @@ impl Mul for Vector { #[inline] fn mul(self, other: Matrix4x4) -> Vector { Vector { - co: Float4::new( - (self.co * other.values[0]).h_sum(), - (self.co * other.values[1]).h_sum(), - (self.co * other.values[2]).h_sum(), - (self.co * other.values[3]).h_sum(), - ), + co: other.0.mul_vec4(self.co), } } } @@ -173,7 +170,7 @@ impl Neg for Vector { impl DotProduct for Vector { #[inline(always)] fn dot(self, other: Vector) -> f32 { - (self.co * other.co).h_sum() + self.co.dot(other.co) } } @@ -181,12 +178,7 @@ impl CrossProduct for Vector { #[inline] fn cross(self, other: Vector) -> Vector { Vector { - co: Float4::new( - (self.co.get_1() * other.co.get_2()) - (self.co.get_2() * other.co.get_1()), - (self.co.get_2() * other.co.get_0()) - (self.co.get_0() * other.co.get_2()), - (self.co.get_0() * other.co.get_1()) - (self.co.get_1() * other.co.get_0()), - 0.0, - ), + co: self.co.truncate().cross(other.co.truncate()).extend(0.0), } } } @@ -230,7 +222,7 @@ mod tests { 1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0, 13.0, 7.0, 15.0, 3.0, ); let mut vm = Vector::new(14.0, 46.0, 58.0); - vm.co.set_3(90.5); + vm.co.set_w(90.5); assert_eq!(v * m, vm); } diff --git a/sub_crates/spectral_upsampling/Cargo.toml b/sub_crates/spectral_upsampling/Cargo.toml index 3dfa2d0..a9bf965 100644 --- a/sub_crates/spectral_upsampling/Cargo.toml +++ b/sub_crates/spectral_upsampling/Cargo.toml @@ -9,6 +9,5 @@ license = "MIT" name = "spectral_upsampling" path = "src/lib.rs" -# Local crate dependencies -[dependencies.float4] -path = "../float4" \ No newline at end of file +[dependencies] +glam = {git="https://github.com/bitshifter/glam-rs.git", rev="0f314f99", default-features=false, features=["approx"]} \ No newline at end of file diff --git a/sub_crates/spectral_upsampling/src/jakob.rs b/sub_crates/spectral_upsampling/src/jakob.rs index 76b41d7..8a15156 100644 --- a/sub_crates/spectral_upsampling/src/jakob.rs +++ b/sub_crates/spectral_upsampling/src/jakob.rs @@ -6,7 +6,7 @@ /// The provides similar color matching as full Jakob, at the expense of /// somewhat lower quality spectrums, and the inability to precalculate /// the coefficents for even more efficient evaluation later on. -use float4::Float4; +use glam::Vec4; /// How many polynomial coefficients? const RGB2SPEC_N_COEFFS: usize = 3; @@ -15,7 +15,7 @@ const RGB2SPEC_N_COEFFS: usize = 3; include!(concat!(env!("OUT_DIR"), "/jakob_table_inc.rs")); #[inline] -pub fn rec709_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 { +pub fn rec709_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 { small_rgb_to_spectrum_p4( REC709_TABLE, REC709_TABLE_RES, @@ -26,7 +26,7 @@ pub fn rec709_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 { } #[inline] -pub fn rec2020_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 { +pub fn rec2020_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 { small_rgb_to_spectrum_p4( REC2020_TABLE, REC2020_TABLE_RES, @@ -37,7 +37,7 @@ pub fn rec2020_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 { } #[inline] -pub fn aces_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 { +pub fn aces_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 { small_rgb_to_spectrum_p4( ACES_TABLE, ACES_TABLE_RES, @@ -55,9 +55,9 @@ fn small_rgb_to_spectrum_p4( table: &[[(f32, f32, f32); 2]], table_res: usize, table_mid_value: f32, - lambdas: Float4, + lambdas: Vec4, rgb: (f32, f32, f32), -) -> Float4 { +) -> Vec4 { // Determine largest RGB component, and calculate the other two // components scaled for lookups. let (i, max_val, x, y) = if rgb.0 > rgb.1 && rgb.0 > rgb.2 { @@ -70,7 +70,7 @@ fn small_rgb_to_spectrum_p4( if max_val == 0.0 { // If max_val is zero, just return zero. This avoids NaN's from // divide by zero. This is also correct, since it's black. - return Float4::splat(0.0); + return Vec4::splat(0.0); } let x = x * 63.0 / max_val; let y = y * 63.0 / max_val; @@ -90,20 +90,20 @@ fn small_rgb_to_spectrum_p4( // Convert to SIMD format for faster interpolation. let a0 = [ - Float4::new(a0[0].0, a0[0].1, a0[0].2, 0.0), - Float4::new(a0[1].0, a0[1].1, a0[1].2, 0.0), + Vec4::new(a0[0].0, a0[0].1, a0[0].2, 0.0), + Vec4::new(a0[1].0, a0[1].1, a0[1].2, 0.0), ]; let a1 = [ - Float4::new(a1[0].0, a1[0].1, a1[0].2, 0.0), - Float4::new(a1[1].0, a1[1].1, a1[1].2, 0.0), + Vec4::new(a1[0].0, a1[0].1, a1[0].2, 0.0), + Vec4::new(a1[1].0, a1[1].1, a1[1].2, 0.0), ]; let a2 = [ - Float4::new(a2[0].0, a2[0].1, a2[0].2, 0.0), - Float4::new(a2[1].0, a2[1].1, a2[1].2, 0.0), + Vec4::new(a2[0].0, a2[0].1, a2[0].2, 0.0), + Vec4::new(a2[1].0, a2[1].1, a2[1].2, 0.0), ]; let a3 = [ - Float4::new(a3[0].0, a3[0].1, a3[0].2, 0.0), - Float4::new(a3[1].0, a3[1].1, a3[1].2, 0.0), + Vec4::new(a3[0].0, a3[0].1, a3[0].2, 0.0), + Vec4::new(a3[1].0, a3[1].1, a3[1].2, 0.0), ]; // Do interpolation. @@ -117,16 +117,14 @@ fn small_rgb_to_spectrum_p4( // Evaluate the spectral function and return the result. if max_val <= table_mid_value { - rgb2spec_eval_4([c[0].get_0(), c[0].get_1(), c[0].get_2()], lambdas) - * (1.0 / table_mid_value) - * max_val + rgb2spec_eval_4([c[0].x(), c[0].y(), c[0].z()], lambdas) * (1.0 / table_mid_value) * max_val } else if max_val < 1.0 { let n = (max_val - table_mid_value) / (1.0 - table_mid_value); - let s0 = rgb2spec_eval_4([c[0].get_0(), c[0].get_1(), c[0].get_2()], lambdas); - let s1 = rgb2spec_eval_4([c[1].get_0(), c[1].get_1(), c[1].get_2()], lambdas); + let s0 = rgb2spec_eval_4([c[0].x(), c[0].y(), c[0].z()], lambdas); + let s1 = rgb2spec_eval_4([c[1].x(), c[1].y(), c[1].z()], lambdas); (s0 * (1.0 - n)) + (s1 * n) } else { - rgb2spec_eval_4([c[1].get_0(), c[1].get_1(), c[1].get_2()], lambdas) * max_val + rgb2spec_eval_4([c[1].x(), c[1].y(), c[1].z()], lambdas) * max_val } } @@ -134,18 +132,22 @@ fn small_rgb_to_spectrum_p4( // Coefficient -> eval functions #[inline(always)] -fn rgb2spec_fma_4(a: Float4, b: Float4, c: Float4) -> Float4 { - a.fmadd(b, c) +fn rgb2spec_fma_4(a: Vec4, b: Vec4, c: Vec4) -> Vec4 { + (a * b) + c } -fn rgb2spec_eval_4(coeff: [f32; RGB2SPEC_N_COEFFS], lambda: Float4) -> Float4 { - let co0 = Float4::splat(coeff[0]); - let co1 = Float4::splat(coeff[1]); - let co2 = Float4::splat(coeff[2]); +fn rgb2spec_eval_4(coeff: [f32; RGB2SPEC_N_COEFFS], lambda: Vec4) -> Vec4 { + let co0 = Vec4::splat(coeff[0]); + let co1 = Vec4::splat(coeff[1]); + let co2 = Vec4::splat(coeff[2]); let x = rgb2spec_fma_4(rgb2spec_fma_4(co0, lambda, co1), lambda, co2); - let y = Float4::splat(1.0) / (rgb2spec_fma_4(x, x, Float4::splat(1.0))).sqrt(); + let y = { + // TODO: replace this with a SIMD sqrt op. + let (x, y, z, w) = rgb2spec_fma_4(x, x, Vec4::splat(1.0)).into(); + Vec4::new(x.sqrt(), y.sqrt(), z.sqrt(), w.sqrt()).reciprocal() + }; - rgb2spec_fma_4(Float4::splat(0.5) * x, y, Float4::splat(0.5)) + rgb2spec_fma_4(Vec4::splat(0.5) * x, y, Vec4::splat(0.5)) } diff --git a/sub_crates/spectral_upsampling/src/meng.rs b/sub_crates/spectral_upsampling/src/meng.rs index 5953d1f..bc14eb2 100644 --- a/sub_crates/spectral_upsampling/src/meng.rs +++ b/sub_crates/spectral_upsampling/src/meng.rs @@ -6,7 +6,7 @@ use std::f32; -use float4::Float4; +use glam::Vec4; mod meng_spectra_tables; @@ -174,9 +174,9 @@ pub fn spectrum_xyz_to_p(lambda: f32, xyz: (f32, f32, f32)) -> f32 { /// /// Works on 4 wavelengths at once via SIMD. #[inline] -pub fn spectrum_xyz_to_p_4(lambdas: Float4, xyz: (f32, f32, f32)) -> Float4 { - assert!(lambdas.h_min() >= SPECTRUM_SAMPLE_MIN); - assert!(lambdas.h_max() <= SPECTRUM_SAMPLE_MAX); +pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 { + assert!(lambdas.min_element() >= SPECTRUM_SAMPLE_MIN); + assert!(lambdas.max_element() <= SPECTRUM_SAMPLE_MAX); let inv_norm = xyz.0 + xyz.1 + xyz.2; let norm = { @@ -184,7 +184,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Float4, xyz: (f32, f32, f32)) -> Float4 { if norm < f32::MAX { norm } else { - return Float4::splat(0.0); + return Vec4::splat(0.0); } }; @@ -197,7 +197,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Float4, xyz: (f32, f32, f32)) -> Float4 { || uv.1 < 0.0 || uv.1 >= SPECTRUM_GRID_HEIGHT as f32 { - return Float4::splat(0.0); + return Vec4::splat(0.0); } let uvi = (uv.0 as i32, uv.1 as i32); @@ -214,53 +214,48 @@ pub fn spectrum_xyz_to_p_4(lambdas: Float4, xyz: (f32, f32, f32)) -> Float4 { // If the cell has no points, nothing we can do, so return 0.0 if num == 0 { - return Float4::splat(0.0); + return Vec4::splat(0.0); } // Normalize lambda to spectrum table index range. - let sb: Float4 = (lambdas - Float4::splat(SPECTRUM_SAMPLE_MIN)) + let sb: Vec4 = (lambdas - Vec4::splat(SPECTRUM_SAMPLE_MIN)) / (SPECTRUM_SAMPLE_MAX - SPECTRUM_SAMPLE_MIN) * (SPECTRUM_NUM_SAMPLES as f32 - 1.0); - debug_assert!(sb.h_min() >= 0.0); - debug_assert!(sb.h_max() <= SPECTRUM_NUM_SAMPLES as f32); + debug_assert!(sb.min_element() >= 0.0); + debug_assert!(sb.max_element() <= SPECTRUM_NUM_SAMPLES as f32); // Get the spectral values for the vertices of the grid cell. // TODO: use integer SIMD intrinsics to make this part faster. - let mut p = [Float4::splat(0.0); 6]; - let sb0: [i32; 4] = [ - sb.get_0() as i32, - sb.get_1() as i32, - sb.get_2() as i32, - sb.get_3() as i32, - ]; + let mut p = [Vec4::splat(0.0); 6]; + let sb0: [i32; 4] = [sb.x() as i32, sb.y() as i32, sb.z() as i32, sb.w() as i32]; assert!(sb0[0].max(sb0[1]).max(sb0[2].max(sb0[3])) < SPECTRUM_NUM_SAMPLES); let sb1: [i32; 4] = [ - (sb.get_0() as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1), - (sb.get_1() as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1), - (sb.get_2() as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1), - (sb.get_3() as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1), + (sb.x() as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1), + (sb.y() as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1), + (sb.z() as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1), + (sb.w() as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1), ]; - let sbf = sb - Float4::new(sb0[0] as f32, sb0[1] as f32, sb0[2] as f32, sb0[3] as f32); + let sbf = sb - Vec4::new(sb0[0] as f32, sb0[1] as f32, sb0[2] as f32, sb0[3] as f32); for i in 0..(num as usize) { debug_assert!(idx[i] >= 0); let spectrum = &SPECTRUM_DATA_POINTS[idx[i] as usize].spectrum; - let p0 = Float4::new( + let p0 = Vec4::new( spectrum[sb0[0] as usize], spectrum[sb0[1] as usize], spectrum[sb0[2] as usize], spectrum[sb0[3] as usize], ); - let p1 = Float4::new( + let p1 = Vec4::new( spectrum[sb1[0] as usize], spectrum[sb1[1] as usize], spectrum[sb1[2] as usize], spectrum[sb1[3] as usize], ); - p[i] = p0 * (Float4::splat(1.0) - sbf) + p1 * sbf; + p[i] = p0 * (Vec4::splat(1.0) - sbf) + p1 * sbf; } // Linearly interpolate the spectral power of the cell vertices. - let mut interpolated_p = Float4::splat(0.0); + let mut interpolated_p = Vec4::splat(0.0); if inside { // Fast path for normal inner quads: let uv2 = (uv.0 - uvi.0 as f32, uv.1 - uvi.1 as f32);