diff --git a/sub_crates/rmath/src/lib.rs b/sub_crates/rmath/src/lib.rs index d097132..7d8cf7e 100644 --- a/sub_crates/rmath/src/lib.rs +++ b/sub_crates/rmath/src/lib.rs @@ -4,6 +4,7 @@ mod normal; mod point; +mod sealed; mod utils; mod vector; pub mod wide4; @@ -11,7 +12,9 @@ mod xform; use std::ops::{Add, Mul, Neg, Sub}; -pub use self::{normal::Normal, point::Point, vector::Vector, xform::Xform, xform::XformFull}; +pub use self::{ + normal::Normal, point::Point, vector::Vector, xform::AsXform, xform::Xform, xform::XformFull, +}; /// Trait for calculating dot products. pub trait DotProduct { @@ -111,3 +114,16 @@ where let delta = sum - a; (sum, (a - (sum - delta)) + (b - delta)) } + +/// `a - b` but also returns a rounding error for precise composition +/// with other operations. +#[inline(always)] +fn two_diff(a: T, b: T) -> (T, T) +// (diff, rounding_err) +where + T: Copy + Add + Sub, +{ + let diff = a - b; + let delta = diff - a; + (diff, (a - (diff - delta)) - (b + delta)) +} diff --git a/sub_crates/rmath/src/normal.rs b/sub_crates/rmath/src/normal.rs index 64160a8..bd0c002 100644 --- a/sub_crates/rmath/src/normal.rs +++ b/sub_crates/rmath/src/normal.rs @@ -4,7 +4,7 @@ use std::cmp::PartialEq; use std::ops::{Add, Div, Mul, Neg, Sub}; use crate::wide4::Float4; -use crate::xform::XformFull; +use crate::xform::{AsXform, XformFull}; use crate::Vector; use crate::{CrossProduct, DotProduct}; @@ -87,20 +87,37 @@ impl Normal { //------------- // Transforms. + /// Forward-transform the normal. + #[inline(always)] pub fn xform(self, xform: &XformFull) -> Self { - Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(xform.m_inv))) + Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(&xform.inv_m))) } - pub fn xform_inv(self, xform: &XformFull) -> Self { - Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(xform.m))) + /// Inverse-transform the normal. + #[inline(always)] + pub fn xform_inv(self, xform: &T) -> Self { + Self( + self.0 + .vec_mul_3x3(&Float4::transpose_3x3(&xform.as_xform().m)), + ) } + /// Faster but less precise version of `xform()`. + #[inline(always)] pub fn xform_fast(self, xform: &XformFull) -> Self { - Self(self.0.vec_mul_3x3_fast(&Float4::transpose_3x3(xform.m_inv))) + Self( + self.0 + .vec_mul_3x3_fast(&Float4::transpose_3x3(&xform.inv_m)), + ) } - pub fn xform_inv_fast(self, xform: &XformFull) -> Self { - Self(self.0.vec_mul_3x3_fast(&Float4::transpose_3x3(xform.m))) + /// Faster but less precise version of `xform_inv()`. + #[inline(always)] + pub fn xform_inv_fast(self, xform: &T) -> Self { + Self( + self.0 + .vec_mul_3x3_fast(&Float4::transpose_3x3(&xform.as_xform().m)), + ) } } @@ -218,7 +235,7 @@ mod tests { fn xform() { let n = Normal::new(1.0, 2.5, 4.0); let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0) - .into_full() + .to_full() .unwrap(); assert_eq!(n.xform(&m), Normal::new(-4.0625, 1.78125, -0.03125)); @@ -229,7 +246,7 @@ mod tests { fn xform_fast() { let n = Normal::new(1.0, 2.5, 4.0); let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0) - .into_full() + .to_full() .unwrap(); assert_eq!(n.xform_fast(&m), Normal::new(-4.0625, 1.78125, -0.03125)); diff --git a/sub_crates/rmath/src/point.rs b/sub_crates/rmath/src/point.rs index f260d74..1b7e4f8 100644 --- a/sub_crates/rmath/src/point.rs +++ b/sub_crates/rmath/src/point.rs @@ -4,7 +4,7 @@ use std::ops::{Add, Sub}; use crate::vector::Vector; use crate::wide4::Float4; -use crate::xform::XformFull; +use crate::xform::{AsXform, XformFull}; /// A position in 3D space. #[derive(Debug, Copy, Clone)] @@ -78,20 +78,30 @@ impl Point { //------------- // Transforms. - pub fn xform(self, xform: &XformFull) -> Self { + /// Forward-transform the point. + #[inline(always)] + pub fn xform(self, xform: &T) -> Self { + let xform = xform.as_xform(); Self(self.0.vec_mul_affine(&xform.m, xform.t)) } + /// Inverse-transform the point. + #[inline(always)] pub fn xform_inv(self, xform: &XformFull) -> Self { - Self(self.0.vec_mul_affine_rev(&xform.m_inv, -xform.t)) + Self(self.0.vec_mul_affine_rev(&xform.inv_m, xform.fwd.t)) } - pub fn xform_fast(self, xform: &XformFull) -> Self { + /// Faster but less precise version of `xform()`. + #[inline(always)] + pub fn xform_fast(self, xform: &T) -> Self { + let xform = xform.as_xform(); Self(self.0.vec_mul_affine_fast(&xform.m, xform.t)) } + /// Faster but less precise version of `xform_inv()`. + #[inline(always)] pub fn xform_inv_fast(self, xform: &XformFull) -> Self { - Self(self.0.vec_mul_affine_rev_fast(&xform.m_inv, -xform.t)) + Self(self.0.vec_mul_affine_rev_fast(&xform.inv_m, xform.fwd.t)) } } @@ -158,7 +168,7 @@ mod tests { fn xform() { let p = Point::new(1.0, 2.5, 4.0); let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0) - .into_full() + .to_full() .unwrap(); assert_eq!(p.xform(&m), Point::new(15.5, 54.0, 70.0)); assert_eq!(p.xform(&m).xform_inv(&m), p); @@ -168,7 +178,7 @@ mod tests { fn xform_fast() { let p = Point::new(1.0, 2.5, 4.0); let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0) - .into_full() + .to_full() .unwrap(); assert_eq!(p.xform_fast(&m), Point::new(15.5, 54.0, 70.0)); assert_eq!(p.xform_fast(&m).xform_inv_fast(&m), p); diff --git a/sub_crates/rmath/src/sealed.rs b/sub_crates/rmath/src/sealed.rs new file mode 100644 index 0000000..8ee7071 --- /dev/null +++ b/sub_crates/rmath/src/sealed.rs @@ -0,0 +1,5 @@ +/// For sealing other traits. +/// +/// Even though this is marked as public, this module isn't, and +/// therefore this trait is not available outside the crate. +pub trait Sealed {} diff --git a/sub_crates/rmath/src/vector.rs b/sub_crates/rmath/src/vector.rs index 21491a2..87cc1ff 100644 --- a/sub_crates/rmath/src/vector.rs +++ b/sub_crates/rmath/src/vector.rs @@ -6,7 +6,7 @@ use std::ops::{Add, Div, Mul, Neg, Sub}; use crate::normal::Normal; use crate::point::Point; use crate::wide4::Float4; -use crate::xform::XformFull; +use crate::xform::{AsXform, XformFull}; use crate::{CrossProduct, DotProduct}; /// A direction vector in 3D space. @@ -98,20 +98,28 @@ impl Vector { //------------- // Transforms. - pub fn xform(self, xform: &XformFull) -> Self { - Self(self.0.vec_mul_3x3(&xform.m)) + /// Forward-transform the vector. + #[inline(always)] + pub fn xform(self, xform: &T) -> Self { + Self(self.0.vec_mul_3x3(&xform.as_xform().m)) } + /// Inverse-transform the vector. + #[inline(always)] pub fn xform_inv(self, xform: &XformFull) -> Self { - Self(self.0.vec_mul_3x3(&xform.m_inv)) + Self(self.0.vec_mul_3x3(&xform.inv_m)) } - pub fn xform_fast(self, xform: &XformFull) -> Self { - Self(self.0.vec_mul_3x3_fast(&xform.m)) + /// Faster but less precise version of `xform()`. + #[inline(always)] + pub fn xform_fast(self, xform: &T) -> Self { + Self(self.0.vec_mul_3x3_fast(&xform.as_xform().m)) } + /// Faster but less precise version of `xform_inv()`. + #[inline(always)] pub fn xform_inv_fast(self, xform: &XformFull) -> Self { - Self(self.0.vec_mul_3x3_fast(&xform.m_inv)) + Self(self.0.vec_mul_3x3_fast(&xform.inv_m)) } } @@ -229,7 +237,7 @@ mod tests { fn xform() { let v = Vector::new(1.0, 2.5, 4.0); let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0) - .into_full() + .to_full() .unwrap(); assert_eq!(v.xform(&m), Vector::new(14.0, 46.0, 58.0)); @@ -240,7 +248,7 @@ mod tests { fn xform_fast() { let v = Vector::new(1.0, 2.5, 4.0); let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0) - .into_full() + .to_full() .unwrap(); assert_eq!(v.xform_fast(&m), Vector::new(14.0, 46.0, 58.0)); diff --git a/sub_crates/rmath/src/wide4/fallback.rs b/sub_crates/rmath/src/wide4/fallback.rs index 076bcf5..546a4af 100644 --- a/sub_crates/rmath/src/wide4/fallback.rs +++ b/sub_crates/rmath/src/wide4/fallback.rs @@ -38,22 +38,40 @@ impl Float4 { /// Vertical minimum. #[inline(always)] pub fn min(self, a: Self) -> Self { + // Custom min to match behavior of SSE. + #[inline(always)] + pub fn minf(a: f32, b: f32) -> f32 { + if a < b { + a + } else { + b + } + } Self([ - self.0[0].min(a.0[0]), - self.0[1].min(a.0[1]), - self.0[2].min(a.0[2]), - self.0[3].min(a.0[3]), + minf(self.0[0], a.0[0]), + minf(self.0[1], a.0[1]), + minf(self.0[2], a.0[2]), + minf(self.0[3], a.0[3]), ]) } /// Vertical maximum. #[inline(always)] pub fn max(self, a: Self) -> Self { + // Custom max to match behavior of SSE. + #[inline(always)] + pub fn maxf(a: f32, b: f32) -> f32 { + if a > b { + a + } else { + b + } + } Self([ - self.0[0].max(a.0[0]), - self.0[1].max(a.0[1]), - self.0[2].max(a.0[2]), - self.0[3].max(a.0[3]), + maxf(self.0[0], a.0[0]), + maxf(self.0[1], a.0[1]), + maxf(self.0[2], a.0[2]), + maxf(self.0[3], a.0[3]), ]) } @@ -352,11 +370,16 @@ impl FMulAdd for Float4 { //============================================================= // Bool4 -#[derive(Debug, Copy, Clone)] +#[derive(Copy, Clone)] #[repr(transparent)] pub struct Bool4([bool; 4]); impl Bool4 { + #[inline(always)] + pub fn new(a: bool, b: bool, c: bool, d: bool) -> Self { + Bool4([a, b, c, d]) + } + #[inline(always)] pub fn new_false() -> Self { Self([false, false, false, false]) diff --git a/sub_crates/rmath/src/wide4/mod.rs b/sub_crates/rmath/src/wide4/mod.rs index a0ebe35..9e4d72c 100644 --- a/sub_crates/rmath/src/wide4/mod.rs +++ b/sub_crates/rmath/src/wide4/mod.rs @@ -1,15 +1,14 @@ use std::{ - cmp::PartialEq, + cmp::{Eq, PartialEq}, ops::{AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, SubAssign}, }; use crate::utils::ulps_eq; -use crate::{difference_of_products, two_prod, two_sum}; +use crate::{difference_of_products, two_diff, two_prod, two_sum}; //------------------------------------------------------------- // Which implementation to use. -#[cfg(not(any(target_arch = "x86_64")))] mod fallback; #[cfg(not(any(target_arch = "x86_64")))] pub use fallback::{Bool4, Float4}; @@ -43,9 +42,7 @@ impl Float4 { s2 + err2 } - /// 3D dot product (only uses the first 3 components). - /// - /// Faster but less precise version. + /// Faster but less precise version of `dot_3()`. #[inline(always)] pub fn dot_3_fast(a: Self, b: Self) -> f32 { let c = a * b; @@ -58,16 +55,14 @@ impl Float4 { difference_of_products(a.bcad(), b.cabd(), a.cabd(), b.bcad()) } - /// 3D cross product (only uses the first 3 components). - /// - /// Faster but less precise version. + /// Faster but less precise version `cross_3()`. #[inline(always)] pub fn cross_3_fast(a: Self, b: Self) -> Self { (a.bcad() * b.cabd()) - (a.cabd() * b.bcad()) } #[inline(always)] - pub fn transpose_3x3(m: [Self; 3]) -> [Self; 3] { + pub fn transpose_3x3(m: &[Self; 3]) -> [Self; 3] { [ // The fourth component in each row below is arbitrary, // but in this case chosen so that it matches the @@ -82,7 +77,7 @@ impl Float4 { /// /// Returns `None` if not invertible. #[inline] - pub fn invert_3x3(m: [Self; 3]) -> Option<[Self; 3]> { + pub fn invert_3x3(m: &[Self; 3]) -> Option<[Self; 3]> { let m0_bca = m[0].bcad(); let m1_bca = m[1].bcad(); let m2_bca = m[2].bcad(); @@ -101,15 +96,13 @@ impl Float4 { if det == 0.0 { None } else { - Some(Self::transpose_3x3([abc / det, def / det, ghi / det])) + Some(Self::transpose_3x3(&[abc / det, def / det, ghi / det])) } } - /// Invert a 3x3 matrix. Faster but less precise version. - /// - /// Returns `None` if not invertible. + /// Faster but less precise version of `invert_3x3()`. #[inline] - pub fn invert_3x3_fast(m: [Self; 3]) -> Option<[Self; 3]> { + pub fn invert_3x3_fast(m: &[Self; 3]) -> Option<[Self; 3]> { let m0_bca = m[0].bcad(); let m1_bca = m[1].bcad(); let m2_bca = m[2].bcad(); @@ -128,7 +121,7 @@ impl Float4 { if det == 0.0 { None } else { - Some(Self::transpose_3x3([abc / det, def / det, ghi / det])) + Some(Self::transpose_3x3(&[abc / det, def / det, ghi / det])) } } @@ -154,9 +147,7 @@ impl Float4 { s2 + err2 } - /// Multiplies a 3D vector with a 3x3 matrix. - /// - /// Faster but less precise version. + /// Faster but less precise version of `vec_mul_3x3()`. #[inline] pub fn vec_mul_3x3_fast(self, m: &[Self; 3]) -> Self { let x = self.aaaa(); @@ -193,9 +184,7 @@ impl Float4 { s3 + err3 } - /// Transforms a 3d point by an affine transform. - /// - /// Faster but less precise version. + /// Faster but less precise version of `vec_mul_affine()`. #[inline] pub fn vec_mul_affine_fast(self, m: &[Self; 3], t: Self) -> Self { let x = self.aaaa(); @@ -205,25 +194,26 @@ impl Float4 { (x * m[0]) + (y * m[1]) + (z * m[2]) + t } - /// Transforms a 3d point by an affine transform, except it applies - /// the translation part before the 3x3 part. + /// Transforms a 3d point by an affine transform, except it does + /// `(vec - t) * inv_m` instead of `vec * m + t`. /// - /// This is primarily useful for performing efficient inverse transforms by - /// passing an inverted 3x3 part and a negated translation part. + /// This is useful for performing efficient inverse transforms while + /// only having to invert the 3x3 part of the transform itself. /// - /// `m` is the 3x3 part of the affine transform, `t` is the translation part. + /// `inv_m` is the inverse 3x3 part of the affine transform, `t` is + /// the forward translation part. #[inline] - pub fn vec_mul_affine_rev(self, m: &[Self; 3], t: Self) -> Self { - let (v, v_err) = two_sum(self, t); + pub fn vec_mul_affine_rev(self, inv_m: &[Self; 3], t: Self) -> Self { + let (v, v_err) = two_diff(self, t); let (x, x_err) = (v.aaaa(), v_err.aaaa()); let (y, y_err) = (v.bbbb(), v_err.bbbb()); let (z, z_err) = (v.cccc(), v_err.cccc()); // Products. - let ((a, a_err1), a_err2) = (two_prod(x, m[0]), x_err * m[0]); - let ((b, b_err1), b_err2) = (two_prod(y, m[1]), y_err * m[1]); - let ((c, c_err1), c_err2) = (two_prod(z, m[2]), z_err * m[2]); + let ((a, a_err1), a_err2) = (two_prod(x, inv_m[0]), x_err * inv_m[0]); + let ((b, b_err1), b_err2) = (two_prod(y, inv_m[1]), y_err * inv_m[1]); + let ((c, c_err1), c_err2) = (two_prod(z, inv_m[2]), z_err * inv_m[2]); let a_err = a_err1 + a_err2; let b_err = b_err1 + b_err2; let c_err = c_err1 + c_err2; @@ -238,19 +228,16 @@ impl Float4 { s2 + err2 } - /// Transforms a 3d point by an affine transform, except it applies - /// the translation part before the 3x3 part. - /// - /// Faster but less precise version. + /// Faster but less precise version of `vec_mul_affine_rev()`. #[inline] - pub fn vec_mul_affine_rev_fast(self, m: &[Self; 3], t: Self) -> Self { - let v = self + t; + pub fn vec_mul_affine_rev_fast(self, inv_m: &[Self; 3], t: Self) -> Self { + let v = self - t; let x = v.aaaa(); let y = v.bbbb(); let z = v.cccc(); - (x * m[0]) + (y * m[1]) + (z * m[2]) + (x * inv_m[0]) + (y * inv_m[1]) + (z * inv_m[2]) } /// Returns whether the `Float4`s are approximately equal to each @@ -290,9 +277,7 @@ impl Float4 { ) } - /// Transforms one affine transform by another. - /// - /// Faster but less precise version. + /// Faster but less precise version of `affine_mul_affine()`. #[inline] pub fn affine_mul_affine_fast( m1: &[Self; 3], @@ -389,12 +374,34 @@ impl BitXorAssign for Bool4 { } } +impl PartialEq for Bool4 { + #[inline(always)] + fn eq(&self, rhs: &Self) -> bool { + self.bitmask() == rhs.bitmask() + } +} + +impl Eq for Bool4 {} + +impl std::fmt::Debug for Bool4 { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + f.write_str("Bool4(")?; + f.debug_list().entries(self.to_bools().iter()).finish()?; + f.write_str(")")?; + + Ok(()) + } +} + //------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; + //------------ + // Float4 + #[test] fn approximate_equality_test() { let a = Float4::new(1.0, 2.0, 3.0, 4.0); @@ -420,6 +427,26 @@ mod tests { assert_eq!(v[3], 3.0); } + #[test] + fn get() { + let v = Float4::new(0.0, 1.0, 2.0, 3.0); + + assert_eq!(v.a(), 0.0); + assert_eq!(v.b(), 1.0); + assert_eq!(v.c(), 2.0); + assert_eq!(v.d(), 3.0); + } + + #[test] + fn set() { + let v = Float4::new(0.0, 1.0, 2.0, 3.0); + + assert_eq!(v.set_a(9.0), Float4::new(9.0, 1.0, 2.0, 3.0)); + assert_eq!(v.set_b(9.0), Float4::new(0.0, 9.0, 2.0, 3.0)); + assert_eq!(v.set_c(9.0), Float4::new(0.0, 1.0, 9.0, 3.0)); + assert_eq!(v.set_d(9.0), Float4::new(0.0, 1.0, 2.0, 9.0)); + } + #[test] fn shuffle() { let v = Float4::new(0.0, 1.0, 2.0, 3.0); @@ -434,10 +461,291 @@ mod tests { } #[test] - fn bitmask() { - let v1 = Float4::new(0.0, 1.0, 2.0, 3.0); - let v2 = Float4::new(9.0, 1.0, 9.0, 3.0); + fn abs() { + let v1 = Float4::new(-1.0, 2.0, -3.0, 4.0); + let v2 = Float4::new(1.0, -2.0, 3.0, -4.0); - assert_eq!(v1.cmpeq(v2).bitmask(), 0b1010); + let r = Float4::new(1.0, 2.0, 3.0, 4.0); + + assert_eq!(v1.abs(), r); + assert_eq!(v2.abs(), r); + } + + #[test] + fn neg() { + let v1 = Float4::new(-1.0, 2.0, -3.0, 4.0); + let v2 = Float4::new(1.0, -2.0, 3.0, -4.0); + + assert_eq!(-v1, v2); + assert_eq!(-v2, v1); + } + + #[test] + fn cmp_ops() { + let a = Float4::new(1.0, 2.0, -2.0, 0.0); + let b = Float4::new(1.0, -2.0, 2.0, -0.0); + + assert_eq!(a.cmplt(b), Bool4::new(false, false, true, false)); + assert_eq!(a.cmplte(b), Bool4::new(true, false, true, true)); + assert_eq!(a.cmpgt(b), Bool4::new(false, true, false, false)); + assert_eq!(a.cmpgte(b), Bool4::new(true, true, false, true)); + assert_eq!(a.cmpeq(b), Bool4::new(true, false, false, true)); + } + + #[test] + fn min_max() { + let a = Float4::new(1.0, 2.0, -2.0, 4.0); + let b = Float4::new(1.0, -2.0, 2.0, 5.0); + + assert_eq!(a.min(b), Float4::new(1.0, -2.0, -2.0, 4.0)); + assert_eq!(a.max(b), Float4::new(1.0, 2.0, 2.0, 5.0)); + + let c = Float4::new(std::f32::INFINITY, 2.0, std::f32::NAN, 4.0); + let d = Float4::new(1.0, -std::f32::INFINITY, 2.0, std::f32::NAN); + let r_min = c.min(d); + let r_max = c.max(d); + + assert_eq!(r_min.a(), 1.0); + assert_eq!(r_min.b(), -std::f32::INFINITY); + assert_eq!(r_min.c(), 2.0); + assert!(r_min.d().is_nan()); + + assert_eq!(r_max.a(), std::f32::INFINITY); + assert_eq!(r_max.b(), 2.0); + assert_eq!(r_max.c(), 2.0); + assert!(r_max.d().is_nan()); + } + + #[test] + fn dot_3() { + let v1 = Float4::new(1.0, 2.0, -3.0, 0.0); + let v2 = Float4::new(4.0, -5.0, 6.0, 0.0); + + assert_eq!(Float4::dot_3(v1, v2), -24.0); + assert_eq!(Float4::dot_3_fast(v1, v2), -24.0); + } + + #[test] + fn cross_3() { + let v1 = Float4::new(1.0, 2.0, -3.0, 0.0); + let v2 = Float4::new(4.0, -5.0, 6.0, 0.0); + + let r = Float4::new(-3.0, -18.0, -13.0, 0.0); + + assert_eq!(Float4::cross_3(v1, v2), r); + assert_eq!(Float4::cross_3(v2, v1), -r); + assert_eq!(Float4::cross_3_fast(v1, v2), r); + assert_eq!(Float4::cross_3_fast(v2, v1), -r); + } + + #[test] + fn transpose_3x3() { + let m1 = [ + Float4::new(1.0, 4.0, 7.0, 0.0), + Float4::new(2.0, 5.0, 8.0, 0.0), + Float4::new(3.0, 6.0, 9.0, 0.0), + ]; + let m2 = [ + Float4::new(1.0, 2.0, 3.0, 0.0), + Float4::new(4.0, 5.0, 6.0, 0.0), + Float4::new(7.0, 8.0, 9.0, 0.0), + ]; + + assert_eq!(Float4::transpose_3x3(&m1), m2); + assert_eq!(Float4::transpose_3x3(&m2), m1); + } + + #[test] + fn invert_3x3() { + let m = [ + Float4::new(1.0, 3.0, 9.0, 0.0), + Float4::new(2.0, 6.0, 2.0, 0.0), + Float4::new(2.0, 7.0, 11.0, 0.0), + ]; + let inv_m = [ + Float4::new(3.25, 1.875, -3.0, 0.0), + Float4::new(-1.125, -0.4375, 1.0, 0.0), + Float4::new(0.125, -0.0625, 0.0, 0.0), + ]; + + assert_eq!(Float4::invert_3x3(&m).unwrap(), inv_m); + assert_eq!(Float4::invert_3x3(&inv_m).unwrap(), m); + assert_eq!(Float4::invert_3x3_fast(&m).unwrap(), inv_m); + assert_eq!(Float4::invert_3x3_fast(&inv_m).unwrap(), m); + } + + #[test] + fn vec_mul_3x3() { + let v = Float4::new(1.0, 2.5, 4.0, 0.0); + let m = [ + Float4::new(1.0, 3.0, 9.0, 0.0), + Float4::new(2.0, 6.0, 2.0, 0.0), + Float4::new(2.0, 7.0, 11.0, 0.0), + ]; + + let r = Float4::new(14.0, 46.0, 58.0, 0.0); + + assert_eq!(v.vec_mul_3x3(&m), r); + assert_eq!(v.vec_mul_3x3_fast(&m), r); + } + + #[test] + fn vec_mul_affine() { + let p = Float4::new(1.0, 2.5, 4.0, 0.0); + let xform = ( + [ + Float4::new(1.0, 3.0, 9.0, 0.0), + Float4::new(2.0, 6.0, 2.0, 0.0), + Float4::new(2.0, 7.0, 11.0, 0.0), + ], + Float4::new(1.5, 8.0, 12.0, 0.0), + ); + + let r = Float4::new(15.5, 54.0, 70.0, 0.0); + + assert_eq!(p.vec_mul_affine(&xform.0, xform.1), r); + } + + #[test] + fn vec_mul_affine_rev() { + let p = Float4::new(15.5, 54.0, 70.0, 0.0); + let inv_m = [ + Float4::new(3.25, 1.875, -3.0, 0.0), + Float4::new(-1.125, -0.4375, 1.0, 0.0), + Float4::new(0.125, -0.0625, 0.0, 0.0), + ]; + let t = Float4::new(1.5, 8.0, 12.0, 0.0); + + let r = Float4::new(1.0, 2.5, 4.0, 0.0); + + assert_eq!(p.vec_mul_affine_rev(&inv_m, t), r); + assert_eq!(p.vec_mul_affine_rev_fast(&inv_m, t), r); + } + + #[test] + fn affine_mul_affine() { + let a = ( + [ + Float4::new(1.0, 3.0, 9.0, 0.0), + Float4::new(2.0, 6.0, 2.0, 0.0), + Float4::new(2.0, 7.0, 11.0, 0.0), + ], + Float4::new(1.5, 8.0, 12.0, 0.0), + ); + let b = ( + [ + Float4::new(1.0, 2.0, 3.0, 0.0), + Float4::new(5.0, 6.0, 7.0, 0.0), + Float4::new(9.0, 10.0, 11.0, 0.0), + ], + Float4::new(13.0, 14.0, 15.0, 0.0), + ); + + let r = ( + [ + Float4::new(97.0, 110.0, 123.0, 0.0), + Float4::new(50.0, 60.0, 70.0, 0.0), + Float4::new(136.0, 156.0, 176.0, 0.0), + ], + Float4::new(162.5, 185.0, 207.5, 0.0), + ); + + assert_eq!(Float4::affine_mul_affine(&a.0, a.1, &b.0, b.1), r); + assert_eq!(Float4::affine_mul_affine_fast(&a.0, a.1, &b.0, b.1), r); + } + + //------------ + // Bool4 + + #[test] + fn bitmask() { + assert_eq!(Bool4::new(true, false, false, false).bitmask(), 0b0001); + assert_eq!(Bool4::new(false, true, false, false).bitmask(), 0b0010); + assert_eq!(Bool4::new(false, false, true, false).bitmask(), 0b0100); + assert_eq!(Bool4::new(false, false, false, true).bitmask(), 0b1000); + + assert_eq!(Bool4::new(false, true, false, true).bitmask(), 0b1010); + assert_eq!(Bool4::new(true, false, true, false).bitmask(), 0b0101); + } + + #[test] + fn to_bools() { + assert_eq!( + Bool4::new(true, false, false, false).to_bools(), + [true, false, false, false] + ); + assert_eq!( + Bool4::new(false, true, false, false).to_bools(), + [false, true, false, false] + ); + assert_eq!( + Bool4::new(false, false, true, false).to_bools(), + [false, false, true, false] + ); + assert_eq!( + Bool4::new(false, false, false, true).to_bools(), + [false, false, false, true] + ); + + assert_eq!( + Bool4::new(false, true, false, true).to_bools(), + [false, true, false, true] + ); + assert_eq!( + Bool4::new(true, false, true, false).to_bools(), + [true, false, true, false] + ); + } + + #[test] + fn any() { + assert_eq!(Bool4::new(true, false, false, false).any(), true); + assert_eq!(Bool4::new(false, true, false, false).any(), true); + assert_eq!(Bool4::new(false, false, true, false).any(), true); + assert_eq!(Bool4::new(false, false, false, true).any(), true); + + assert_eq!(Bool4::new(false, false, false, false).any(), false); + } + + #[test] + fn all() { + assert_eq!(Bool4::new(false, true, true, true).all(), false); + assert_eq!(Bool4::new(true, false, true, true).all(), false); + assert_eq!(Bool4::new(true, true, false, true).all(), false); + assert_eq!(Bool4::new(true, true, true, false).all(), false); + + assert_eq!(Bool4::new(true, true, true, true).all(), true); + } + + #[test] + fn boolean_ops() { + let all = Bool4::new(true, true, true, true); + let none = Bool4::new(false, false, false, false); + let a = Bool4::new(true, false, true, false); + let b = Bool4::new(false, true, false, true); + + // Not. + assert_eq!(!a, b); + assert_eq!(!b, a); + assert_eq!(!all, none); + assert_eq!(!none, all); + + // And. + assert_eq!(a & b, none); + assert_eq!(all & none, none); + assert_eq!(all & all, all); + assert_eq!(none & none, none); + + // Or. + assert_eq!(a | b, all); + assert_eq!(all | none, all); + assert_eq!(all | all, all); + assert_eq!(none | none, none); + + // Xor. + assert_eq!(a ^ b, all); + assert_eq!(all ^ none, all); + assert_eq!(all ^ all, none); + assert_eq!(none ^ none, none); } } diff --git a/sub_crates/rmath/src/wide4/sse.rs b/sub_crates/rmath/src/wide4/sse.rs index f7eaee5..6f45e27 100644 --- a/sub_crates/rmath/src/wide4/sse.rs +++ b/sub_crates/rmath/src/wide4/sse.rs @@ -3,8 +3,8 @@ use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, Mul, Neg, Not, Sub}; use std::arch::x86_64::{ __m128, _mm_add_ps, _mm_and_ps, _mm_castsi128_ps, _mm_cmpeq_ps, _mm_cmpge_ps, _mm_cmpgt_ps, _mm_cmple_ps, _mm_cmplt_ps, _mm_div_ps, _mm_fmadd_ps, _mm_max_ps, _mm_min_ps, _mm_movemask_ps, - _mm_mul_ps, _mm_or_ps, _mm_rcp_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_ps, _mm_setzero_ps, - _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps, + _mm_mul_ps, _mm_or_ps, _mm_rcp_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_set_ps, + _mm_setzero_ps, _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps, }; use crate::FMulAdd; @@ -277,7 +277,10 @@ impl Neg for Float4 { #[inline(always)] fn neg(self) -> Self { - Self(unsafe { _mm_mul_ps(self.0, _mm_set1_ps(-1.0)) }) + Self(unsafe { + let abs_mask = _mm_castsi128_ps(_mm_set1_epi32(1 << 31)); + _mm_xor_ps(self.0, abs_mask) + }) } } @@ -291,11 +294,26 @@ impl FMulAdd for Float4 { //============================================================= // Bool4 -#[derive(Debug, Copy, Clone)] +#[derive(Copy, Clone)] #[repr(transparent)] pub struct Bool4(__m128); impl Bool4 { + #[inline(always)] + pub fn new(a: bool, b: bool, c: bool, d: bool) -> Self { + const ONES: i32 = unsafe { std::mem::transmute(0xffffffffu32) }; + + unsafe { + let ints = _mm_set_epi32( + d as i32 * ONES, + c as i32 * ONES, + b as i32 * ONES, + a as i32 * ONES, + ); + Bool4(_mm_castsi128_ps(ints)) + } + } + #[inline(always)] pub fn new_false() -> Self { Self(unsafe { _mm_setzero_ps() }) diff --git a/sub_crates/rmath/src/xform.rs b/sub_crates/rmath/src/xform.rs index a6479c1..8b274c4 100644 --- a/sub_crates/rmath/src/xform.rs +++ b/sub_crates/rmath/src/xform.rs @@ -3,14 +3,33 @@ use std::ops::{Add, Mul}; use crate::point::Point; +use crate::sealed::Sealed; use crate::wide4::Float4; -/// An affine transform. +/// A forward affine transform. +/// +/// Use this for working with transforms that still need to be +/// manipulated or composed with other transforms, or for storing +/// transforms more compactly. +/// +/// Note: slightly counter-intuitively, even though this can perform +/// forward (but not inverse) transforms on points and vectors, it is +/// capable of *inverse* (but not forward) transforms on surface normals. +/// This is because forward transforms on surface normals require the +/// inverse transform matrix. +/// +/// Convert to an [`XformFull`] for a larger-format type capable of +/// efficiently performing both forward and inverse transforms on all +/// types, but which is effectively "frozen" in terms of further +/// manipulation of the transform itself. #[derive(Debug, Copy, Clone, PartialEq)] #[repr(C)] pub struct Xform { - pub m: [Float4; 3], // Linear matrix. - pub t: Float4, // Translation. + /// Rotation/scale/shear matrix. + pub m: [Float4; 3], + + /// Translation. + pub t: Float4, } impl Xform { @@ -87,29 +106,26 @@ impl Xform { eq } - /// Computes a "full" version of the transform, which can do both - /// forward and inverse transforms. + /// Compute the "full" version of the transform. #[inline] - pub fn into_full(self) -> Option { - if let Some(m_inv) = Float4::invert_3x3(self.m) { + pub fn to_full(&self) -> Option { + if let Some(inv_m) = Float4::invert_3x3(&self.m) { Some(XformFull { - m: self.m, - m_inv: m_inv, - t: self.t, + fwd: *self, + inv_m: inv_m, }) } else { None } } - /// Faster but less precise version of `compute_full()`. + /// Faster but less precise version of `to_full()`. #[inline] - pub fn into_full_fast(self) -> Option { - if let Some(m_inv) = Float4::invert_3x3_fast(self.m) { + pub fn to_full_fast(&self) -> Option { + if let Some(inv_m) = Float4::invert_3x3_fast(&self.m) { Some(XformFull { - m: self.m, - m_inv: m_inv, - t: self.t, + fwd: *self, + inv_m: inv_m, }) } else { None @@ -172,36 +188,74 @@ impl Add for Xform { } } +impl AsXform for Xform { + #[inline(always)] + fn as_xform(&self) -> &Xform { + self + } +} + +impl Sealed for Xform {} + //------------------------------------------------------------- -/// An affine transform with precomputed data for performing reverse -/// transforms, among other things. +/// A combined forward/inverse affine transform. +/// +/// Unlike [`Xform`], this can perform both forward and inverse +/// transforms on all types. However, it also takes up more space and +/// is effectively "frozen" in terms of further manipulation. Prefer +/// [`Xform`] when manipulating or composing transforms, and also +/// when storing transforms if space is a consideration. +/// +/// Note: only the 3x3 part of the transform is stored inverted. This +/// is because it's both trivial and more numerically stable to reuse +/// the forward translation vector to do inverse transforms, as +/// `(point - fwd.t) * inv_m`. #[derive(Debug, Copy, Clone)] #[repr(C)] pub struct XformFull { - pub m: [Float4; 3], // Forward linear matrix. - pub m_inv: [Float4; 3], // Inverse linear matrix. - pub t: Float4, // Forward translation. + /// Forward transform. + pub fwd: Xform, + + /// Inverse rotation/scale/shear matrix. + pub inv_m: [Float4; 3], } impl XformFull { pub fn identity() -> Self { Self { - m: [ + fwd: Xform { + m: [ + Float4::new(1.0, 0.0, 0.0, 0.0), + Float4::new(0.0, 1.0, 0.0, 0.0), + Float4::new(0.0, 0.0, 1.0, 0.0), + ], + t: Float4::splat(0.0), + }, + inv_m: [ Float4::new(1.0, 0.0, 0.0, 0.0), Float4::new(0.0, 1.0, 0.0, 0.0), Float4::new(0.0, 0.0, 1.0, 0.0), ], - m_inv: [ - Float4::new(1.0, 0.0, 0.0, 0.0), - Float4::new(0.0, 1.0, 0.0, 0.0), - Float4::new(0.0, 0.0, 1.0, 0.0), - ], - t: Float4::splat(0.0), } } } +impl AsXform for XformFull { + #[inline(always)] + fn as_xform(&self) -> &Xform { + &self.fwd + } +} + +impl Sealed for XformFull {} + +//------------------------------------------------------------- + +pub trait AsXform: Sealed { + fn as_xform(&self) -> &Xform; +} + //------------------------------------------------------------- #[cfg(test)]