Whole bunch of cleanup on RMath.

2022-07-17 16:37:15 -07:00 · 2022-07-17 16:37:15 -07:00 · 6dbdcba91a
commit 6dbdcba91a
parent e2044e6579
9 changed files with 573 additions and 114 deletions
--- a/sub_crates/rmath/src/lib.rs
+++ b/sub_crates/rmath/src/lib.rs
@ -4,6 +4,7 @@

 mod normal;
 mod point;
+mod sealed;
 mod utils;
 mod vector;
 pub mod wide4;
@ -11,7 +12,9 @@ mod xform;

 use std::ops::{Add, Mul, Neg, Sub};

-pub use self::{normal::Normal, point::Point, vector::Vector, xform::Xform, xform::XformFull};
+pub use self::{
+    normal::Normal, point::Point, vector::Vector, xform::AsXform, xform::Xform, xform::XformFull,
+};

 /// Trait for calculating dot products.
 pub trait DotProduct {
@ -111,3 +114,16 @@ where
    let delta = sum - a;
    (sum, (a - (sum - delta)) + (b - delta))
 }
+
+/// `a - b` but also returns a rounding error for precise composition
+/// with other operations.
+#[inline(always)]
+fn two_diff<T>(a: T, b: T) -> (T, T)
+// (diff, rounding_err)
+where
+    T: Copy + Add<Output = T> + Sub<Output = T>,
+{
+    let diff = a - b;
+    let delta = diff - a;
+    (diff, (a - (diff - delta)) - (b + delta))
+}
--- a/sub_crates/rmath/src/normal.rs
+++ b/sub_crates/rmath/src/normal.rs
@ -4,7 +4,7 @@ use std::cmp::PartialEq;
 use std::ops::{Add, Div, Mul, Neg, Sub};

 use crate::wide4::Float4;
-use crate::xform::XformFull;
+use crate::xform::{AsXform, XformFull};
 use crate::Vector;
 use crate::{CrossProduct, DotProduct};

@ -87,20 +87,37 @@ impl Normal {
    //-------------
    // Transforms.

+    /// Forward-transform the normal.
+    #[inline(always)]
    pub fn xform(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(xform.m_inv)))
+        Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(&xform.inv_m)))
    }

-    pub fn xform_inv(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(xform.m)))
+    /// Inverse-transform the normal.
+    #[inline(always)]
+    pub fn xform_inv<T: AsXform>(self, xform: &T) -> Self {
+        Self(
+            self.0
+                .vec_mul_3x3(&Float4::transpose_3x3(&xform.as_xform().m)),
+        )
    }

+    /// Faster but less precise version of `xform()`.
+    #[inline(always)]
    pub fn xform_fast(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3_fast(&Float4::transpose_3x3(xform.m_inv)))
+        Self(
+            self.0
+                .vec_mul_3x3_fast(&Float4::transpose_3x3(&xform.inv_m)),
+        )
    }

-    pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3_fast(&Float4::transpose_3x3(xform.m)))
+    /// Faster but less precise version of `xform_inv()`.
+    #[inline(always)]
+    pub fn xform_inv_fast<T: AsXform>(self, xform: &T) -> Self {
+        Self(
+            self.0
+                .vec_mul_3x3_fast(&Float4::transpose_3x3(&xform.as_xform().m)),
+        )
    }
 }

@ -218,7 +235,7 @@ mod tests {
    fn xform() {
        let n = Normal::new(1.0, 2.5, 4.0);
        let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
            .unwrap();

        assert_eq!(n.xform(&m), Normal::new(-4.0625, 1.78125, -0.03125));
@ -229,7 +246,7 @@ mod tests {
    fn xform_fast() {
        let n = Normal::new(1.0, 2.5, 4.0);
        let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
            .unwrap();

        assert_eq!(n.xform_fast(&m), Normal::new(-4.0625, 1.78125, -0.03125));
--- a/sub_crates/rmath/src/point.rs
+++ b/sub_crates/rmath/src/point.rs
@ -4,7 +4,7 @@ use std::ops::{Add, Sub};

 use crate::vector::Vector;
 use crate::wide4::Float4;
-use crate::xform::XformFull;
+use crate::xform::{AsXform, XformFull};

 /// A position in 3D space.
 #[derive(Debug, Copy, Clone)]
@ -78,20 +78,30 @@ impl Point {
    //-------------
    // Transforms.

-    pub fn xform(self, xform: &XformFull) -> Self {
+    /// Forward-transform the point.
+    #[inline(always)]
+    pub fn xform<T: AsXform>(self, xform: &T) -> Self {
+        let xform = xform.as_xform();
        Self(self.0.vec_mul_affine(&xform.m, xform.t))
    }

+    /// Inverse-transform the point.
+    #[inline(always)]
    pub fn xform_inv(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_affine_rev(&xform.m_inv, -xform.t))
+        Self(self.0.vec_mul_affine_rev(&xform.inv_m, xform.fwd.t))
    }

-    pub fn xform_fast(self, xform: &XformFull) -> Self {
+    /// Faster but less precise version of `xform()`.
+    #[inline(always)]
+    pub fn xform_fast<T: AsXform>(self, xform: &T) -> Self {
+        let xform = xform.as_xform();
        Self(self.0.vec_mul_affine_fast(&xform.m, xform.t))
    }

+    /// Faster but less precise version of `xform_inv()`.
+    #[inline(always)]
    pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_affine_rev_fast(&xform.m_inv, -xform.t))
+        Self(self.0.vec_mul_affine_rev_fast(&xform.inv_m, xform.fwd.t))
    }
 }

@ -158,7 +168,7 @@ mod tests {
    fn xform() {
        let p = Point::new(1.0, 2.5, 4.0);
        let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
            .unwrap();
        assert_eq!(p.xform(&m), Point::new(15.5, 54.0, 70.0));
        assert_eq!(p.xform(&m).xform_inv(&m), p);
@ -168,7 +178,7 @@ mod tests {
    fn xform_fast() {
        let p = Point::new(1.0, 2.5, 4.0);
        let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
            .unwrap();
        assert_eq!(p.xform_fast(&m), Point::new(15.5, 54.0, 70.0));
        assert_eq!(p.xform_fast(&m).xform_inv_fast(&m), p);
--- a/sub_crates/rmath/src/sealed.rs
+++ b/sub_crates/rmath/src/sealed.rs
@ -0,0 +1,5 @@
+/// For sealing other traits.
+///
+/// Even though this is marked as public, this module isn't, and
+/// therefore this trait is not available outside the crate.
+pub trait Sealed {}
--- a/sub_crates/rmath/src/vector.rs
+++ b/sub_crates/rmath/src/vector.rs
@ -6,7 +6,7 @@ use std::ops::{Add, Div, Mul, Neg, Sub};
 use crate::normal::Normal;
 use crate::point::Point;
 use crate::wide4::Float4;
-use crate::xform::XformFull;
+use crate::xform::{AsXform, XformFull};
 use crate::{CrossProduct, DotProduct};

 /// A direction vector in 3D space.
@ -98,20 +98,28 @@ impl Vector {
    //-------------
    // Transforms.

-    pub fn xform(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3(&xform.m))
+    /// Forward-transform the vector.
+    #[inline(always)]
+    pub fn xform<T: AsXform>(self, xform: &T) -> Self {
+        Self(self.0.vec_mul_3x3(&xform.as_xform().m))
    }

+    /// Inverse-transform the vector.
+    #[inline(always)]
    pub fn xform_inv(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3(&xform.m_inv))
+        Self(self.0.vec_mul_3x3(&xform.inv_m))
    }

-    pub fn xform_fast(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3_fast(&xform.m))
+    /// Faster but less precise version of `xform()`.
+    #[inline(always)]
+    pub fn xform_fast<T: AsXform>(self, xform: &T) -> Self {
+        Self(self.0.vec_mul_3x3_fast(&xform.as_xform().m))
    }

+    /// Faster but less precise version of `xform_inv()`.
+    #[inline(always)]
    pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3_fast(&xform.m_inv))
+        Self(self.0.vec_mul_3x3_fast(&xform.inv_m))
    }
 }

@ -229,7 +237,7 @@ mod tests {
    fn xform() {
        let v = Vector::new(1.0, 2.5, 4.0);
        let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
            .unwrap();

        assert_eq!(v.xform(&m), Vector::new(14.0, 46.0, 58.0));
@ -240,7 +248,7 @@ mod tests {
    fn xform_fast() {
        let v = Vector::new(1.0, 2.5, 4.0);
        let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
            .unwrap();

        assert_eq!(v.xform_fast(&m), Vector::new(14.0, 46.0, 58.0));
--- a/sub_crates/rmath/src/wide4/fallback.rs
+++ b/sub_crates/rmath/src/wide4/fallback.rs
@ -38,22 +38,40 @@ impl Float4 {
    /// Vertical minimum.
    #[inline(always)]
    pub fn min(self, a: Self) -> Self {
+        // Custom min to match behavior of SSE.
+        #[inline(always)]
+        pub fn minf(a: f32, b: f32) -> f32 {
+            if a < b {
+                a
+            } else {
+                b
+            }
+        }
        Self([
-            self.0[0].min(a.0[0]),
-            self.0[1].min(a.0[1]),
-            self.0[2].min(a.0[2]),
-            self.0[3].min(a.0[3]),
+            minf(self.0[0], a.0[0]),
+            minf(self.0[1], a.0[1]),
+            minf(self.0[2], a.0[2]),
+            minf(self.0[3], a.0[3]),
        ])
    }

    /// Vertical maximum.
    #[inline(always)]
    pub fn max(self, a: Self) -> Self {
+        // Custom max to match behavior of SSE.
+        #[inline(always)]
+        pub fn maxf(a: f32, b: f32) -> f32 {
+            if a > b {
+                a
+            } else {
+                b
+            }
+        }
        Self([
-            self.0[0].max(a.0[0]),
-            self.0[1].max(a.0[1]),
-            self.0[2].max(a.0[2]),
-            self.0[3].max(a.0[3]),
+            maxf(self.0[0], a.0[0]),
+            maxf(self.0[1], a.0[1]),
+            maxf(self.0[2], a.0[2]),
+            maxf(self.0[3], a.0[3]),
        ])
    }

@ -352,11 +370,16 @@ impl FMulAdd for Float4 {
 //=============================================================
 // Bool4

-#[derive(Debug, Copy, Clone)]
+#[derive(Copy, Clone)]
 #[repr(transparent)]
 pub struct Bool4([bool; 4]);

 impl Bool4 {
+    #[inline(always)]
+    pub fn new(a: bool, b: bool, c: bool, d: bool) -> Self {
+        Bool4([a, b, c, d])
+    }
+
    #[inline(always)]
    pub fn new_false() -> Self {
        Self([false, false, false, false])
--- a/sub_crates/rmath/src/wide4/mod.rs
+++ b/sub_crates/rmath/src/wide4/mod.rs
@ -1,15 +1,14 @@
 use std::{
-    cmp::PartialEq,
+    cmp::{Eq, PartialEq},
    ops::{AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, SubAssign},
 };

 use crate::utils::ulps_eq;
-use crate::{difference_of_products, two_prod, two_sum};
+use crate::{difference_of_products, two_diff, two_prod, two_sum};

 //-------------------------------------------------------------
 // Which implementation to use.

-#[cfg(not(any(target_arch = "x86_64")))]
 mod fallback;
 #[cfg(not(any(target_arch = "x86_64")))]
 pub use fallback::{Bool4, Float4};
@ -43,9 +42,7 @@ impl Float4 {
        s2 + err2
    }

-    /// 3D dot product (only uses the first 3 components).
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version of `dot_3()`.
    #[inline(always)]
    pub fn dot_3_fast(a: Self, b: Self) -> f32 {
        let c = a * b;
@ -58,16 +55,14 @@ impl Float4 {
        difference_of_products(a.bcad(), b.cabd(), a.cabd(), b.bcad())
    }

-    /// 3D cross product (only uses the first 3 components).
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version `cross_3()`.
    #[inline(always)]
    pub fn cross_3_fast(a: Self, b: Self) -> Self {
        (a.bcad() * b.cabd()) - (a.cabd() * b.bcad())
    }

    #[inline(always)]
-    pub fn transpose_3x3(m: [Self; 3]) -> [Self; 3] {
+    pub fn transpose_3x3(m: &[Self; 3]) -> [Self; 3] {
        [
            // The fourth component in each row below is arbitrary,
            // but in this case chosen so that it matches the
@ -82,7 +77,7 @@ impl Float4 {
    ///
    /// Returns `None` if not invertible.
    #[inline]
-    pub fn invert_3x3(m: [Self; 3]) -> Option<[Self; 3]> {
+    pub fn invert_3x3(m: &[Self; 3]) -> Option<[Self; 3]> {
        let m0_bca = m[0].bcad();
        let m1_bca = m[1].bcad();
        let m2_bca = m[2].bcad();
@ -101,15 +96,13 @@ impl Float4 {
        if det == 0.0 {
            None
        } else {
-            Some(Self::transpose_3x3([abc / det, def / det, ghi / det]))
+            Some(Self::transpose_3x3(&[abc / det, def / det, ghi / det]))
        }
    }

-    /// Invert a 3x3 matrix.  Faster but less precise version.
-    ///
-    /// Returns `None` if not invertible.
+    /// Faster but less precise version of `invert_3x3()`.
    #[inline]
-    pub fn invert_3x3_fast(m: [Self; 3]) -> Option<[Self; 3]> {
+    pub fn invert_3x3_fast(m: &[Self; 3]) -> Option<[Self; 3]> {
        let m0_bca = m[0].bcad();
        let m1_bca = m[1].bcad();
        let m2_bca = m[2].bcad();
@ -128,7 +121,7 @@ impl Float4 {
        if det == 0.0 {
            None
        } else {
-            Some(Self::transpose_3x3([abc / det, def / det, ghi / det]))
+            Some(Self::transpose_3x3(&[abc / det, def / det, ghi / det]))
        }
    }

@ -154,9 +147,7 @@ impl Float4 {
        s2 + err2
    }

-    /// Multiplies a 3D vector with a 3x3 matrix.
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version of `vec_mul_3x3()`.
    #[inline]
    pub fn vec_mul_3x3_fast(self, m: &[Self; 3]) -> Self {
        let x = self.aaaa();
@ -193,9 +184,7 @@ impl Float4 {
        s3 + err3
    }

-    /// Transforms a 3d point by an affine transform.
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version of `vec_mul_affine()`.
    #[inline]
    pub fn vec_mul_affine_fast(self, m: &[Self; 3], t: Self) -> Self {
        let x = self.aaaa();
@ -205,25 +194,26 @@ impl Float4 {
        (x * m[0]) + (y * m[1]) + (z * m[2]) + t
    }

-    /// Transforms a 3d point by an affine transform, except it applies
-    /// the translation part before the 3x3 part.
+    /// Transforms a 3d point by an affine transform, except it does
+    /// `(vec - t) * inv_m` instead of `vec * m + t`.
    ///
-    /// This is primarily useful for performing efficient inverse transforms by
-    /// passing an inverted 3x3 part and a negated translation part.
+    /// This is useful for performing efficient inverse transforms while
+    /// only having to invert the 3x3 part of the transform itself.
    ///
-    /// `m` is the 3x3 part of the affine transform, `t` is the translation part.
+    /// `inv_m` is the inverse 3x3 part of the affine transform, `t` is
+    /// the forward translation part.
    #[inline]
-    pub fn vec_mul_affine_rev(self, m: &[Self; 3], t: Self) -> Self {
-        let (v, v_err) = two_sum(self, t);
+    pub fn vec_mul_affine_rev(self, inv_m: &[Self; 3], t: Self) -> Self {
+        let (v, v_err) = two_diff(self, t);

        let (x, x_err) = (v.aaaa(), v_err.aaaa());
        let (y, y_err) = (v.bbbb(), v_err.bbbb());
        let (z, z_err) = (v.cccc(), v_err.cccc());

        // Products.
-        let ((a, a_err1), a_err2) = (two_prod(x, m[0]), x_err * m[0]);
-        let ((b, b_err1), b_err2) = (two_prod(y, m[1]), y_err * m[1]);
-        let ((c, c_err1), c_err2) = (two_prod(z, m[2]), z_err * m[2]);
+        let ((a, a_err1), a_err2) = (two_prod(x, inv_m[0]), x_err * inv_m[0]);
+        let ((b, b_err1), b_err2) = (two_prod(y, inv_m[1]), y_err * inv_m[1]);
+        let ((c, c_err1), c_err2) = (two_prod(z, inv_m[2]), z_err * inv_m[2]);
        let a_err = a_err1 + a_err2;
        let b_err = b_err1 + b_err2;
        let c_err = c_err1 + c_err2;
@ -238,19 +228,16 @@ impl Float4 {
        s2 + err2
    }

-    /// Transforms a 3d point by an affine transform, except it applies
-    /// the translation part before the 3x3 part.
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version of `vec_mul_affine_rev()`.
    #[inline]
-    pub fn vec_mul_affine_rev_fast(self, m: &[Self; 3], t: Self) -> Self {
-        let v = self + t;
+    pub fn vec_mul_affine_rev_fast(self, inv_m: &[Self; 3], t: Self) -> Self {
+        let v = self - t;

        let x = v.aaaa();
        let y = v.bbbb();
        let z = v.cccc();

-        (x * m[0]) + (y * m[1]) + (z * m[2])
+        (x * inv_m[0]) + (y * inv_m[1]) + (z * inv_m[2])
    }

    /// Returns whether the `Float4`s are approximately equal to each
@ -290,9 +277,7 @@ impl Float4 {
        )
    }

-    /// Transforms one affine transform by another.
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version of `affine_mul_affine()`.
    #[inline]
    pub fn affine_mul_affine_fast(
        m1: &[Self; 3],
@ -389,12 +374,34 @@ impl BitXorAssign for Bool4 {
    }
 }

+impl PartialEq for Bool4 {
+    #[inline(always)]
+    fn eq(&self, rhs: &Self) -> bool {
+        self.bitmask() == rhs.bitmask()
+    }
+}
+
+impl Eq for Bool4 {}
+
+impl std::fmt::Debug for Bool4 {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        f.write_str("Bool4(")?;
+        f.debug_list().entries(self.to_bools().iter()).finish()?;
+        f.write_str(")")?;
+
+        Ok(())
+    }
+}
+
 //-------------------------------------------------------------

 #[cfg(test)]
 mod tests {
    use super::*;

+    //------------
+    // Float4
+
    #[test]
    fn approximate_equality_test() {
        let a = Float4::new(1.0, 2.0, 3.0, 4.0);
@ -420,6 +427,26 @@ mod tests {
        assert_eq!(v[3], 3.0);
    }

+    #[test]
+    fn get() {
+        let v = Float4::new(0.0, 1.0, 2.0, 3.0);
+
+        assert_eq!(v.a(), 0.0);
+        assert_eq!(v.b(), 1.0);
+        assert_eq!(v.c(), 2.0);
+        assert_eq!(v.d(), 3.0);
+    }
+
+    #[test]
+    fn set() {
+        let v = Float4::new(0.0, 1.0, 2.0, 3.0);
+
+        assert_eq!(v.set_a(9.0), Float4::new(9.0, 1.0, 2.0, 3.0));
+        assert_eq!(v.set_b(9.0), Float4::new(0.0, 9.0, 2.0, 3.0));
+        assert_eq!(v.set_c(9.0), Float4::new(0.0, 1.0, 9.0, 3.0));
+        assert_eq!(v.set_d(9.0), Float4::new(0.0, 1.0, 2.0, 9.0));
+    }
+
    #[test]
    fn shuffle() {
        let v = Float4::new(0.0, 1.0, 2.0, 3.0);
@ -434,10 +461,291 @@ mod tests {
    }

    #[test]
-    fn bitmask() {
-        let v1 = Float4::new(0.0, 1.0, 2.0, 3.0);
-        let v2 = Float4::new(9.0, 1.0, 9.0, 3.0);
+    fn abs() {
+        let v1 = Float4::new(-1.0, 2.0, -3.0, 4.0);
+        let v2 = Float4::new(1.0, -2.0, 3.0, -4.0);

-        assert_eq!(v1.cmpeq(v2).bitmask(), 0b1010);
+        let r = Float4::new(1.0, 2.0, 3.0, 4.0);
+
+        assert_eq!(v1.abs(), r);
+        assert_eq!(v2.abs(), r);
+    }
+
+    #[test]
+    fn neg() {
+        let v1 = Float4::new(-1.0, 2.0, -3.0, 4.0);
+        let v2 = Float4::new(1.0, -2.0, 3.0, -4.0);
+
+        assert_eq!(-v1, v2);
+        assert_eq!(-v2, v1);
+    }
+
+    #[test]
+    fn cmp_ops() {
+        let a = Float4::new(1.0, 2.0, -2.0, 0.0);
+        let b = Float4::new(1.0, -2.0, 2.0, -0.0);
+
+        assert_eq!(a.cmplt(b), Bool4::new(false, false, true, false));
+        assert_eq!(a.cmplte(b), Bool4::new(true, false, true, true));
+        assert_eq!(a.cmpgt(b), Bool4::new(false, true, false, false));
+        assert_eq!(a.cmpgte(b), Bool4::new(true, true, false, true));
+        assert_eq!(a.cmpeq(b), Bool4::new(true, false, false, true));
+    }
+
+    #[test]
+    fn min_max() {
+        let a = Float4::new(1.0, 2.0, -2.0, 4.0);
+        let b = Float4::new(1.0, -2.0, 2.0, 5.0);
+
+        assert_eq!(a.min(b), Float4::new(1.0, -2.0, -2.0, 4.0));
+        assert_eq!(a.max(b), Float4::new(1.0, 2.0, 2.0, 5.0));
+
+        let c = Float4::new(std::f32::INFINITY, 2.0, std::f32::NAN, 4.0);
+        let d = Float4::new(1.0, -std::f32::INFINITY, 2.0, std::f32::NAN);
+        let r_min = c.min(d);
+        let r_max = c.max(d);
+
+        assert_eq!(r_min.a(), 1.0);
+        assert_eq!(r_min.b(), -std::f32::INFINITY);
+        assert_eq!(r_min.c(), 2.0);
+        assert!(r_min.d().is_nan());
+
+        assert_eq!(r_max.a(), std::f32::INFINITY);
+        assert_eq!(r_max.b(), 2.0);
+        assert_eq!(r_max.c(), 2.0);
+        assert!(r_max.d().is_nan());
+    }
+
+    #[test]
+    fn dot_3() {
+        let v1 = Float4::new(1.0, 2.0, -3.0, 0.0);
+        let v2 = Float4::new(4.0, -5.0, 6.0, 0.0);
+
+        assert_eq!(Float4::dot_3(v1, v2), -24.0);
+        assert_eq!(Float4::dot_3_fast(v1, v2), -24.0);
+    }
+
+    #[test]
+    fn cross_3() {
+        let v1 = Float4::new(1.0, 2.0, -3.0, 0.0);
+        let v2 = Float4::new(4.0, -5.0, 6.0, 0.0);
+
+        let r = Float4::new(-3.0, -18.0, -13.0, 0.0);
+
+        assert_eq!(Float4::cross_3(v1, v2), r);
+        assert_eq!(Float4::cross_3(v2, v1), -r);
+        assert_eq!(Float4::cross_3_fast(v1, v2), r);
+        assert_eq!(Float4::cross_3_fast(v2, v1), -r);
+    }
+
+    #[test]
+    fn transpose_3x3() {
+        let m1 = [
+            Float4::new(1.0, 4.0, 7.0, 0.0),
+            Float4::new(2.0, 5.0, 8.0, 0.0),
+            Float4::new(3.0, 6.0, 9.0, 0.0),
+        ];
+        let m2 = [
+            Float4::new(1.0, 2.0, 3.0, 0.0),
+            Float4::new(4.0, 5.0, 6.0, 0.0),
+            Float4::new(7.0, 8.0, 9.0, 0.0),
+        ];
+
+        assert_eq!(Float4::transpose_3x3(&m1), m2);
+        assert_eq!(Float4::transpose_3x3(&m2), m1);
+    }
+
+    #[test]
+    fn invert_3x3() {
+        let m = [
+            Float4::new(1.0, 3.0, 9.0, 0.0),
+            Float4::new(2.0, 6.0, 2.0, 0.0),
+            Float4::new(2.0, 7.0, 11.0, 0.0),
+        ];
+        let inv_m = [
+            Float4::new(3.25, 1.875, -3.0, 0.0),
+            Float4::new(-1.125, -0.4375, 1.0, 0.0),
+            Float4::new(0.125, -0.0625, 0.0, 0.0),
+        ];
+
+        assert_eq!(Float4::invert_3x3(&m).unwrap(), inv_m);
+        assert_eq!(Float4::invert_3x3(&inv_m).unwrap(), m);
+        assert_eq!(Float4::invert_3x3_fast(&m).unwrap(), inv_m);
+        assert_eq!(Float4::invert_3x3_fast(&inv_m).unwrap(), m);
+    }
+
+    #[test]
+    fn vec_mul_3x3() {
+        let v = Float4::new(1.0, 2.5, 4.0, 0.0);
+        let m = [
+            Float4::new(1.0, 3.0, 9.0, 0.0),
+            Float4::new(2.0, 6.0, 2.0, 0.0),
+            Float4::new(2.0, 7.0, 11.0, 0.0),
+        ];
+
+        let r = Float4::new(14.0, 46.0, 58.0, 0.0);
+
+        assert_eq!(v.vec_mul_3x3(&m), r);
+        assert_eq!(v.vec_mul_3x3_fast(&m), r);
+    }
+
+    #[test]
+    fn vec_mul_affine() {
+        let p = Float4::new(1.0, 2.5, 4.0, 0.0);
+        let xform = (
+            [
+                Float4::new(1.0, 3.0, 9.0, 0.0),
+                Float4::new(2.0, 6.0, 2.0, 0.0),
+                Float4::new(2.0, 7.0, 11.0, 0.0),
+            ],
+            Float4::new(1.5, 8.0, 12.0, 0.0),
+        );
+
+        let r = Float4::new(15.5, 54.0, 70.0, 0.0);
+
+        assert_eq!(p.vec_mul_affine(&xform.0, xform.1), r);
+    }
+
+    #[test]
+    fn vec_mul_affine_rev() {
+        let p = Float4::new(15.5, 54.0, 70.0, 0.0);
+        let inv_m = [
+            Float4::new(3.25, 1.875, -3.0, 0.0),
+            Float4::new(-1.125, -0.4375, 1.0, 0.0),
+            Float4::new(0.125, -0.0625, 0.0, 0.0),
+        ];
+        let t = Float4::new(1.5, 8.0, 12.0, 0.0);
+
+        let r = Float4::new(1.0, 2.5, 4.0, 0.0);
+
+        assert_eq!(p.vec_mul_affine_rev(&inv_m, t), r);
+        assert_eq!(p.vec_mul_affine_rev_fast(&inv_m, t), r);
+    }
+
+    #[test]
+    fn affine_mul_affine() {
+        let a = (
+            [
+                Float4::new(1.0, 3.0, 9.0, 0.0),
+                Float4::new(2.0, 6.0, 2.0, 0.0),
+                Float4::new(2.0, 7.0, 11.0, 0.0),
+            ],
+            Float4::new(1.5, 8.0, 12.0, 0.0),
+        );
+        let b = (
+            [
+                Float4::new(1.0, 2.0, 3.0, 0.0),
+                Float4::new(5.0, 6.0, 7.0, 0.0),
+                Float4::new(9.0, 10.0, 11.0, 0.0),
+            ],
+            Float4::new(13.0, 14.0, 15.0, 0.0),
+        );
+
+        let r = (
+            [
+                Float4::new(97.0, 110.0, 123.0, 0.0),
+                Float4::new(50.0, 60.0, 70.0, 0.0),
+                Float4::new(136.0, 156.0, 176.0, 0.0),
+            ],
+            Float4::new(162.5, 185.0, 207.5, 0.0),
+        );
+
+        assert_eq!(Float4::affine_mul_affine(&a.0, a.1, &b.0, b.1), r);
+        assert_eq!(Float4::affine_mul_affine_fast(&a.0, a.1, &b.0, b.1), r);
+    }
+
+    //------------
+    // Bool4
+
+    #[test]
+    fn bitmask() {
+        assert_eq!(Bool4::new(true, false, false, false).bitmask(), 0b0001);
+        assert_eq!(Bool4::new(false, true, false, false).bitmask(), 0b0010);
+        assert_eq!(Bool4::new(false, false, true, false).bitmask(), 0b0100);
+        assert_eq!(Bool4::new(false, false, false, true).bitmask(), 0b1000);
+
+        assert_eq!(Bool4::new(false, true, false, true).bitmask(), 0b1010);
+        assert_eq!(Bool4::new(true, false, true, false).bitmask(), 0b0101);
+    }
+
+    #[test]
+    fn to_bools() {
+        assert_eq!(
+            Bool4::new(true, false, false, false).to_bools(),
+            [true, false, false, false]
+        );
+        assert_eq!(
+            Bool4::new(false, true, false, false).to_bools(),
+            [false, true, false, false]
+        );
+        assert_eq!(
+            Bool4::new(false, false, true, false).to_bools(),
+            [false, false, true, false]
+        );
+        assert_eq!(
+            Bool4::new(false, false, false, true).to_bools(),
+            [false, false, false, true]
+        );
+
+        assert_eq!(
+            Bool4::new(false, true, false, true).to_bools(),
+            [false, true, false, true]
+        );
+        assert_eq!(
+            Bool4::new(true, false, true, false).to_bools(),
+            [true, false, true, false]
+        );
+    }
+
+    #[test]
+    fn any() {
+        assert_eq!(Bool4::new(true, false, false, false).any(), true);
+        assert_eq!(Bool4::new(false, true, false, false).any(), true);
+        assert_eq!(Bool4::new(false, false, true, false).any(), true);
+        assert_eq!(Bool4::new(false, false, false, true).any(), true);
+
+        assert_eq!(Bool4::new(false, false, false, false).any(), false);
+    }
+
+    #[test]
+    fn all() {
+        assert_eq!(Bool4::new(false, true, true, true).all(), false);
+        assert_eq!(Bool4::new(true, false, true, true).all(), false);
+        assert_eq!(Bool4::new(true, true, false, true).all(), false);
+        assert_eq!(Bool4::new(true, true, true, false).all(), false);
+
+        assert_eq!(Bool4::new(true, true, true, true).all(), true);
+    }
+
+    #[test]
+    fn boolean_ops() {
+        let all = Bool4::new(true, true, true, true);
+        let none = Bool4::new(false, false, false, false);
+        let a = Bool4::new(true, false, true, false);
+        let b = Bool4::new(false, true, false, true);
+
+        // Not.
+        assert_eq!(!a, b);
+        assert_eq!(!b, a);
+        assert_eq!(!all, none);
+        assert_eq!(!none, all);
+
+        // And.
+        assert_eq!(a & b, none);
+        assert_eq!(all & none, none);
+        assert_eq!(all & all, all);
+        assert_eq!(none & none, none);
+
+        // Or.
+        assert_eq!(a | b, all);
+        assert_eq!(all | none, all);
+        assert_eq!(all | all, all);
+        assert_eq!(none | none, none);
+
+        // Xor.
+        assert_eq!(a ^ b, all);
+        assert_eq!(all ^ none, all);
+        assert_eq!(all ^ all, none);
+        assert_eq!(none ^ none, none);
    }
 }
--- a/sub_crates/rmath/src/wide4/sse.rs
+++ b/sub_crates/rmath/src/wide4/sse.rs
@ -3,8 +3,8 @@ use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, Mul, Neg, Not, Sub};
 use std::arch::x86_64::{
    __m128, _mm_add_ps, _mm_and_ps, _mm_castsi128_ps, _mm_cmpeq_ps, _mm_cmpge_ps, _mm_cmpgt_ps,
    _mm_cmple_ps, _mm_cmplt_ps, _mm_div_ps, _mm_fmadd_ps, _mm_max_ps, _mm_min_ps, _mm_movemask_ps,
-    _mm_mul_ps, _mm_or_ps, _mm_rcp_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_ps, _mm_setzero_ps,
-    _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
+    _mm_mul_ps, _mm_or_ps, _mm_rcp_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_set_ps,
+    _mm_setzero_ps, _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
 };

 use crate::FMulAdd;
@ -277,7 +277,10 @@ impl Neg for Float4 {

    #[inline(always)]
    fn neg(self) -> Self {
-        Self(unsafe { _mm_mul_ps(self.0, _mm_set1_ps(-1.0)) })
+        Self(unsafe {
+            let abs_mask = _mm_castsi128_ps(_mm_set1_epi32(1 << 31));
+            _mm_xor_ps(self.0, abs_mask)
+        })
    }
 }

@ -291,11 +294,26 @@ impl FMulAdd for Float4 {
 //=============================================================
 // Bool4

-#[derive(Debug, Copy, Clone)]
+#[derive(Copy, Clone)]
 #[repr(transparent)]
 pub struct Bool4(__m128);

 impl Bool4 {
+    #[inline(always)]
+    pub fn new(a: bool, b: bool, c: bool, d: bool) -> Self {
+        const ONES: i32 = unsafe { std::mem::transmute(0xffffffffu32) };
+
+        unsafe {
+            let ints = _mm_set_epi32(
+                d as i32 * ONES,
+                c as i32 * ONES,
+                b as i32 * ONES,
+                a as i32 * ONES,
+            );
+            Bool4(_mm_castsi128_ps(ints))
+        }
+    }
+
    #[inline(always)]
    pub fn new_false() -> Self {
        Self(unsafe { _mm_setzero_ps() })
--- a/sub_crates/rmath/src/xform.rs
+++ b/sub_crates/rmath/src/xform.rs
@ -3,14 +3,33 @@
 use std::ops::{Add, Mul};

 use crate::point::Point;
+use crate::sealed::Sealed;
 use crate::wide4::Float4;

-/// An affine transform.
+/// A forward affine transform.
+///
+/// Use this for working with transforms that still need to be
+/// manipulated or composed with other transforms, or for storing
+/// transforms more compactly.
+///
+/// Note: slightly counter-intuitively, even though this can perform
+/// forward (but not inverse) transforms on points and vectors, it is
+/// capable of *inverse* (but not forward) transforms on surface normals.
+/// This is because forward transforms on surface normals require the
+/// inverse transform matrix.
+///
+/// Convert to an [`XformFull`] for a larger-format type capable of
+/// efficiently performing both forward and inverse transforms on all
+/// types, but which is effectively "frozen" in terms of further
+/// manipulation of the transform itself.
 #[derive(Debug, Copy, Clone, PartialEq)]
 #[repr(C)]
 pub struct Xform {
-    pub m: [Float4; 3], // Linear matrix.
-    pub t: Float4,      // Translation.
+    /// Rotation/scale/shear matrix.
+    pub m: [Float4; 3],
+
+    /// Translation.
+    pub t: Float4,
 }

 impl Xform {
@ -87,29 +106,26 @@ impl Xform {
        eq
    }

-    /// Computes a "full" version of the transform, which can do both
-    /// forward and inverse transforms.
+    /// Compute the "full" version of the transform.
    #[inline]
-    pub fn into_full(self) -> Option<XformFull> {
-        if let Some(m_inv) = Float4::invert_3x3(self.m) {
+    pub fn to_full(&self) -> Option<XformFull> {
+        if let Some(inv_m) = Float4::invert_3x3(&self.m) {
            Some(XformFull {
-                m: self.m,
-                m_inv: m_inv,
-                t: self.t,
+                fwd: *self,
+                inv_m: inv_m,
            })
        } else {
            None
        }
    }

-    /// Faster but less precise version of `compute_full()`.
+    /// Faster but less precise version of `to_full()`.
    #[inline]
-    pub fn into_full_fast(self) -> Option<XformFull> {
-        if let Some(m_inv) = Float4::invert_3x3_fast(self.m) {
+    pub fn to_full_fast(&self) -> Option<XformFull> {
+        if let Some(inv_m) = Float4::invert_3x3_fast(&self.m) {
            Some(XformFull {
-                m: self.m,
-                m_inv: m_inv,
-                t: self.t,
+                fwd: *self,
+                inv_m: inv_m,
            })
        } else {
            None
@ -172,36 +188,74 @@ impl Add for Xform {
    }
 }

+impl AsXform for Xform {
+    #[inline(always)]
+    fn as_xform(&self) -> &Xform {
+        self
+    }
+}
+
+impl Sealed for Xform {}
+
 //-------------------------------------------------------------

-/// An affine transform with precomputed data for performing reverse
-/// transforms, among other things.
+/// A combined forward/inverse affine transform.
+///
+/// Unlike [`Xform`], this can perform both forward and inverse
+/// transforms on all types.  However, it also takes up more space and
+/// is effectively "frozen" in terms of further manipulation.  Prefer
+/// [`Xform`] when manipulating or composing transforms, and also
+/// when storing transforms if space is a consideration.
+///
+/// Note: only the 3x3 part of the transform is stored inverted.  This
+/// is because it's both trivial and more numerically stable to reuse
+/// the forward translation vector to do inverse transforms, as
+/// `(point - fwd.t) * inv_m`.
 #[derive(Debug, Copy, Clone)]
 #[repr(C)]
 pub struct XformFull {
-    pub m: [Float4; 3],     // Forward linear matrix.
-    pub m_inv: [Float4; 3], // Inverse linear matrix.
-    pub t: Float4,          // Forward translation.
+    /// Forward transform.
+    pub fwd: Xform,
+
+    /// Inverse rotation/scale/shear matrix.
+    pub inv_m: [Float4; 3],
 }

 impl XformFull {
    pub fn identity() -> Self {
        Self {
+            fwd: Xform {
                m: [
                    Float4::new(1.0, 0.0, 0.0, 0.0),
                    Float4::new(0.0, 1.0, 0.0, 0.0),
                    Float4::new(0.0, 0.0, 1.0, 0.0),
                ],
-            m_inv: [
+                t: Float4::splat(0.0),
+            },
+            inv_m: [
                Float4::new(1.0, 0.0, 0.0, 0.0),
                Float4::new(0.0, 1.0, 0.0, 0.0),
                Float4::new(0.0, 0.0, 1.0, 0.0),
            ],
-            t: Float4::splat(0.0),
        }
    }
 }

+impl AsXform for XformFull {
+    #[inline(always)]
+    fn as_xform(&self) -> &Xform {
+        &self.fwd
+    }
+}
+
+impl Sealed for XformFull {}
+
+//-------------------------------------------------------------
+
+pub trait AsXform: Sealed {
+    fn as_xform(&self) -> &Xform;
+}
+
 //-------------------------------------------------------------

 #[cfg(test)]