diff --git a/sub_crates/rmath/src/lib.rs b/sub_crates/rmath/src/lib.rs
index d097132..7d8cf7e 100644
--- a/sub_crates/rmath/src/lib.rs
+++ b/sub_crates/rmath/src/lib.rs
@@ -4,6 +4,7 @@
 
 mod normal;
 mod point;
+mod sealed;
 mod utils;
 mod vector;
 pub mod wide4;
@@ -11,7 +12,9 @@ mod xform;
 
 use std::ops::{Add, Mul, Neg, Sub};
 
-pub use self::{normal::Normal, point::Point, vector::Vector, xform::Xform, xform::XformFull};
+pub use self::{
+    normal::Normal, point::Point, vector::Vector, xform::AsXform, xform::Xform, xform::XformFull,
+};
 
 /// Trait for calculating dot products.
 pub trait DotProduct {
@@ -111,3 +114,16 @@ where
     let delta = sum - a;
     (sum, (a - (sum - delta)) + (b - delta))
 }
+
+/// `a - b` but also returns a rounding error for precise composition
+/// with other operations.
+#[inline(always)]
+fn two_diff<T>(a: T, b: T) -> (T, T)
+// (diff, rounding_err)
+where
+    T: Copy + Add<Output = T> + Sub<Output = T>,
+{
+    let diff = a - b;
+    let delta = diff - a;
+    (diff, (a - (diff - delta)) - (b + delta))
+}
diff --git a/sub_crates/rmath/src/normal.rs b/sub_crates/rmath/src/normal.rs
index 64160a8..bd0c002 100644
--- a/sub_crates/rmath/src/normal.rs
+++ b/sub_crates/rmath/src/normal.rs
@@ -4,7 +4,7 @@ use std::cmp::PartialEq;
 use std::ops::{Add, Div, Mul, Neg, Sub};
 
 use crate::wide4::Float4;
-use crate::xform::XformFull;
+use crate::xform::{AsXform, XformFull};
 use crate::Vector;
 use crate::{CrossProduct, DotProduct};
 
@@ -87,20 +87,37 @@ impl Normal {
     //-------------
     // Transforms.
 
+    /// Forward-transform the normal.
+    #[inline(always)]
     pub fn xform(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(xform.m_inv)))
+        Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(&xform.inv_m)))
     }
 
-    pub fn xform_inv(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(xform.m)))
+    /// Inverse-transform the normal.
+    #[inline(always)]
+    pub fn xform_inv<T: AsXform>(self, xform: &T) -> Self {
+        Self(
+            self.0
+                .vec_mul_3x3(&Float4::transpose_3x3(&xform.as_xform().m)),
+        )
     }
 
+    /// Faster but less precise version of `xform()`.
+    #[inline(always)]
     pub fn xform_fast(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3_fast(&Float4::transpose_3x3(xform.m_inv)))
+        Self(
+            self.0
+                .vec_mul_3x3_fast(&Float4::transpose_3x3(&xform.inv_m)),
+        )
     }
 
-    pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3_fast(&Float4::transpose_3x3(xform.m)))
+    /// Faster but less precise version of `xform_inv()`.
+    #[inline(always)]
+    pub fn xform_inv_fast<T: AsXform>(self, xform: &T) -> Self {
+        Self(
+            self.0
+                .vec_mul_3x3_fast(&Float4::transpose_3x3(&xform.as_xform().m)),
+        )
     }
 }
 
@@ -218,7 +235,7 @@ mod tests {
     fn xform() {
         let n = Normal::new(1.0, 2.5, 4.0);
         let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
             .unwrap();
 
         assert_eq!(n.xform(&m), Normal::new(-4.0625, 1.78125, -0.03125));
@@ -229,7 +246,7 @@ mod tests {
     fn xform_fast() {
         let n = Normal::new(1.0, 2.5, 4.0);
         let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
             .unwrap();
 
         assert_eq!(n.xform_fast(&m), Normal::new(-4.0625, 1.78125, -0.03125));
diff --git a/sub_crates/rmath/src/point.rs b/sub_crates/rmath/src/point.rs
index f260d74..1b7e4f8 100644
--- a/sub_crates/rmath/src/point.rs
+++ b/sub_crates/rmath/src/point.rs
@@ -4,7 +4,7 @@ use std::ops::{Add, Sub};
 
 use crate::vector::Vector;
 use crate::wide4::Float4;
-use crate::xform::XformFull;
+use crate::xform::{AsXform, XformFull};
 
 /// A position in 3D space.
 #[derive(Debug, Copy, Clone)]
@@ -78,20 +78,30 @@ impl Point {
     //-------------
     // Transforms.
 
-    pub fn xform(self, xform: &XformFull) -> Self {
+    /// Forward-transform the point.
+    #[inline(always)]
+    pub fn xform<T: AsXform>(self, xform: &T) -> Self {
+        let xform = xform.as_xform();
         Self(self.0.vec_mul_affine(&xform.m, xform.t))
     }
 
+    /// Inverse-transform the point.
+    #[inline(always)]
     pub fn xform_inv(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_affine_rev(&xform.m_inv, -xform.t))
+        Self(self.0.vec_mul_affine_rev(&xform.inv_m, xform.fwd.t))
     }
 
-    pub fn xform_fast(self, xform: &XformFull) -> Self {
+    /// Faster but less precise version of `xform()`.
+    #[inline(always)]
+    pub fn xform_fast<T: AsXform>(self, xform: &T) -> Self {
+        let xform = xform.as_xform();
         Self(self.0.vec_mul_affine_fast(&xform.m, xform.t))
     }
 
+    /// Faster but less precise version of `xform_inv()`.
+    #[inline(always)]
     pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_affine_rev_fast(&xform.m_inv, -xform.t))
+        Self(self.0.vec_mul_affine_rev_fast(&xform.inv_m, xform.fwd.t))
     }
 }
 
@@ -158,7 +168,7 @@ mod tests {
     fn xform() {
         let p = Point::new(1.0, 2.5, 4.0);
         let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
             .unwrap();
         assert_eq!(p.xform(&m), Point::new(15.5, 54.0, 70.0));
         assert_eq!(p.xform(&m).xform_inv(&m), p);
@@ -168,7 +178,7 @@ mod tests {
     fn xform_fast() {
         let p = Point::new(1.0, 2.5, 4.0);
         let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
             .unwrap();
         assert_eq!(p.xform_fast(&m), Point::new(15.5, 54.0, 70.0));
         assert_eq!(p.xform_fast(&m).xform_inv_fast(&m), p);
diff --git a/sub_crates/rmath/src/sealed.rs b/sub_crates/rmath/src/sealed.rs
new file mode 100644
index 0000000..8ee7071
--- /dev/null
+++ b/sub_crates/rmath/src/sealed.rs
@@ -0,0 +1,5 @@
+/// For sealing other traits.
+///
+/// Even though this is marked as public, this module isn't, and
+/// therefore this trait is not available outside the crate.
+pub trait Sealed {}
diff --git a/sub_crates/rmath/src/vector.rs b/sub_crates/rmath/src/vector.rs
index 21491a2..87cc1ff 100644
--- a/sub_crates/rmath/src/vector.rs
+++ b/sub_crates/rmath/src/vector.rs
@@ -6,7 +6,7 @@ use std::ops::{Add, Div, Mul, Neg, Sub};
 use crate::normal::Normal;
 use crate::point::Point;
 use crate::wide4::Float4;
-use crate::xform::XformFull;
+use crate::xform::{AsXform, XformFull};
 use crate::{CrossProduct, DotProduct};
 
 /// A direction vector in 3D space.
@@ -98,20 +98,28 @@ impl Vector {
     //-------------
     // Transforms.
 
-    pub fn xform(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3(&xform.m))
+    /// Forward-transform the vector.
+    #[inline(always)]
+    pub fn xform<T: AsXform>(self, xform: &T) -> Self {
+        Self(self.0.vec_mul_3x3(&xform.as_xform().m))
     }
 
+    /// Inverse-transform the vector.
+    #[inline(always)]
     pub fn xform_inv(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3(&xform.m_inv))
+        Self(self.0.vec_mul_3x3(&xform.inv_m))
     }
 
-    pub fn xform_fast(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3_fast(&xform.m))
+    /// Faster but less precise version of `xform()`.
+    #[inline(always)]
+    pub fn xform_fast<T: AsXform>(self, xform: &T) -> Self {
+        Self(self.0.vec_mul_3x3_fast(&xform.as_xform().m))
     }
 
+    /// Faster but less precise version of `xform_inv()`.
+    #[inline(always)]
     pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
-        Self(self.0.vec_mul_3x3_fast(&xform.m_inv))
+        Self(self.0.vec_mul_3x3_fast(&xform.inv_m))
     }
 }
 
@@ -229,7 +237,7 @@ mod tests {
     fn xform() {
         let v = Vector::new(1.0, 2.5, 4.0);
         let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
             .unwrap();
 
         assert_eq!(v.xform(&m), Vector::new(14.0, 46.0, 58.0));
@@ -240,7 +248,7 @@ mod tests {
     fn xform_fast() {
         let v = Vector::new(1.0, 2.5, 4.0);
         let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
-            .into_full()
+            .to_full()
             .unwrap();
 
         assert_eq!(v.xform_fast(&m), Vector::new(14.0, 46.0, 58.0));
diff --git a/sub_crates/rmath/src/wide4/fallback.rs b/sub_crates/rmath/src/wide4/fallback.rs
index 076bcf5..546a4af 100644
--- a/sub_crates/rmath/src/wide4/fallback.rs
+++ b/sub_crates/rmath/src/wide4/fallback.rs
@@ -38,22 +38,40 @@ impl Float4 {
     /// Vertical minimum.
     #[inline(always)]
     pub fn min(self, a: Self) -> Self {
+        // Custom min to match behavior of SSE.
+        #[inline(always)]
+        pub fn minf(a: f32, b: f32) -> f32 {
+            if a < b {
+                a
+            } else {
+                b
+            }
+        }
         Self([
-            self.0[0].min(a.0[0]),
-            self.0[1].min(a.0[1]),
-            self.0[2].min(a.0[2]),
-            self.0[3].min(a.0[3]),
+            minf(self.0[0], a.0[0]),
+            minf(self.0[1], a.0[1]),
+            minf(self.0[2], a.0[2]),
+            minf(self.0[3], a.0[3]),
         ])
     }
 
     /// Vertical maximum.
     #[inline(always)]
     pub fn max(self, a: Self) -> Self {
+        // Custom max to match behavior of SSE.
+        #[inline(always)]
+        pub fn maxf(a: f32, b: f32) -> f32 {
+            if a > b {
+                a
+            } else {
+                b
+            }
+        }
         Self([
-            self.0[0].max(a.0[0]),
-            self.0[1].max(a.0[1]),
-            self.0[2].max(a.0[2]),
-            self.0[3].max(a.0[3]),
+            maxf(self.0[0], a.0[0]),
+            maxf(self.0[1], a.0[1]),
+            maxf(self.0[2], a.0[2]),
+            maxf(self.0[3], a.0[3]),
         ])
     }
 
@@ -352,11 +370,16 @@ impl FMulAdd for Float4 {
 //=============================================================
 // Bool4
 
-#[derive(Debug, Copy, Clone)]
+#[derive(Copy, Clone)]
 #[repr(transparent)]
 pub struct Bool4([bool; 4]);
 
 impl Bool4 {
+    #[inline(always)]
+    pub fn new(a: bool, b: bool, c: bool, d: bool) -> Self {
+        Bool4([a, b, c, d])
+    }
+
     #[inline(always)]
     pub fn new_false() -> Self {
         Self([false, false, false, false])
diff --git a/sub_crates/rmath/src/wide4/mod.rs b/sub_crates/rmath/src/wide4/mod.rs
index a0ebe35..9e4d72c 100644
--- a/sub_crates/rmath/src/wide4/mod.rs
+++ b/sub_crates/rmath/src/wide4/mod.rs
@@ -1,15 +1,14 @@
 use std::{
-    cmp::PartialEq,
+    cmp::{Eq, PartialEq},
     ops::{AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, SubAssign},
 };
 
 use crate::utils::ulps_eq;
-use crate::{difference_of_products, two_prod, two_sum};
+use crate::{difference_of_products, two_diff, two_prod, two_sum};
 
 //-------------------------------------------------------------
 // Which implementation to use.
 
-#[cfg(not(any(target_arch = "x86_64")))]
 mod fallback;
 #[cfg(not(any(target_arch = "x86_64")))]
 pub use fallback::{Bool4, Float4};
@@ -43,9 +42,7 @@ impl Float4 {
         s2 + err2
     }
 
-    /// 3D dot product (only uses the first 3 components).
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version of `dot_3()`.
     #[inline(always)]
     pub fn dot_3_fast(a: Self, b: Self) -> f32 {
         let c = a * b;
@@ -58,16 +55,14 @@ impl Float4 {
         difference_of_products(a.bcad(), b.cabd(), a.cabd(), b.bcad())
     }
 
-    /// 3D cross product (only uses the first 3 components).
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version `cross_3()`.
     #[inline(always)]
     pub fn cross_3_fast(a: Self, b: Self) -> Self {
         (a.bcad() * b.cabd()) - (a.cabd() * b.bcad())
     }
 
     #[inline(always)]
-    pub fn transpose_3x3(m: [Self; 3]) -> [Self; 3] {
+    pub fn transpose_3x3(m: &[Self; 3]) -> [Self; 3] {
         [
             // The fourth component in each row below is arbitrary,
             // but in this case chosen so that it matches the
@@ -82,7 +77,7 @@ impl Float4 {
     ///
     /// Returns `None` if not invertible.
     #[inline]
-    pub fn invert_3x3(m: [Self; 3]) -> Option<[Self; 3]> {
+    pub fn invert_3x3(m: &[Self; 3]) -> Option<[Self; 3]> {
         let m0_bca = m[0].bcad();
         let m1_bca = m[1].bcad();
         let m2_bca = m[2].bcad();
@@ -101,15 +96,13 @@ impl Float4 {
         if det == 0.0 {
             None
         } else {
-            Some(Self::transpose_3x3([abc / det, def / det, ghi / det]))
+            Some(Self::transpose_3x3(&[abc / det, def / det, ghi / det]))
         }
     }
 
-    /// Invert a 3x3 matrix.  Faster but less precise version.
-    ///
-    /// Returns `None` if not invertible.
+    /// Faster but less precise version of `invert_3x3()`.
     #[inline]
-    pub fn invert_3x3_fast(m: [Self; 3]) -> Option<[Self; 3]> {
+    pub fn invert_3x3_fast(m: &[Self; 3]) -> Option<[Self; 3]> {
         let m0_bca = m[0].bcad();
         let m1_bca = m[1].bcad();
         let m2_bca = m[2].bcad();
@@ -128,7 +121,7 @@ impl Float4 {
         if det == 0.0 {
             None
         } else {
-            Some(Self::transpose_3x3([abc / det, def / det, ghi / det]))
+            Some(Self::transpose_3x3(&[abc / det, def / det, ghi / det]))
         }
     }
 
@@ -154,9 +147,7 @@ impl Float4 {
         s2 + err2
     }
 
-    /// Multiplies a 3D vector with a 3x3 matrix.
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version of `vec_mul_3x3()`.
     #[inline]
     pub fn vec_mul_3x3_fast(self, m: &[Self; 3]) -> Self {
         let x = self.aaaa();
@@ -193,9 +184,7 @@ impl Float4 {
         s3 + err3
     }
 
-    /// Transforms a 3d point by an affine transform.
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version of `vec_mul_affine()`.
     #[inline]
     pub fn vec_mul_affine_fast(self, m: &[Self; 3], t: Self) -> Self {
         let x = self.aaaa();
@@ -205,25 +194,26 @@ impl Float4 {
         (x * m[0]) + (y * m[1]) + (z * m[2]) + t
     }
 
-    /// Transforms a 3d point by an affine transform, except it applies
-    /// the translation part before the 3x3 part.
+    /// Transforms a 3d point by an affine transform, except it does
+    /// `(vec - t) * inv_m` instead of `vec * m + t`.
     ///
-    /// This is primarily useful for performing efficient inverse transforms by
-    /// passing an inverted 3x3 part and a negated translation part.
+    /// This is useful for performing efficient inverse transforms while
+    /// only having to invert the 3x3 part of the transform itself.
     ///
-    /// `m` is the 3x3 part of the affine transform, `t` is the translation part.
+    /// `inv_m` is the inverse 3x3 part of the affine transform, `t` is
+    /// the forward translation part.
     #[inline]
-    pub fn vec_mul_affine_rev(self, m: &[Self; 3], t: Self) -> Self {
-        let (v, v_err) = two_sum(self, t);
+    pub fn vec_mul_affine_rev(self, inv_m: &[Self; 3], t: Self) -> Self {
+        let (v, v_err) = two_diff(self, t);
 
         let (x, x_err) = (v.aaaa(), v_err.aaaa());
         let (y, y_err) = (v.bbbb(), v_err.bbbb());
         let (z, z_err) = (v.cccc(), v_err.cccc());
 
         // Products.
-        let ((a, a_err1), a_err2) = (two_prod(x, m[0]), x_err * m[0]);
-        let ((b, b_err1), b_err2) = (two_prod(y, m[1]), y_err * m[1]);
-        let ((c, c_err1), c_err2) = (two_prod(z, m[2]), z_err * m[2]);
+        let ((a, a_err1), a_err2) = (two_prod(x, inv_m[0]), x_err * inv_m[0]);
+        let ((b, b_err1), b_err2) = (two_prod(y, inv_m[1]), y_err * inv_m[1]);
+        let ((c, c_err1), c_err2) = (two_prod(z, inv_m[2]), z_err * inv_m[2]);
         let a_err = a_err1 + a_err2;
         let b_err = b_err1 + b_err2;
         let c_err = c_err1 + c_err2;
@@ -238,19 +228,16 @@ impl Float4 {
         s2 + err2
     }
 
-    /// Transforms a 3d point by an affine transform, except it applies
-    /// the translation part before the 3x3 part.
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version of `vec_mul_affine_rev()`.
     #[inline]
-    pub fn vec_mul_affine_rev_fast(self, m: &[Self; 3], t: Self) -> Self {
-        let v = self + t;
+    pub fn vec_mul_affine_rev_fast(self, inv_m: &[Self; 3], t: Self) -> Self {
+        let v = self - t;
 
         let x = v.aaaa();
         let y = v.bbbb();
         let z = v.cccc();
 
-        (x * m[0]) + (y * m[1]) + (z * m[2])
+        (x * inv_m[0]) + (y * inv_m[1]) + (z * inv_m[2])
     }
 
     /// Returns whether the `Float4`s are approximately equal to each
@@ -290,9 +277,7 @@ impl Float4 {
         )
     }
 
-    /// Transforms one affine transform by another.
-    ///
-    /// Faster but less precise version.
+    /// Faster but less precise version of `affine_mul_affine()`.
     #[inline]
     pub fn affine_mul_affine_fast(
         m1: &[Self; 3],
@@ -389,12 +374,34 @@ impl BitXorAssign for Bool4 {
     }
 }
 
+impl PartialEq for Bool4 {
+    #[inline(always)]
+    fn eq(&self, rhs: &Self) -> bool {
+        self.bitmask() == rhs.bitmask()
+    }
+}
+
+impl Eq for Bool4 {}
+
+impl std::fmt::Debug for Bool4 {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        f.write_str("Bool4(")?;
+        f.debug_list().entries(self.to_bools().iter()).finish()?;
+        f.write_str(")")?;
+
+        Ok(())
+    }
+}
+
 //-------------------------------------------------------------
 
 #[cfg(test)]
 mod tests {
     use super::*;
 
+    //------------
+    // Float4
+
     #[test]
     fn approximate_equality_test() {
         let a = Float4::new(1.0, 2.0, 3.0, 4.0);
@@ -420,6 +427,26 @@ mod tests {
         assert_eq!(v[3], 3.0);
     }
 
+    #[test]
+    fn get() {
+        let v = Float4::new(0.0, 1.0, 2.0, 3.0);
+
+        assert_eq!(v.a(), 0.0);
+        assert_eq!(v.b(), 1.0);
+        assert_eq!(v.c(), 2.0);
+        assert_eq!(v.d(), 3.0);
+    }
+
+    #[test]
+    fn set() {
+        let v = Float4::new(0.0, 1.0, 2.0, 3.0);
+
+        assert_eq!(v.set_a(9.0), Float4::new(9.0, 1.0, 2.0, 3.0));
+        assert_eq!(v.set_b(9.0), Float4::new(0.0, 9.0, 2.0, 3.0));
+        assert_eq!(v.set_c(9.0), Float4::new(0.0, 1.0, 9.0, 3.0));
+        assert_eq!(v.set_d(9.0), Float4::new(0.0, 1.0, 2.0, 9.0));
+    }
+
     #[test]
     fn shuffle() {
         let v = Float4::new(0.0, 1.0, 2.0, 3.0);
@@ -434,10 +461,291 @@ mod tests {
     }
 
     #[test]
-    fn bitmask() {
-        let v1 = Float4::new(0.0, 1.0, 2.0, 3.0);
-        let v2 = Float4::new(9.0, 1.0, 9.0, 3.0);
+    fn abs() {
+        let v1 = Float4::new(-1.0, 2.0, -3.0, 4.0);
+        let v2 = Float4::new(1.0, -2.0, 3.0, -4.0);
 
-        assert_eq!(v1.cmpeq(v2).bitmask(), 0b1010);
+        let r = Float4::new(1.0, 2.0, 3.0, 4.0);
+
+        assert_eq!(v1.abs(), r);
+        assert_eq!(v2.abs(), r);
+    }
+
+    #[test]
+    fn neg() {
+        let v1 = Float4::new(-1.0, 2.0, -3.0, 4.0);
+        let v2 = Float4::new(1.0, -2.0, 3.0, -4.0);
+
+        assert_eq!(-v1, v2);
+        assert_eq!(-v2, v1);
+    }
+
+    #[test]
+    fn cmp_ops() {
+        let a = Float4::new(1.0, 2.0, -2.0, 0.0);
+        let b = Float4::new(1.0, -2.0, 2.0, -0.0);
+
+        assert_eq!(a.cmplt(b), Bool4::new(false, false, true, false));
+        assert_eq!(a.cmplte(b), Bool4::new(true, false, true, true));
+        assert_eq!(a.cmpgt(b), Bool4::new(false, true, false, false));
+        assert_eq!(a.cmpgte(b), Bool4::new(true, true, false, true));
+        assert_eq!(a.cmpeq(b), Bool4::new(true, false, false, true));
+    }
+
+    #[test]
+    fn min_max() {
+        let a = Float4::new(1.0, 2.0, -2.0, 4.0);
+        let b = Float4::new(1.0, -2.0, 2.0, 5.0);
+
+        assert_eq!(a.min(b), Float4::new(1.0, -2.0, -2.0, 4.0));
+        assert_eq!(a.max(b), Float4::new(1.0, 2.0, 2.0, 5.0));
+
+        let c = Float4::new(std::f32::INFINITY, 2.0, std::f32::NAN, 4.0);
+        let d = Float4::new(1.0, -std::f32::INFINITY, 2.0, std::f32::NAN);
+        let r_min = c.min(d);
+        let r_max = c.max(d);
+
+        assert_eq!(r_min.a(), 1.0);
+        assert_eq!(r_min.b(), -std::f32::INFINITY);
+        assert_eq!(r_min.c(), 2.0);
+        assert!(r_min.d().is_nan());
+
+        assert_eq!(r_max.a(), std::f32::INFINITY);
+        assert_eq!(r_max.b(), 2.0);
+        assert_eq!(r_max.c(), 2.0);
+        assert!(r_max.d().is_nan());
+    }
+
+    #[test]
+    fn dot_3() {
+        let v1 = Float4::new(1.0, 2.0, -3.0, 0.0);
+        let v2 = Float4::new(4.0, -5.0, 6.0, 0.0);
+
+        assert_eq!(Float4::dot_3(v1, v2), -24.0);
+        assert_eq!(Float4::dot_3_fast(v1, v2), -24.0);
+    }
+
+    #[test]
+    fn cross_3() {
+        let v1 = Float4::new(1.0, 2.0, -3.0, 0.0);
+        let v2 = Float4::new(4.0, -5.0, 6.0, 0.0);
+
+        let r = Float4::new(-3.0, -18.0, -13.0, 0.0);
+
+        assert_eq!(Float4::cross_3(v1, v2), r);
+        assert_eq!(Float4::cross_3(v2, v1), -r);
+        assert_eq!(Float4::cross_3_fast(v1, v2), r);
+        assert_eq!(Float4::cross_3_fast(v2, v1), -r);
+    }
+
+    #[test]
+    fn transpose_3x3() {
+        let m1 = [
+            Float4::new(1.0, 4.0, 7.0, 0.0),
+            Float4::new(2.0, 5.0, 8.0, 0.0),
+            Float4::new(3.0, 6.0, 9.0, 0.0),
+        ];
+        let m2 = [
+            Float4::new(1.0, 2.0, 3.0, 0.0),
+            Float4::new(4.0, 5.0, 6.0, 0.0),
+            Float4::new(7.0, 8.0, 9.0, 0.0),
+        ];
+
+        assert_eq!(Float4::transpose_3x3(&m1), m2);
+        assert_eq!(Float4::transpose_3x3(&m2), m1);
+    }
+
+    #[test]
+    fn invert_3x3() {
+        let m = [
+            Float4::new(1.0, 3.0, 9.0, 0.0),
+            Float4::new(2.0, 6.0, 2.0, 0.0),
+            Float4::new(2.0, 7.0, 11.0, 0.0),
+        ];
+        let inv_m = [
+            Float4::new(3.25, 1.875, -3.0, 0.0),
+            Float4::new(-1.125, -0.4375, 1.0, 0.0),
+            Float4::new(0.125, -0.0625, 0.0, 0.0),
+        ];
+
+        assert_eq!(Float4::invert_3x3(&m).unwrap(), inv_m);
+        assert_eq!(Float4::invert_3x3(&inv_m).unwrap(), m);
+        assert_eq!(Float4::invert_3x3_fast(&m).unwrap(), inv_m);
+        assert_eq!(Float4::invert_3x3_fast(&inv_m).unwrap(), m);
+    }
+
+    #[test]
+    fn vec_mul_3x3() {
+        let v = Float4::new(1.0, 2.5, 4.0, 0.0);
+        let m = [
+            Float4::new(1.0, 3.0, 9.0, 0.0),
+            Float4::new(2.0, 6.0, 2.0, 0.0),
+            Float4::new(2.0, 7.0, 11.0, 0.0),
+        ];
+
+        let r = Float4::new(14.0, 46.0, 58.0, 0.0);
+
+        assert_eq!(v.vec_mul_3x3(&m), r);
+        assert_eq!(v.vec_mul_3x3_fast(&m), r);
+    }
+
+    #[test]
+    fn vec_mul_affine() {
+        let p = Float4::new(1.0, 2.5, 4.0, 0.0);
+        let xform = (
+            [
+                Float4::new(1.0, 3.0, 9.0, 0.0),
+                Float4::new(2.0, 6.0, 2.0, 0.0),
+                Float4::new(2.0, 7.0, 11.0, 0.0),
+            ],
+            Float4::new(1.5, 8.0, 12.0, 0.0),
+        );
+
+        let r = Float4::new(15.5, 54.0, 70.0, 0.0);
+
+        assert_eq!(p.vec_mul_affine(&xform.0, xform.1), r);
+    }
+
+    #[test]
+    fn vec_mul_affine_rev() {
+        let p = Float4::new(15.5, 54.0, 70.0, 0.0);
+        let inv_m = [
+            Float4::new(3.25, 1.875, -3.0, 0.0),
+            Float4::new(-1.125, -0.4375, 1.0, 0.0),
+            Float4::new(0.125, -0.0625, 0.0, 0.0),
+        ];
+        let t = Float4::new(1.5, 8.0, 12.0, 0.0);
+
+        let r = Float4::new(1.0, 2.5, 4.0, 0.0);
+
+        assert_eq!(p.vec_mul_affine_rev(&inv_m, t), r);
+        assert_eq!(p.vec_mul_affine_rev_fast(&inv_m, t), r);
+    }
+
+    #[test]
+    fn affine_mul_affine() {
+        let a = (
+            [
+                Float4::new(1.0, 3.0, 9.0, 0.0),
+                Float4::new(2.0, 6.0, 2.0, 0.0),
+                Float4::new(2.0, 7.0, 11.0, 0.0),
+            ],
+            Float4::new(1.5, 8.0, 12.0, 0.0),
+        );
+        let b = (
+            [
+                Float4::new(1.0, 2.0, 3.0, 0.0),
+                Float4::new(5.0, 6.0, 7.0, 0.0),
+                Float4::new(9.0, 10.0, 11.0, 0.0),
+            ],
+            Float4::new(13.0, 14.0, 15.0, 0.0),
+        );
+
+        let r = (
+            [
+                Float4::new(97.0, 110.0, 123.0, 0.0),
+                Float4::new(50.0, 60.0, 70.0, 0.0),
+                Float4::new(136.0, 156.0, 176.0, 0.0),
+            ],
+            Float4::new(162.5, 185.0, 207.5, 0.0),
+        );
+
+        assert_eq!(Float4::affine_mul_affine(&a.0, a.1, &b.0, b.1), r);
+        assert_eq!(Float4::affine_mul_affine_fast(&a.0, a.1, &b.0, b.1), r);
+    }
+
+    //------------
+    // Bool4
+
+    #[test]
+    fn bitmask() {
+        assert_eq!(Bool4::new(true, false, false, false).bitmask(), 0b0001);
+        assert_eq!(Bool4::new(false, true, false, false).bitmask(), 0b0010);
+        assert_eq!(Bool4::new(false, false, true, false).bitmask(), 0b0100);
+        assert_eq!(Bool4::new(false, false, false, true).bitmask(), 0b1000);
+
+        assert_eq!(Bool4::new(false, true, false, true).bitmask(), 0b1010);
+        assert_eq!(Bool4::new(true, false, true, false).bitmask(), 0b0101);
+    }
+
+    #[test]
+    fn to_bools() {
+        assert_eq!(
+            Bool4::new(true, false, false, false).to_bools(),
+            [true, false, false, false]
+        );
+        assert_eq!(
+            Bool4::new(false, true, false, false).to_bools(),
+            [false, true, false, false]
+        );
+        assert_eq!(
+            Bool4::new(false, false, true, false).to_bools(),
+            [false, false, true, false]
+        );
+        assert_eq!(
+            Bool4::new(false, false, false, true).to_bools(),
+            [false, false, false, true]
+        );
+
+        assert_eq!(
+            Bool4::new(false, true, false, true).to_bools(),
+            [false, true, false, true]
+        );
+        assert_eq!(
+            Bool4::new(true, false, true, false).to_bools(),
+            [true, false, true, false]
+        );
+    }
+
+    #[test]
+    fn any() {
+        assert_eq!(Bool4::new(true, false, false, false).any(), true);
+        assert_eq!(Bool4::new(false, true, false, false).any(), true);
+        assert_eq!(Bool4::new(false, false, true, false).any(), true);
+        assert_eq!(Bool4::new(false, false, false, true).any(), true);
+
+        assert_eq!(Bool4::new(false, false, false, false).any(), false);
+    }
+
+    #[test]
+    fn all() {
+        assert_eq!(Bool4::new(false, true, true, true).all(), false);
+        assert_eq!(Bool4::new(true, false, true, true).all(), false);
+        assert_eq!(Bool4::new(true, true, false, true).all(), false);
+        assert_eq!(Bool4::new(true, true, true, false).all(), false);
+
+        assert_eq!(Bool4::new(true, true, true, true).all(), true);
+    }
+
+    #[test]
+    fn boolean_ops() {
+        let all = Bool4::new(true, true, true, true);
+        let none = Bool4::new(false, false, false, false);
+        let a = Bool4::new(true, false, true, false);
+        let b = Bool4::new(false, true, false, true);
+
+        // Not.
+        assert_eq!(!a, b);
+        assert_eq!(!b, a);
+        assert_eq!(!all, none);
+        assert_eq!(!none, all);
+
+        // And.
+        assert_eq!(a & b, none);
+        assert_eq!(all & none, none);
+        assert_eq!(all & all, all);
+        assert_eq!(none & none, none);
+
+        // Or.
+        assert_eq!(a | b, all);
+        assert_eq!(all | none, all);
+        assert_eq!(all | all, all);
+        assert_eq!(none | none, none);
+
+        // Xor.
+        assert_eq!(a ^ b, all);
+        assert_eq!(all ^ none, all);
+        assert_eq!(all ^ all, none);
+        assert_eq!(none ^ none, none);
     }
 }
diff --git a/sub_crates/rmath/src/wide4/sse.rs b/sub_crates/rmath/src/wide4/sse.rs
index f7eaee5..6f45e27 100644
--- a/sub_crates/rmath/src/wide4/sse.rs
+++ b/sub_crates/rmath/src/wide4/sse.rs
@@ -3,8 +3,8 @@ use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, Mul, Neg, Not, Sub};
 use std::arch::x86_64::{
     __m128, _mm_add_ps, _mm_and_ps, _mm_castsi128_ps, _mm_cmpeq_ps, _mm_cmpge_ps, _mm_cmpgt_ps,
     _mm_cmple_ps, _mm_cmplt_ps, _mm_div_ps, _mm_fmadd_ps, _mm_max_ps, _mm_min_ps, _mm_movemask_ps,
-    _mm_mul_ps, _mm_or_ps, _mm_rcp_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_ps, _mm_setzero_ps,
-    _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
+    _mm_mul_ps, _mm_or_ps, _mm_rcp_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_set_ps,
+    _mm_setzero_ps, _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
 };
 
 use crate::FMulAdd;
@@ -277,7 +277,10 @@ impl Neg for Float4 {
 
     #[inline(always)]
     fn neg(self) -> Self {
-        Self(unsafe { _mm_mul_ps(self.0, _mm_set1_ps(-1.0)) })
+        Self(unsafe {
+            let abs_mask = _mm_castsi128_ps(_mm_set1_epi32(1 << 31));
+            _mm_xor_ps(self.0, abs_mask)
+        })
     }
 }
 
@@ -291,11 +294,26 @@ impl FMulAdd for Float4 {
 //=============================================================
 // Bool4
 
-#[derive(Debug, Copy, Clone)]
+#[derive(Copy, Clone)]
 #[repr(transparent)]
 pub struct Bool4(__m128);
 
 impl Bool4 {
+    #[inline(always)]
+    pub fn new(a: bool, b: bool, c: bool, d: bool) -> Self {
+        const ONES: i32 = unsafe { std::mem::transmute(0xffffffffu32) };
+
+        unsafe {
+            let ints = _mm_set_epi32(
+                d as i32 * ONES,
+                c as i32 * ONES,
+                b as i32 * ONES,
+                a as i32 * ONES,
+            );
+            Bool4(_mm_castsi128_ps(ints))
+        }
+    }
+
     #[inline(always)]
     pub fn new_false() -> Self {
         Self(unsafe { _mm_setzero_ps() })
diff --git a/sub_crates/rmath/src/xform.rs b/sub_crates/rmath/src/xform.rs
index a6479c1..8b274c4 100644
--- a/sub_crates/rmath/src/xform.rs
+++ b/sub_crates/rmath/src/xform.rs
@@ -3,14 +3,33 @@
 use std::ops::{Add, Mul};
 
 use crate::point::Point;
+use crate::sealed::Sealed;
 use crate::wide4::Float4;
 
-/// An affine transform.
+/// A forward affine transform.
+///
+/// Use this for working with transforms that still need to be
+/// manipulated or composed with other transforms, or for storing
+/// transforms more compactly.
+///
+/// Note: slightly counter-intuitively, even though this can perform
+/// forward (but not inverse) transforms on points and vectors, it is
+/// capable of *inverse* (but not forward) transforms on surface normals.
+/// This is because forward transforms on surface normals require the
+/// inverse transform matrix.
+///
+/// Convert to an [`XformFull`] for a larger-format type capable of
+/// efficiently performing both forward and inverse transforms on all
+/// types, but which is effectively "frozen" in terms of further
+/// manipulation of the transform itself.
 #[derive(Debug, Copy, Clone, PartialEq)]
 #[repr(C)]
 pub struct Xform {
-    pub m: [Float4; 3], // Linear matrix.
-    pub t: Float4,      // Translation.
+    /// Rotation/scale/shear matrix.
+    pub m: [Float4; 3],
+
+    /// Translation.
+    pub t: Float4,
 }
 
 impl Xform {
@@ -87,29 +106,26 @@ impl Xform {
         eq
     }
 
-    /// Computes a "full" version of the transform, which can do both
-    /// forward and inverse transforms.
+    /// Compute the "full" version of the transform.
     #[inline]
-    pub fn into_full(self) -> Option<XformFull> {
-        if let Some(m_inv) = Float4::invert_3x3(self.m) {
+    pub fn to_full(&self) -> Option<XformFull> {
+        if let Some(inv_m) = Float4::invert_3x3(&self.m) {
             Some(XformFull {
-                m: self.m,
-                m_inv: m_inv,
-                t: self.t,
+                fwd: *self,
+                inv_m: inv_m,
             })
         } else {
             None
         }
     }
 
-    /// Faster but less precise version of `compute_full()`.
+    /// Faster but less precise version of `to_full()`.
     #[inline]
-    pub fn into_full_fast(self) -> Option<XformFull> {
-        if let Some(m_inv) = Float4::invert_3x3_fast(self.m) {
+    pub fn to_full_fast(&self) -> Option<XformFull> {
+        if let Some(inv_m) = Float4::invert_3x3_fast(&self.m) {
             Some(XformFull {
-                m: self.m,
-                m_inv: m_inv,
-                t: self.t,
+                fwd: *self,
+                inv_m: inv_m,
             })
         } else {
             None
@@ -172,36 +188,74 @@ impl Add for Xform {
     }
 }
 
+impl AsXform for Xform {
+    #[inline(always)]
+    fn as_xform(&self) -> &Xform {
+        self
+    }
+}
+
+impl Sealed for Xform {}
+
 //-------------------------------------------------------------
 
-/// An affine transform with precomputed data for performing reverse
-/// transforms, among other things.
+/// A combined forward/inverse affine transform.
+///
+/// Unlike [`Xform`], this can perform both forward and inverse
+/// transforms on all types.  However, it also takes up more space and
+/// is effectively "frozen" in terms of further manipulation.  Prefer
+/// [`Xform`] when manipulating or composing transforms, and also
+/// when storing transforms if space is a consideration.
+///
+/// Note: only the 3x3 part of the transform is stored inverted.  This
+/// is because it's both trivial and more numerically stable to reuse
+/// the forward translation vector to do inverse transforms, as
+/// `(point - fwd.t) * inv_m`.
 #[derive(Debug, Copy, Clone)]
 #[repr(C)]
 pub struct XformFull {
-    pub m: [Float4; 3],     // Forward linear matrix.
-    pub m_inv: [Float4; 3], // Inverse linear matrix.
-    pub t: Float4,          // Forward translation.
+    /// Forward transform.
+    pub fwd: Xform,
+
+    /// Inverse rotation/scale/shear matrix.
+    pub inv_m: [Float4; 3],
 }
 
 impl XformFull {
     pub fn identity() -> Self {
         Self {
-            m: [
+            fwd: Xform {
+                m: [
+                    Float4::new(1.0, 0.0, 0.0, 0.0),
+                    Float4::new(0.0, 1.0, 0.0, 0.0),
+                    Float4::new(0.0, 0.0, 1.0, 0.0),
+                ],
+                t: Float4::splat(0.0),
+            },
+            inv_m: [
                 Float4::new(1.0, 0.0, 0.0, 0.0),
                 Float4::new(0.0, 1.0, 0.0, 0.0),
                 Float4::new(0.0, 0.0, 1.0, 0.0),
             ],
-            m_inv: [
-                Float4::new(1.0, 0.0, 0.0, 0.0),
-                Float4::new(0.0, 1.0, 0.0, 0.0),
-                Float4::new(0.0, 0.0, 1.0, 0.0),
-            ],
-            t: Float4::splat(0.0),
         }
     }
 }
 
+impl AsXform for XformFull {
+    #[inline(always)]
+    fn as_xform(&self) -> &Xform {
+        &self.fwd
+    }
+}
+
+impl Sealed for XformFull {}
+
+//-------------------------------------------------------------
+
+pub trait AsXform: Sealed {
+    fn as_xform(&self) -> &Xform;
+}
+
 //-------------------------------------------------------------
 
 #[cfg(test)]