diff --git a/Cargo.lock b/Cargo.lock
index efa4183..a16b557 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -177,12 +177,6 @@ dependencies = [
  "wasi",
 ]
 
-[[package]]
-name = "glam"
-version = "0.15.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "411e0584defa447c328f25c756ba3d0685727ecc126b46c3c1176001141cd4b6"
-
 [[package]]
 name = "half"
 version = "1.7.1"
@@ -339,7 +333,6 @@ dependencies = [
  "copy_in_place",
  "crossbeam",
  "fastapprox",
- "glam",
  "half",
  "halton",
  "kioku",
@@ -604,7 +597,7 @@ checksum = "26e3528b09b1f1b1e152342a4462d1e80d568dc5623a0772252a6e584a53d550"
 name = "spectral_upsampling"
 version = "0.1.0"
 dependencies = [
- "glam",
+ "rmath",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index cfafd46..3ef777e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,7 +35,6 @@ png_encode_mini = "0.1.2"
 rustc-serialize = "0.3"
 scoped_threadpool = "0.1"
 time = "0.1"
-glam = "0.15"
 fastapprox = "0.3"
 
 # Local crate dependencies
diff --git a/psychoblend/assembly.py b/psychoblend/assembly.py
index aecb954..73df05f 100644
--- a/psychoblend/assembly.py
+++ b/psychoblend/assembly.py
@@ -314,7 +314,7 @@ class Instance:
         w.indent()
         w.write("Data [$%s]\n" % self.data_name)
         for mat in self.time_xforms:
-            w.write("Transform [%s]\n" % mat2str(mat.inverted()))
+            w.write("Transform [%s]\n" % mat2str(mat))
         for ms in self.ob.material_slots:
             if ms != None:
                 w.write("SurfaceShaderBind [$%s]\n" % escape_name(ms.material.name))
diff --git a/psychoblend/world.py b/psychoblend/world.py
index c886fa4..bcaa645 100644
--- a/psychoblend/world.py
+++ b/psychoblend/world.py
@@ -82,7 +82,7 @@ class Camera:
         mat = self.ob.matrix_world.copy()
         matz = Matrix()
         matz[2][2] = -1
-        self.xforms += [mat * matz]
+        self.xforms += [(mat * matz).inverted()]
 
     def export(self, render_engine, w):
         render_engine.update_stats("", "Psychopath: Exporting %s" % self.ob.name)
diff --git a/src/accel/bvh4.rs b/src/accel/bvh4.rs
index 3b78077..3c3b169 100644
--- a/src/accel/bvh4.rs
+++ b/src/accel/bvh4.rs
@@ -6,7 +6,7 @@
 
 use std::mem::{transmute, MaybeUninit};
 
-use glam::BVec4A;
+use rmath::wide4::Bool4;
 
 use kioku::Arena;
 
@@ -123,12 +123,12 @@ impl<'a> BVH4<'a> {
                     traversal_code,
                 } => {
                     node_tests += ray_stack.ray_count_in_next_task() as u64;
-                    let mut all_hits = BVec4A::default();
+                    let mut all_hits = Bool4::new_false();
 
                     // Ray testing
                     ray_stack.pop_do_next_task_and_push_rays(children.len(), |ray_idx| {
                         if rays.is_done(ray_idx) {
-                            BVec4A::default()
+                            Bool4::new_false()
                         } else {
                             let hits = if bounds.len() == 1 {
                                 bounds[0].intersect_ray(
diff --git a/src/bbox.rs b/src/bbox.rs
index f4a2ab6..68e0c4a 100644
--- a/src/bbox.rs
+++ b/src/bbox.rs
@@ -7,7 +7,7 @@ use std::{
 
 use crate::{
     lerp::{lerp, lerp_slice, Lerp},
-    math::{fast_minf32, Point, Transform, Vector},
+    math::{fast_minf32, Point, Vector, Xform, XformFull},
 };
 
 const BBOX_MAXT_ADJUST: f32 = 1.000_000_24;
@@ -41,12 +41,12 @@ impl BBox {
     // Returns whether the given ray intersects with the bbox.
     pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> bool {
         // Calculate slab intersections
-        let t1 = (self.min.co - orig.co) * dir_inv.co;
-        let t2 = (self.max.co - orig.co) * dir_inv.co;
+        let t1 = (self.min.0 - orig.0) * dir_inv.0;
+        let t2 = (self.max.0 - orig.0) * dir_inv.0;
 
         // Find the far and near intersection
-        let far_t = t1.max(t2).extend(std::f32::INFINITY);
-        let near_t = t1.min(t2).extend(0.0);
+        let far_t = t1.max(t2).set_d(std::f32::INFINITY);
+        let near_t = t1.min(t2).set_d(0.0);
         let far_hit_t = fast_minf32(far_t.min_element() * BBOX_MAXT_ADJUST, max_t);
         let near_hit_t = near_t.max_element();
 
@@ -54,8 +54,10 @@ impl BBox {
         near_hit_t <= far_hit_t
     }
 
-    // Creates a new BBox transformed into a different space.
-    pub fn transformed(&self, xform: Transform) -> BBox {
+    // Creates a new BBox transformed from its local space to the
+    // given space.
+    #[must_use]
+    pub fn xform(&self, xform: &XformFull) -> BBox {
         // BBox corners
         let vs = [
             Point::new(self.min.x(), self.min.y(), self.min.z()),
@@ -71,7 +73,7 @@ impl BBox {
         // Transform BBox corners and make new bbox
         let mut b = BBox::new();
         for v in &vs {
-            let v = *v * xform;
+            let v = v.xform(xform);
             b.min = v.min(b.min);
             b.max = v.max(b.max);
         }
@@ -103,12 +105,8 @@ impl BitOr for BBox {
 
     fn bitor(self, rhs: BBox) -> BBox {
         BBox::from_points(
-            Point {
-                co: self.min.co.min(rhs.min.co),
-            },
-            Point {
-                co: self.max.co.max(rhs.max.co),
-            },
+            Point(self.min.0.min(rhs.min.0)),
+            Point(self.max.0.max(rhs.max.0)),
         )
     }
 }
@@ -124,14 +122,7 @@ impl BitOr<Point> for BBox {
     type Output = BBox;
 
     fn bitor(self, rhs: Point) -> BBox {
-        BBox::from_points(
-            Point {
-                co: self.min.co.min(rhs.co),
-            },
-            Point {
-                co: self.max.co.max(rhs.co),
-            },
-        )
+        BBox::from_points(Point(self.min.0.min(rhs.0)), Point(self.max.0.max(rhs.0)))
     }
 }
 
@@ -150,7 +141,11 @@ impl Lerp for BBox {
     }
 }
 
-pub fn transform_bbox_slice_from(bbs_in: &[BBox], xforms: &[Transform], bbs_out: &mut Vec<BBox>) {
+pub fn transform_bbox_slice_from(
+    bbs_in: &[BBox],
+    xforms: &[Xform],
+    bbs_out: &mut Vec<BBox>,
+) -> Result<(), ()> {
     bbs_out.clear();
 
     // Transform the bounding boxes
@@ -158,17 +153,19 @@ pub fn transform_bbox_slice_from(bbs_in: &[BBox], xforms: &[Transform], bbs_out:
         bbs_out.extend_from_slice(bbs_in);
     } else if bbs_in.len() == xforms.len() {
         for (bb, xf) in Iterator::zip(bbs_in.iter(), xforms.iter()) {
-            bbs_out.push(bb.transformed(xf.inverse()));
+            bbs_out.push(bb.xform(&xf.into_full().ok_or(())?));
         }
     } else if bbs_in.len() > xforms.len() {
         let s = (bbs_in.len() - 1) as f32;
         for (i, bb) in bbs_in.iter().enumerate() {
-            bbs_out.push(bb.transformed(lerp_slice(xforms, i as f32 / s).inverse()));
+            bbs_out.push(bb.xform(&lerp_slice(xforms, i as f32 / s).into_full().ok_or(())?));
         }
     } else if bbs_in.len() < xforms.len() {
         let s = (xforms.len() - 1) as f32;
         for (i, xf) in xforms.iter().enumerate() {
-            bbs_out.push(lerp_slice(bbs_in, i as f32 / s).transformed(xf.inverse()));
+            bbs_out.push(lerp_slice(bbs_in, i as f32 / s).xform(&xf.into_full().ok_or(())?));
         }
     }
+
+    Ok(())
 }
diff --git a/src/bbox4.rs b/src/bbox4.rs
index 9464388..c0e5861 100644
--- a/src/bbox4.rs
+++ b/src/bbox4.rs
@@ -9,16 +9,16 @@ use crate::{
     math::{Point, Vector},
 };
 
-use glam::{BVec4A, Vec4};
+use rmath::wide4::{Bool4, Float4};
 
 const BBOX_MAXT_ADJUST: f32 = 1.000_000_24;
 
 /// A SIMD set of 4 3D axis-aligned bounding boxes.
 #[derive(Debug, Copy, Clone)]
 pub struct BBox4 {
-    pub x: (Vec4, Vec4), // (min, max)
-    pub y: (Vec4, Vec4), // (min, max)
-    pub z: (Vec4, Vec4), // (min, max)
+    pub x: (Float4, Float4), // (min, max)
+    pub y: (Float4, Float4), // (min, max)
+    pub z: (Float4, Float4), // (min, max)
 }
 
 impl BBox4 {
@@ -26,16 +26,16 @@ impl BBox4 {
     pub fn new() -> BBox4 {
         BBox4 {
             x: (
-                Vec4::splat(std::f32::INFINITY),
-                Vec4::splat(std::f32::NEG_INFINITY),
+                Float4::splat(std::f32::INFINITY),
+                Float4::splat(std::f32::NEG_INFINITY),
             ),
             y: (
-                Vec4::splat(std::f32::INFINITY),
-                Vec4::splat(std::f32::NEG_INFINITY),
+                Float4::splat(std::f32::INFINITY),
+                Float4::splat(std::f32::NEG_INFINITY),
             ),
             z: (
-                Vec4::splat(std::f32::INFINITY),
-                Vec4::splat(std::f32::NEG_INFINITY),
+                Float4::splat(std::f32::INFINITY),
+                Float4::splat(std::f32::NEG_INFINITY),
             ),
         }
     }
@@ -45,30 +45,30 @@ impl BBox4 {
     pub fn from_bboxes(b1: BBox, b2: BBox, b3: BBox, b4: BBox) -> BBox4 {
         BBox4 {
             x: (
-                Vec4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()),
-                Vec4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x()),
+                Float4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()),
+                Float4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x()),
             ),
             y: (
-                Vec4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()),
-                Vec4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y()),
+                Float4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()),
+                Float4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y()),
             ),
             z: (
-                Vec4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()),
-                Vec4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z()),
+                Float4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()),
+                Float4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z()),
             ),
         }
     }
 
     // Returns whether the given ray intersects with the bboxes.
-    pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> BVec4A {
+    pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> Bool4 {
         // Get the ray data into SIMD format.
-        let ro_x = Vec4::splat(orig.co[0]);
-        let ro_y = Vec4::splat(orig.co[1]);
-        let ro_z = Vec4::splat(orig.co[2]);
-        let rdi_x = Vec4::splat(dir_inv.co[0]);
-        let rdi_y = Vec4::splat(dir_inv.co[1]);
-        let rdi_z = Vec4::splat(dir_inv.co[2]);
-        let max_t = Vec4::splat(max_t);
+        let ro_x = orig.0.aaaa();
+        let ro_y = orig.0.bbbb();
+        let ro_z = orig.0.cccc();
+        let rdi_x = dir_inv.0.aaaa();
+        let rdi_y = dir_inv.0.bbbb();
+        let rdi_z = dir_inv.0.cccc();
+        let max_t = Float4::splat(max_t);
 
         // Slab tests
         let t1_x = (self.x.0 - ro_x) * rdi_x;
@@ -87,10 +87,11 @@ impl BBox4 {
         let t_near_z = t1_z.min(t2_z);
 
         // Calculate over-all far t hit.
-        let far_t = (t_far_x.min(t_far_y.min(t_far_z)) * Vec4::splat(BBOX_MAXT_ADJUST)).min(max_t);
+        let far_t =
+            (t_far_x.min(t_far_y.min(t_far_z)) * Float4::splat(BBOX_MAXT_ADJUST)).min(max_t);
 
         // Calculate over-all near t hit.
-        let near_t = t_near_x.max(t_near_y).max(t_near_z.max(Vec4::splat(0.0)));
+        let near_t = t_near_x.max(t_near_y).max(t_near_z.max(Float4::splat(0.0)));
 
         // Hit results
         near_t.cmplt(far_t)
diff --git a/src/camera.rs b/src/camera.rs
index 788e207..4f65a7f 100644
--- a/src/camera.rs
+++ b/src/camera.rs
@@ -4,14 +4,14 @@ use kioku::Arena;
 
 use crate::{
     lerp::lerp_slice,
-    math::{Point, Transform, Vector},
+    math::{Point, Vector, Xform},
     ray::Ray,
     sampling::square_to_circle,
 };
 
 #[derive(Copy, Clone, Debug)]
 pub struct Camera<'a> {
-    transforms: &'a [Transform],
+    transforms: &'a [Xform],
     fovs: &'a [f32],
     tfovs: &'a [f32],
     aperture_radii: &'a [f32],
@@ -21,7 +21,7 @@ pub struct Camera<'a> {
 impl<'a> Camera<'a> {
     pub fn new(
         arena: &'a Arena,
-        transforms: &[Transform],
+        transforms: &[Xform],
         fovs: &[f32],
         mut aperture_radii: &[f32],
         mut focus_distances: &[f32],
@@ -73,7 +73,7 @@ impl<'a> Camera<'a> {
 
     pub fn generate_ray(&self, x: f32, y: f32, time: f32, wavelength: f32, u: f32, v: f32) -> Ray {
         // Get time-interpolated camera settings
-        let transform = lerp_slice(self.transforms, time);
+        let transform = lerp_slice(self.transforms, time).into_full_fast().unwrap();
         let tfov = lerp_slice(self.tfovs, time);
         let aperture_radius = lerp_slice(self.aperture_radii, time);
         let focus_distance = lerp_slice(self.focus_distances, time);
@@ -93,8 +93,8 @@ impl<'a> Camera<'a> {
         .normalized();
 
         Ray {
-            orig: orig * transform,
-            dir: dir * transform,
+            orig: orig.xform_inv_fast(&transform),
+            dir: dir.xform_inv_fast(&transform),
             time: time,
             wavelength: wavelength,
             max_t: std::f32::INFINITY,
diff --git a/src/color.rs b/src/color.rs
index 3b1ec50..9e68b70 100644
--- a/src/color.rs
+++ b/src/color.rs
@@ -1,11 +1,11 @@
 use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign};
 
+use crate::math::Float4;
 pub use color::{
     rec709_e_to_xyz, rec709_to_xyz, xyz_to_aces_ap0, xyz_to_aces_ap0_e, xyz_to_rec709,
     xyz_to_rec709_e,
 };
 use compact::fluv::fluv32;
-use glam::Vec4;
 use half::f16;
 use spectral_upsampling::meng::{spectrum_xyz_to_p_4, EQUAL_ENERGY_REFLECTANCE};
 
@@ -31,10 +31,10 @@ fn nth_wavelength(hero_wavelength: f32, n: usize) -> f32 {
     }
 }
 
-/// Returns all wavelengths of a hero wavelength set as a Vec4
+/// Returns all wavelengths of a hero wavelength set as a Float4
 #[inline(always)]
-fn wavelengths(hero_wavelength: f32) -> Vec4 {
-    Vec4::new(
+fn wavelengths(hero_wavelength: f32) -> Float4 {
+    Float4::new(
         nth_wavelength(hero_wavelength, 0),
         nth_wavelength(hero_wavelength, 1),
         nth_wavelength(hero_wavelength, 2),
@@ -94,7 +94,7 @@ impl Color {
             } => {
                 SpectralSample::from_parts(
                     // TODO: make this SIMD
-                    Vec4::new(
+                    Float4::new(
                         plancks_law(temperature, wls[0]) * factor,
                         plancks_law(temperature, wls[1]) * factor,
                         plancks_law(temperature, wls[2]) * factor,
@@ -109,7 +109,7 @@ impl Color {
             } => {
                 SpectralSample::from_parts(
                     // TODO: make this SIMD
-                    Vec4::new(
+                    Float4::new(
                         plancks_law_normalized(temperature, wls[0]) * factor,
                         plancks_law_normalized(temperature, wls[1]) * factor,
                         plancks_law_normalized(temperature, wls[2]) * factor,
@@ -386,7 +386,7 @@ fn plancks_law_normalized(temperature: f32, wavelength: f32) -> f32 {
 
 #[derive(Copy, Clone, Debug)]
 pub struct SpectralSample {
-    pub e: Vec4,
+    pub e: Float4,
     hero_wavelength: f32,
 }
 
@@ -394,7 +394,7 @@ impl SpectralSample {
     pub fn new(wavelength: f32) -> SpectralSample {
         debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX);
         SpectralSample {
-            e: Vec4::splat(0.0),
+            e: Float4::splat(0.0),
             hero_wavelength: wavelength,
         }
     }
@@ -403,12 +403,12 @@ impl SpectralSample {
     pub fn from_value(value: f32, wavelength: f32) -> SpectralSample {
         debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX);
         SpectralSample {
-            e: Vec4::splat(value),
+            e: Float4::splat(value),
             hero_wavelength: wavelength,
         }
     }
 
-    pub fn from_parts(e: Vec4, wavelength: f32) -> SpectralSample {
+    pub fn from_parts(e: Float4, wavelength: f32) -> SpectralSample {
         debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX);
         SpectralSample {
             e: e,
@@ -599,8 +599,8 @@ impl DivAssign<f32> for XYZ {
 /// the method in the paper "Physically Meaningful Rendering using Tristimulus
 /// Colours" by Meng et al.
 #[inline(always)]
-fn xyz_to_spectrum_4(xyz: (f32, f32, f32), wavelengths: Vec4) -> Vec4 {
-    spectrum_xyz_to_p_4(wavelengths, xyz) * Vec4::splat(1.0 / EQUAL_ENERGY_REFLECTANCE)
+fn xyz_to_spectrum_4(xyz: (f32, f32, f32), wavelengths: Float4) -> Float4 {
+    spectrum_xyz_to_p_4(wavelengths, xyz) * Float4::splat(1.0 / EQUAL_ENERGY_REFLECTANCE)
     // aces_to_spectrum_p4(wavelengths, xyz_to_aces_ap0_e(xyz))
 }
 
diff --git a/src/lerp.rs b/src/lerp.rs
index 65a6479..8ffea7e 100644
--- a/src/lerp.rs
+++ b/src/lerp.rs
@@ -1,6 +1,6 @@
 #![allow(dead_code)]
 
-use math3d::{Normal, Point, Transform, Vector};
+use rmath::{wide4::Float4, Normal, Point, Vector, Xform};
 
 /// Trait for allowing a type to be linearly interpolated.
 pub trait Lerp: Copy {
@@ -100,36 +100,34 @@ impl<T: Lerp> Lerp for [T; 4] {
     }
 }
 
-impl Lerp for glam::Vec4 {
-    fn lerp(self, other: glam::Vec4, alpha: f32) -> glam::Vec4 {
+impl Lerp for Float4 {
+    fn lerp(self, other: Self, alpha: f32) -> Self {
         (self * (1.0 - alpha)) + (other * alpha)
     }
 }
 
-impl Lerp for Transform {
-    fn lerp(self, other: Transform, alpha: f32) -> Transform {
+impl Lerp for Xform {
+    fn lerp(self, other: Self, alpha: f32) -> Self {
         (self * (1.0 - alpha)) + (other * alpha)
     }
 }
 
 impl Lerp for Normal {
-    fn lerp(self, other: Normal, alpha: f32) -> Normal {
+    fn lerp(self, other: Self, alpha: f32) -> Self {
         (self * (1.0 - alpha)) + (other * alpha)
     }
 }
 
 impl Lerp for Point {
-    fn lerp(self, other: Point, alpha: f32) -> Point {
-        let s = self;
-        let o = other;
-        Point {
-            co: (s.co * (1.0 - alpha)) + (o.co * alpha),
-        }
+    fn lerp(self, other: Self, alpha: f32) -> Self {
+        let a = self.0;
+        let b = other.0;
+        Point((a * (1.0 - alpha)) + (b * alpha))
     }
 }
 
 impl Lerp for Vector {
-    fn lerp(self, other: Vector, alpha: f32) -> Vector {
+    fn lerp(self, other: Self, alpha: f32) -> Self {
         (self * (1.0 - alpha)) + (other * alpha)
     }
 }
diff --git a/src/light/mod.rs b/src/light/mod.rs
index a45c567..817e91d 100644
--- a/src/light/mod.rs
+++ b/src/light/mod.rs
@@ -6,7 +6,7 @@ use std::fmt::Debug;
 
 use crate::{
     color::SpectralSample,
-    math::{Normal, Point, Transform, Vector},
+    math::{Normal, Point, Vector, XformFull},
     surface::Surface,
 };
 
@@ -34,7 +34,7 @@ pub trait SurfaceLight: Surface {
     /// - The pdf of the sample.
     fn sample_from_point(
         &self,
-        space: &Transform,
+        space: &XformFull,
         arr: Point,
         u: f32,
         v: f32,
diff --git a/src/light/rectangle_light.rs b/src/light/rectangle_light.rs
index 5460dfc..ab183c1 100644
--- a/src/light/rectangle_light.rs
+++ b/src/light/rectangle_light.rs
@@ -5,7 +5,7 @@ use crate::{
     boundable::Boundable,
     color::{Color, SpectralSample},
     lerp::lerp_slice,
-    math::{cross, dot, Normal, Point, Transform, Vector},
+    math::{cross, dot, Normal, Point, Vector, Xform, XformFull},
     ray::{RayBatch, RayStack},
     sampling::{
         spherical_triangle_solid_angle, triangle_surface_area, uniform_sample_spherical_triangle,
@@ -51,7 +51,7 @@ impl<'a> RectangleLight<'a> {
     // more efficiently by inlining it there.
     fn sample_pdf(
         &self,
-        space: &Transform,
+        space: &XformFull,
         arr: Point,
         sample_dir: Vector,
         hit_point: Point,
@@ -64,11 +64,10 @@ impl<'a> RectangleLight<'a> {
         let dim = lerp_slice(self.dimensions, time);
 
         // Get the four corners of the rectangle, transformed into world space
-        let space_inv = space.inverse();
-        let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0) * space_inv;
-        let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0) * space_inv;
-        let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0) * space_inv;
-        let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv;
+        let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0).xform(space);
+        let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0).xform(space);
+        let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0).xform(space);
+        let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0).xform(space);
 
         // Get the four corners of the rectangle, projected on to the unit
         // sphere centered around arr.
@@ -82,7 +81,7 @@ impl<'a> RectangleLight<'a> {
         let area_2 = spherical_triangle_solid_angle(sp4, sp1, sp3);
 
         // World-space surface normal
-        let normal = Normal::new(0.0, 0.0, 1.0) * space_inv;
+        let normal = Normal::new(0.0, 0.0, 1.0).xform(space);
 
         // PDF
         if (area_1 + area_2) < SIMPLE_SAMPLING_THRESHOLD {
@@ -97,7 +96,7 @@ impl<'a> RectangleLight<'a> {
 
     // fn outgoing(
     //     &self,
-    //     space: &Transform,
+    //     space: &XformFull,
     //     dir: Vector,
     //     u: f32,
     //     v: f32,
@@ -120,7 +119,7 @@ impl<'a> RectangleLight<'a> {
 impl<'a> SurfaceLight for RectangleLight<'a> {
     fn sample_from_point(
         &self,
-        space: &Transform,
+        space: &XformFull,
         arr: Point,
         u: f32,
         v: f32,
@@ -135,11 +134,10 @@ impl<'a> SurfaceLight for RectangleLight<'a> {
         let surface_area_inv: f64 = 1.0 / surface_area;
 
         // Get the four corners of the rectangle, transformed into world space
-        let space_inv = space.inverse();
-        let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0) * space_inv;
-        let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0) * space_inv;
-        let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0) * space_inv;
-        let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv;
+        let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0).xform(space);
+        let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0).xform(space);
+        let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0).xform(space);
+        let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0).xform(space);
 
         // Get the four corners of the rectangle relative to arr.
         let lp1 = p1 - arr;
@@ -158,7 +156,7 @@ impl<'a> SurfaceLight for RectangleLight<'a> {
         let area_2 = spherical_triangle_solid_angle(sp4, sp1, sp3);
 
         // Calculate world-space surface normal
-        let normal = Normal::new(0.0, 0.0, 1.0) * space_inv;
+        let normal = Normal::new(0.0, 0.0, 1.0).xform(space);
 
         if (area_1 + area_2) < SIMPLE_SAMPLING_THRESHOLD {
             // Simple sampling for more distant lights
@@ -215,18 +213,16 @@ impl<'a> SurfaceLight for RectangleLight<'a> {
             };
 
             // Project shadow_vec back onto the light's surface
-            let arr_local = arr * *space;
-            let shadow_vec_local = shadow_vec * *space;
+            let arr_local = arr.xform_inv(space);
+            let shadow_vec_local = shadow_vec.xform_inv(space);
             let shadow_vec_local = shadow_vec_local * (-arr_local.z() / shadow_vec_local.z());
             let mut sample_point_local = arr_local + shadow_vec_local;
             {
                 let x = sample_point_local.x().max(dim.0 * -0.5).min(dim.0 * 0.5);
                 let y = sample_point_local.y().max(dim.1 * -0.5).min(dim.1 * 0.5);
-                sample_point_local.set_x(x);
-                sample_point_local.set_y(y);
-                sample_point_local.set_z(0.0);
+                sample_point_local = Point::new(x, y, 0.0);
             }
-            let sample_point = sample_point_local * space_inv;
+            let sample_point = sample_point_local.xform(space);
             let point_err = 0.0001; // TODO: this is a hack, do properly.
 
             // Calculate pdf and light energy
@@ -261,7 +257,7 @@ impl<'a> Surface for RectangleLight<'a> {
         ray_stack: &mut RayStack,
         isects: &mut [SurfaceIntersection],
         shader: &dyn SurfaceShader,
-        space: &[Transform],
+        space: &[Xform],
     ) {
         let _ = shader; // Silence 'unused' warning
 
@@ -275,13 +271,17 @@ impl<'a> Surface for RectangleLight<'a> {
             let dim = lerp_slice(self.dimensions, time);
             let xform = lerp_slice(space, time);
 
-            let space_inv = xform.inverse();
+            let space = if let Some(xform) = xform.into_full() {
+                xform
+            } else {
+                return;
+            };
 
             // Get the four corners of the rectangle, transformed into world space
-            let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0) * space_inv;
-            let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0) * space_inv;
-            let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0) * space_inv;
-            let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv;
+            let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0).xform(&space);
+            let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0).xform(&space);
+            let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0).xform(&space);
+            let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0).xform(&space);
 
             // Test against two triangles that make up the light
             let ray_pre = triangle::RayTriPrecompute::new(dir);
@@ -302,9 +302,9 @@ impl<'a> Surface for RectangleLight<'a> {
                                 pos_err: pos_err,
                                 nor: normal,
                                 nor_g: normal,
-                                local_space: xform,
+                                local_space: space,
                                 sample_pdf: self.sample_pdf(
-                                    &xform,
+                                    &space,
                                     orig,
                                     dir,
                                     pos,
diff --git a/src/light/sphere_light.rs b/src/light/sphere_light.rs
index 03ea40a..a7ef6ab 100644
--- a/src/light/sphere_light.rs
+++ b/src/light/sphere_light.rs
@@ -7,7 +7,7 @@ use crate::{
     boundable::Boundable,
     color::{Color, SpectralSample},
     lerp::lerp_slice,
-    math::{coordinate_system_from_vector, dot, Normal, Point, Transform, Vector},
+    math::{coordinate_system_from_vector, dot, Normal, Point, Vector, Xform, XformFull},
     ray::{RayBatch, RayStack},
     sampling::{uniform_sample_cone, uniform_sample_cone_pdf, uniform_sample_sphere},
     shading::surface_closure::SurfaceClosure,
@@ -50,7 +50,7 @@ impl<'a> SphereLight<'a> {
     // more efficiently by inlining it there.
     fn sample_pdf(
         &self,
-        space: &Transform,
+        space: &XformFull,
         arr: Point,
         sample_dir: Vector,
         sample_u: f32,
@@ -61,7 +61,7 @@ impl<'a> SphereLight<'a> {
         // We're not using these, silence warnings
         let _ = (sample_dir, sample_u, sample_v, wavelength);
 
-        let arr = arr * *space;
+        let arr = arr.xform_inv(space);
         let pos = Point::new(0.0, 0.0, 0.0);
         let radius: f64 = lerp_slice(self.radii, time) as f64;
 
@@ -84,7 +84,7 @@ impl<'a> SphereLight<'a> {
 impl<'a> SurfaceLight for SphereLight<'a> {
     fn sample_from_point(
         &self,
-        space: &Transform,
+        space: &XformFull,
         arr: Point,
         u: f32,
         v: f32,
@@ -92,12 +92,9 @@ impl<'a> SurfaceLight for SphereLight<'a> {
         time: f32,
     ) -> (SpectralSample, (Point, Normal, f32), f32) {
         // TODO: track fp error due to transforms
-        let arr = arr * *space;
+        let arr = arr.xform_inv(space);
         let pos = Point::new(0.0, 0.0, 0.0);
 
-        // Precalculate local->world space transform matrix
-        let inv_space = space.inverse();
-
         // Calculate time interpolated values
         let radius: f64 = lerp_slice(self.radii, time) as f64;
         let col = lerp_slice(self.colors, time);
@@ -115,7 +112,7 @@ impl<'a> SurfaceLight for SphereLight<'a> {
         // TODO: do this properly.  This is a total hack.
         let sample_point_err = {
             let v = Vector::new(radius as f32, radius as f32, radius as f32);
-            let v2 = v * inv_space;
+            let v2 = v.xform(space);
             v2.length() * SAMPLE_POINT_FUDGE
         };
 
@@ -159,8 +156,8 @@ impl<'a> SurfaceLight for SphereLight<'a> {
                 let normal = (arr + sample_vec).into_vector().normalized();
                 let point = normal * radius as f32;
                 (
-                    point.into_point() * inv_space,
-                    normal.into_normal() * inv_space,
+                    point.into_point().xform(space),
+                    normal.into_normal().xform(space),
                 )
             };
             let pdf = uniform_sample_cone_pdf(cos_theta_max);
@@ -177,8 +174,8 @@ impl<'a> SurfaceLight for SphereLight<'a> {
                 let normal = (arr + sample_vec).into_vector().normalized();
                 let point = normal * radius as f32;
                 (
-                    point.into_point() * inv_space,
-                    normal.into_normal() * inv_space,
+                    point.into_point().xform(space),
+                    normal.into_normal().xform(space),
                 )
             };
             let pdf = 1.0 / (4.0 * PI_64);
@@ -210,7 +207,7 @@ impl<'a> Surface for SphereLight<'a> {
         ray_stack: &mut RayStack,
         isects: &mut [SurfaceIntersection],
         shader: &dyn SurfaceShader,
-        space: &[Transform],
+        space: &[Xform],
     ) {
         let _ = shader; // Silence 'unused' warning
 
@@ -218,14 +215,18 @@ impl<'a> Surface for SphereLight<'a> {
             let time = rays.time(ray_idx);
 
             // Get the transform space
-            let xform = lerp_slice(space, time);
+            let xform = if let Some(xform) = lerp_slice(space, time).into_full() {
+                xform
+            } else {
+                return;
+            };
 
             // Get the radius of the sphere at the ray's time
             let radius = lerp_slice(self.radii, time); // Radius of the sphere
 
             // Get the ray origin and direction in local space
             let orig = rays.orig_local(ray_idx).into_vector();
-            let dir = rays.dir(ray_idx) * xform;
+            let dir = rays.dir(ray_idx).xform_inv(&xform);
 
             // Code adapted to Rust from https://github.com/Tecla/Rayito
             // Ray-sphere intersection can result in either zero, one or two points
@@ -286,18 +287,16 @@ impl<'a> Surface for SphereLight<'a> {
                 isects[ray_idx] = SurfaceIntersection::Occlude;
                 rays.mark_done(ray_idx);
             } else {
-                let inv_xform = xform.inverse();
-
                 // Position is calculated from the local-space ray and t, and then
                 // re-projected onto the surface of the sphere.
                 let t_pos = orig + (dir * t);
                 let unit_pos = t_pos.normalized();
-                let pos = (unit_pos * radius * inv_xform).into_point();
+                let pos = (unit_pos * radius).into_point().xform(&xform);
 
                 // TODO: proper error bounds.
                 let pos_err = 0.001;
 
-                let normal = unit_pos.into_normal() * inv_xform;
+                let normal = unit_pos.into_normal().xform(&xform);
 
                 let intersection_data = SurfaceIntersectionData {
                     incoming: rays.dir(ray_idx),
diff --git a/src/math.rs b/src/math.rs
index fec2f06..b29f6b1 100644
--- a/src/math.rs
+++ b/src/math.rs
@@ -2,7 +2,9 @@
 
 use std::f32;
 
-pub use math3d::{cross, dot, CrossProduct, DotProduct, Normal, Point, Transform, Vector};
+pub use rmath::{
+    cross, dot, wide4::Float4, CrossProduct, DotProduct, Normal, Point, Vector, Xform, XformFull,
+};
 
 /// Clamps a value between a min and max.
 pub fn clamp<T: PartialOrd>(v: T, lower: T, upper: T) -> T {
diff --git a/src/parse/psy.rs b/src/parse/psy.rs
index 4d2f6f5..b6ea518 100644
--- a/src/parse/psy.rs
+++ b/src/parse/psy.rs
@@ -10,7 +10,7 @@ use crate::{
     camera::Camera,
     color::{rec709_e_to_xyz, Color},
     light::WorldLightSource,
-    math::Transform,
+    math::Xform,
     renderer::Renderer,
     scene::Scene,
     scene::World,
@@ -553,17 +553,17 @@ fn parse_world<'a>(arena: &'a Arena, tree: &'a DataTree) -> Result<World<'a>, Ps
     }
 }
 
-pub fn parse_matrix(contents: &str) -> Result<Transform, PsyParseError> {
+pub fn parse_matrix(contents: &str) -> Result<Xform, PsyParseError> {
     if let IResult::Ok((leftover, ns)) = all_consuming(tuple((
         ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32,
         ws_f32, ws_f32, ws_f32, ws_f32, ws_f32,
     )))(contents)
     {
         if leftover.is_empty() {
-            return Ok(Transform::new_from_values(
+            return Ok(Xform::new(
                 // We throw away the last row, since it's not necessarily affine.
                 // TODO: is there a more correct way to handle this?
-                ns.0, ns.4, ns.8, ns.12, ns.1, ns.5, ns.9, ns.13, ns.2, ns.6, ns.10, ns.14,
+                ns.0, ns.1, ns.2, ns.4, ns.5, ns.6, ns.8, ns.9, ns.10, ns.12, ns.13, ns.14,
             ));
         }
     }
diff --git a/src/ray.rs b/src/ray.rs
index daf29ab..035cdf2 100644
--- a/src/ray.rs
+++ b/src/ray.rs
@@ -1,8 +1,8 @@
 #![allow(dead_code)]
 
-use glam::BVec4A;
+use rmath::wide4::Bool4;
 
-use crate::math::{Point, Transform, Vector};
+use crate::math::{Point, Vector, XformFull};
 
 type RayIndexType = u16;
 type FlagType = u8;
@@ -85,9 +85,7 @@ impl RayBatch {
 
     pub fn set_from_ray(&mut self, ray: &Ray, is_occlusion: bool, idx: usize) {
         self.hot[idx].orig_local = ray.orig;
-        self.hot[idx].dir_inv_local = Vector {
-            co: ray.dir.co.recip(),
-        };
+        self.hot[idx].dir_inv_local = Vector(ray.dir.0.recip());
         self.hot[idx].max_t = ray.max_t;
         self.hot[idx].time = ray.time;
         self.hot[idx].flags = if is_occlusion { OCCLUSION_FLAG } else { 0 };
@@ -115,15 +113,13 @@ impl RayBatch {
     }
 
     /// Updates the accel data of the given ray (at index `idx`) with the
-    /// given world-to-local-space transform matrix.
+    /// given transform.
     ///
     /// This should be called when entering (and exiting) traversal of a
     /// new transform space.
-    pub fn update_local(&mut self, idx: usize, xform: &Transform) {
-        self.hot[idx].orig_local = self.cold[idx].orig * *xform;
-        self.hot[idx].dir_inv_local = Vector {
-            co: (self.cold[idx].dir * *xform).co.recip(),
-        };
+    pub fn update_local(&mut self, idx: usize, xform: &XformFull) {
+        self.hot[idx].orig_local = self.cold[idx].orig.xform_inv(xform);
+        self.hot[idx].dir_inv_local = Vector((self.cold[idx].dir.xform_inv(xform)).0.recip());
     }
 
     //==========================================================
@@ -349,7 +345,7 @@ impl RayStack {
     /// indicated lanes.
     pub fn pop_do_next_task_and_push_rays<F>(&mut self, output_lane_count: usize, mut handle_ray: F)
     where
-        F: FnMut(usize) -> BVec4A,
+        F: FnMut(usize) -> Bool4,
     {
         // Pop the task and do necessary bookkeeping.
         let task = self.tasks.pop().unwrap();
diff --git a/src/renderer.rs b/src/renderer.rs
index 9cbd1e6..8325173 100644
--- a/src/renderer.rs
+++ b/src/renderer.rs
@@ -9,8 +9,6 @@ use std::{
 use crossbeam::sync::MsQueue;
 use scoped_threadpool::Pool;
 
-use glam::Vec4;
-
 use crate::{
     accel::ACCEL_NODE_RAY_TESTS,
     color::{map_0_1_to_wavelength, SpectralSample, XYZ},
@@ -18,7 +16,7 @@ use crate::{
     hash::hash_u32,
     hilbert,
     image::Image,
-    math::{probit, upper_power_of_two},
+    math::{probit, upper_power_of_two, Float4},
     mis::power_heuristic,
     ray::{Ray, RayBatch},
     scene::{Scene, SceneLightSample},
@@ -379,12 +377,12 @@ pub struct LightPath {
     wavelength: f32,
 
     next_bounce_ray: Option<Ray>,
-    next_attenuation_fac: Vec4,
+    next_attenuation_fac: Float4,
 
     closure_sample_pdf: f32,
-    light_attenuation: Vec4,
-    pending_color_addition: Vec4,
-    color: Vec4,
+    light_attenuation: Float4,
+    pending_color_addition: Float4,
+    color: Float4,
 }
 
 #[allow(clippy::new_ret_no_self)]
@@ -412,12 +410,12 @@ impl LightPath {
                 wavelength: wavelength,
 
                 next_bounce_ray: None,
-                next_attenuation_fac: Vec4::splat(1.0),
+                next_attenuation_fac: Float4::splat(1.0),
 
                 closure_sample_pdf: 1.0,
-                light_attenuation: Vec4::splat(1.0),
-                pending_color_addition: Vec4::splat(0.0),
-                color: Vec4::splat(0.0),
+                light_attenuation: Float4::splat(1.0),
+                pending_color_addition: Float4::splat(0.0),
+                color: Float4::splat(0.0),
             },
             scene.camera.generate_ray(
                 image_plane_co.0,
diff --git a/src/scene/assembly.rs b/src/scene/assembly.rs
index e9d8ce9..90a296d 100644
--- a/src/scene/assembly.rs
+++ b/src/scene/assembly.rs
@@ -10,7 +10,7 @@ use crate::{
     color::SpectralSample,
     lerp::lerp_slice,
     light::SurfaceLight,
-    math::{Normal, Point, Transform},
+    math::{Normal, Point, Xform, XformFull},
     shading::SurfaceShader,
     surface::{Surface, SurfaceIntersection},
     transform_stack::TransformStack,
@@ -21,7 +21,7 @@ pub struct Assembly<'a> {
     // Instance list
     pub instances: &'a [Instance],
     pub light_instances: &'a [Instance],
-    pub xforms: &'a [Transform],
+    pub xforms: &'a [Xform],
 
     // Surface shader list
     pub surface_shaders: &'a [&'a dyn SurfaceShader],
@@ -58,15 +58,20 @@ impl<'a> Assembly<'a> {
         } = *intr
         {
             let sel_xform = if !xform_stack.top().is_empty() {
-                lerp_slice(xform_stack.top(), time)
+                if let Some(xform) = lerp_slice(xform_stack.top(), time).into_full() {
+                    xform
+                } else {
+                    return None;
+                }
             } else {
-                Transform::new()
+                XformFull::identity()
             };
+
             if let Some((light_i, sel_pdf, whittled_n)) = self.light_accel.select(
-                idata.incoming * sel_xform,
-                idata.pos * sel_xform,
-                idata.nor * sel_xform,
-                idata.nor_g * sel_xform,
+                idata.incoming.xform_inv(&sel_xform),
+                idata.pos.xform_inv(&sel_xform),
+                idata.nor.xform_inv(&sel_xform),
+                idata.nor_g.xform_inv(&sel_xform),
                 &closure,
                 time,
                 n,
@@ -76,12 +81,12 @@ impl<'a> Assembly<'a> {
                     InstanceType::Object => {
                         match self.objects[inst.data_index] {
                             Object::SurfaceLight(light) => {
-                                // Get the world-to-object space transform of the light
+                                // Get the transform of the light.
                                 let xform = if let Some((a, b)) = inst.transform_indices {
                                     let pxforms = xform_stack.top();
                                     let xform = lerp_slice(&self.xforms[a..b], time);
                                     if !pxforms.is_empty() {
-                                        lerp_slice(pxforms, time) * xform
+                                        lerp_slice(pxforms, time).compose(&xform)
                                     } else {
                                         xform
                                     }
@@ -90,15 +95,20 @@ impl<'a> Assembly<'a> {
                                     if !pxforms.is_empty() {
                                         lerp_slice(pxforms, time)
                                     } else {
-                                        Transform::new()
+                                        Xform::identity()
                                     }
-                                };
+                                }
+                                .into_full();
 
                                 // Sample the light
-                                let (color, sample_geo, pdf) = light.sample_from_point(
-                                    &xform, idata.pos, uvw.0, uvw.1, wavelength, time,
-                                );
-                                return Some((color, sample_geo, pdf, sel_pdf));
+                                if let Some(xform) = xform {
+                                    let (color, sample_geo, pdf) = light.sample_from_point(
+                                        &xform, idata.pos, uvw.0, uvw.1, wavelength, time,
+                                    );
+                                    return Some((color, sample_geo, pdf, sel_pdf));
+                                } else {
+                                    return None;
+                                }
                             }
 
                             _ => unimplemented!(),
@@ -106,7 +116,7 @@ impl<'a> Assembly<'a> {
                     }
 
                     InstanceType::Assembly => {
-                        // Push the world-to-object space transforms of the assembly onto
+                        // Push the transform of the assembly onto
                         // the transform stack.
                         if let Some((a, b)) = inst.transform_indices {
                             xform_stack.push(&self.xforms[a..b]);
@@ -152,7 +162,7 @@ pub struct AssemblyBuilder<'a> {
 
     // Instance list
     instances: Vec<Instance>,
-    xforms: Vec<Transform>,
+    xforms: Vec<Xform>,
 
     // Shader list
     surface_shaders: Vec<&'a dyn SurfaceShader>,
@@ -224,7 +234,7 @@ impl<'a> AssemblyBuilder<'a> {
         &mut self,
         name: &str,
         surface_shader_name: Option<&str>,
-        xforms: Option<&[Transform]>,
+        xforms: Option<&[Xform]>,
     ) {
         // Make sure name exists
         if !self.name_exists(name) {
@@ -380,7 +390,7 @@ impl<'a> AssemblyBuilder<'a> {
             // Transform the bounding boxes, if necessary
             if let Some((xstart, xend)) = inst.transform_indices {
                 let xf = &self.xforms[xstart..xend];
-                transform_bbox_slice_from(&bbs, xf, &mut bbs2);
+                transform_bbox_slice_from(&bbs, xf, &mut bbs2).unwrap();
             } else {
                 bbs2.clear();
                 bbs2.extend(bbs);
diff --git a/src/shading/surface_closure.rs b/src/shading/surface_closure.rs
index b1f3741..6e0aa16 100644
--- a/src/shading/surface_closure.rs
+++ b/src/shading/surface_closure.rs
@@ -2,12 +2,10 @@
 
 use std::f32::consts::PI as PI_32;
 
-use glam::Vec4;
-
 use crate::{
     color::{Color, SpectralSample},
     lerp::{lerp, Lerp},
-    math::{clamp, dot, zup_to_vec, Normal, Vector},
+    math::{clamp, dot, zup_to_vec, Float4, Normal, Vector},
     sampling::cosine_sample_hemisphere,
 };
 
@@ -512,7 +510,7 @@ mod ggx_closure {
                 rev_fresnel,
             );
 
-            SpectralSample::from_parts(Vec4::new(c0, c1, c2, c3), wavelength)
+            SpectralSample::from_parts(Float4::new(c0, c1, c2, c3), wavelength)
         };
 
         // Calculate everything else
diff --git a/src/surface/micropoly_batch.rs b/src/surface/micropoly_batch.rs
index ccb2029..54de7a4 100644
--- a/src/surface/micropoly_batch.rs
+++ b/src/surface/micropoly_batch.rs
@@ -9,7 +9,7 @@ use crate::{
     bbox::BBox,
     boundable::Boundable,
     lerp::lerp_slice,
-    math::{cross, dot, Normal, Point, Transform},
+    math::{cross, dot, Normal, Point, Xform, XformFull},
     ray::{RayBatch, RayStack},
     shading::SurfaceClosure,
 };
@@ -150,13 +150,17 @@ impl<'a> MicropolyBatch<'a> {
         rays: &mut RayBatch,
         ray_stack: &mut RayStack,
         isects: &mut [SurfaceIntersection],
-        space: &[Transform],
+        space: &[Xform],
     ) {
         // Precalculate transform for non-motion blur cases
         let static_mat_space = if space.len() == 1 {
-            lerp_slice(space, 0.0).inverse()
+            if let Some(xform) = space[0].into_full() {
+                xform
+            } else {
+                return;
+            }
         } else {
-            Transform::new()
+            XformFull::identity()
         };
 
         self.accel
@@ -182,11 +186,11 @@ impl<'a> MicropolyBatch<'a> {
                             );
                             if !space.is_empty() {
                                 (*tri_cache[i].as_mut_ptr()).0 =
-                                    (*tri_cache[i].as_mut_ptr()).0 * static_mat_space;
+                                    (*tri_cache[i].as_mut_ptr()).0.xform(&static_mat_space);
                                 (*tri_cache[i].as_mut_ptr()).1 =
-                                    (*tri_cache[i].as_mut_ptr()).1 * static_mat_space;
+                                    (*tri_cache[i].as_mut_ptr()).1.xform(&static_mat_space);
                                 (*tri_cache[i].as_mut_ptr()).2 =
-                                    (*tri_cache[i].as_mut_ptr()).2 * static_mat_space;
+                                    (*tri_cache[i].as_mut_ptr()).2.xform(&static_mat_space);
                             }
                         }
                     }
@@ -205,7 +209,11 @@ impl<'a> MicropolyBatch<'a> {
                     // Calculate the ray space, if necessary.
                     let mat_space = if space.len() > 1 {
                         // Per-ray transform, for motion blur
-                        lerp_slice(space, ray_time).inverse()
+                        if let Some(xform) = lerp_slice(space, ray_time).into_full() {
+                            xform
+                        } else {
+                            return;
+                        }
                     } else {
                         static_mat_space
                     };
@@ -251,9 +259,9 @@ impl<'a> MicropolyBatch<'a> {
                             };
 
                             if !space.is_empty() {
-                                tri.0 = tri.0 * mat_space;
-                                tri.1 = tri.1 * mat_space;
-                                tri.2 = tri.2 * mat_space;
+                                tri.0 = tri.0.xform(&mat_space);
+                                tri.1 = tri.1.xform(&mat_space);
+                                tri.2 = tri.2.xform(&mat_space);
                             }
 
                             tri
@@ -311,7 +319,7 @@ impl<'a> MicropolyBatch<'a> {
                             let n1 = lerp_slice(n1_slice, ray_time).normalized();
                             let n2 = lerp_slice(n2_slice, ray_time).normalized();
 
-                            let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space;
+                            let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)).xform(&mat_space);
                             if dot(s_nor, geo_normal) >= 0.0 {
                                 s_nor
                             } else {
diff --git a/src/surface/mod.rs b/src/surface/mod.rs
index a718f4d..6b20132 100644
--- a/src/surface/mod.rs
+++ b/src/surface/mod.rs
@@ -10,7 +10,7 @@ use std::fmt::Debug;
 
 use crate::{
     boundable::Boundable,
-    math::{Normal, Point, Transform, Vector},
+    math::{Normal, Point, Vector, Xform, XformFull},
     ray::{RayBatch, RayStack},
     shading::surface_closure::SurfaceClosure,
     shading::SurfaceShader,
@@ -25,7 +25,7 @@ pub trait Surface: Boundable + Debug + Sync {
         ray_stack: &mut RayStack,
         isects: &mut [SurfaceIntersection],
         shader: &dyn SurfaceShader,
-        space: &[Transform],
+        space: &[Xform],
     );
 }
 
@@ -80,13 +80,13 @@ pub enum SurfaceIntersection {
 
 #[derive(Debug, Copy, Clone)]
 pub struct SurfaceIntersectionData {
-    pub incoming: Vector, // Direction of the incoming ray
-    pub pos: Point,       // Position of the intersection
+    pub incoming: Vector, // Direction of the incoming ray.
+    pub pos: Point,       // Position of the intersection.
     pub pos_err: f32,     // Error magnitude of the intersection position.  Imagine
     // a cube centered around `pos` with dimensions of `2 * pos_err`.
-    pub nor: Normal,            // Shading normal
-    pub nor_g: Normal,          // True geometric normal
-    pub local_space: Transform, // Matrix from global space to local space
-    pub t: f32,                 // Ray t-value at the intersection point
-    pub sample_pdf: f32,        // The PDF of getting this point by explicitly sampling the surface
+    pub nor: Normal,            // Shading normal.
+    pub nor_g: Normal,          // True geometric normal.
+    pub local_space: XformFull, // Matrix from local to world space.
+    pub t: f32,                 // Ray t-value at the intersection point.
+    pub sample_pdf: f32,        // The PDF of getting this point by explicitly sampling the surface.
 }
diff --git a/src/surface/triangle.rs b/src/surface/triangle.rs
index 5e0f60f..25dd17a 100644
--- a/src/surface/triangle.rs
+++ b/src/surface/triangle.rs
@@ -162,7 +162,7 @@ pub fn surface_point(tri: (Point, Point, Point), bary: (f32, f32, f32)) -> (Poin
         + (tri.1.into_vector().abs() * bary.1)
         + (tri.2.into_vector().abs() * bary.2))
         * fp_gamma(7))
-    .co
+    .0
     .max_element();
 
     (pos, pos_err)
diff --git a/src/surface/triangle_mesh.rs b/src/surface/triangle_mesh.rs
index 6b16ab6..ae82dac 100644
--- a/src/surface/triangle_mesh.rs
+++ b/src/surface/triangle_mesh.rs
@@ -7,7 +7,7 @@ use crate::{
     bbox::BBox,
     boundable::Boundable,
     lerp::lerp_slice,
-    math::{cross, dot, Normal, Point, Transform},
+    math::{cross, dot, Normal, Point, Xform, XformFull},
     ray::{RayBatch, RayStack},
     shading::SurfaceShader,
 };
@@ -128,13 +128,17 @@ impl<'a> Surface for TriangleMesh<'a> {
         ray_stack: &mut RayStack,
         isects: &mut [SurfaceIntersection],
         shader: &dyn SurfaceShader,
-        space: &[Transform],
+        space: &[Xform],
     ) {
         // Precalculate transform for non-motion blur cases
         let static_mat_space = if space.len() == 1 {
-            lerp_slice(space, 0.0).inverse()
+            if let Some(xform) = lerp_slice(space, 0.0).into_full() {
+                xform
+            } else {
+                return;
+            }
         } else {
-            Transform::new()
+            XformFull::identity()
         };
 
         self.accel
@@ -160,11 +164,11 @@ impl<'a> Surface for TriangleMesh<'a> {
                             );
                             if !space.is_empty() {
                                 (*tri_cache[i].as_mut_ptr()).0 =
-                                    (*tri_cache[i].as_mut_ptr()).0 * static_mat_space;
+                                    (*tri_cache[i].as_mut_ptr()).0.xform(&static_mat_space);
                                 (*tri_cache[i].as_mut_ptr()).1 =
-                                    (*tri_cache[i].as_mut_ptr()).1 * static_mat_space;
+                                    (*tri_cache[i].as_mut_ptr()).1.xform(&static_mat_space);
                                 (*tri_cache[i].as_mut_ptr()).2 =
-                                    (*tri_cache[i].as_mut_ptr()).2 * static_mat_space;
+                                    (*tri_cache[i].as_mut_ptr()).2.xform(&static_mat_space);
                             }
                         }
                     }
@@ -183,7 +187,11 @@ impl<'a> Surface for TriangleMesh<'a> {
                     // Calculate the ray space, if necessary.
                     let mat_space = if space.len() > 1 {
                         // Per-ray transform, for motion blur
-                        lerp_slice(space, ray_time).inverse()
+                        if let Some(xform) = lerp_slice(space, ray_time).into_full() {
+                            xform
+                        } else {
+                            return;
+                        }
                     } else {
                         static_mat_space
                     };
@@ -229,9 +237,9 @@ impl<'a> Surface for TriangleMesh<'a> {
                             };
 
                             if !space.is_empty() {
-                                tri.0 = tri.0 * mat_space;
-                                tri.1 = tri.1 * mat_space;
-                                tri.2 = tri.2 * mat_space;
+                                tri.0 = tri.0.xform(&mat_space);
+                                tri.1 = tri.1.xform(&mat_space);
+                                tri.2 = tri.2.xform(&mat_space);
                             }
 
                             tri
@@ -289,7 +297,7 @@ impl<'a> Surface for TriangleMesh<'a> {
                             let n1 = lerp_slice(n1_slice, ray_time).normalized();
                             let n2 = lerp_slice(n2_slice, ray_time).normalized();
 
-                            let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space;
+                            let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)).xform(&mat_space);
                             if dot(s_nor, geo_normal) >= 0.0 {
                                 s_nor
                             } else {
diff --git a/src/tracer.rs b/src/tracer.rs
index 8e85db6..88833d3 100644
--- a/src/tracer.rs
+++ b/src/tracer.rs
@@ -4,7 +4,7 @@ use crate::{
     accel::ray_code,
     color::{rec709_to_xyz, Color},
     lerp::lerp_slice,
-    math::Transform,
+    math::XformFull,
     ray::{RayBatch, RayStack},
     scene::{Assembly, InstanceType, Object},
     shading::{SimpleSurfaceShader, SurfaceShader},
@@ -63,7 +63,7 @@ impl<'a> TracerInner<'a> {
 
         // Prep the accel part of the rays.
         {
-            let ident = Transform::new();
+            let ident = XformFull::identity();
             for i in 0..rays.len() {
                 rays.update_local(i, &ident);
             }
@@ -105,7 +105,15 @@ impl<'a> TracerInner<'a> {
                     let xforms = self.xform_stack.top();
                     ray_stack.do_next_task(|ray_idx| {
                         let t = rays.time(ray_idx);
-                        rays.update_local(ray_idx, &lerp_slice(xforms, t));
+                        rays.update_local(
+                            ray_idx,
+                            &if let Some(xform) = lerp_slice(xforms, t).into_full() {
+                                xform
+                            } else {
+                                // TODO: filter out ray instead.
+                                XformFull::identity()
+                            },
+                        );
                     });
                     ray_stack.duplicate_next_task();
                 }
@@ -137,10 +145,18 @@ impl<'a> TracerInner<'a> {
                     if !xforms.is_empty() {
                         ray_stack.pop_do_next_task(|ray_idx| {
                             let t = rays.time(ray_idx);
-                            rays.update_local(ray_idx, &lerp_slice(xforms, t));
+                            rays.update_local(
+                                ray_idx,
+                                &if let Some(xform) = lerp_slice(xforms, t).into_full() {
+                                    xform
+                                } else {
+                                    // TODO: filter out ray instead.
+                                    XformFull::identity()
+                                },
+                            );
                         });
                     } else {
-                        let ident = Transform::new();
+                        let ident = XformFull::identity();
                         ray_stack.pop_do_next_task(|ray_idx| {
                             rays.update_local(ray_idx, &ident);
                         });
diff --git a/src/transform_stack.rs b/src/transform_stack.rs
index 8219799..ce5356d 100644
--- a/src/transform_stack.rs
+++ b/src/transform_stack.rs
@@ -3,10 +3,10 @@ use std::{
     mem::{transmute, MaybeUninit},
 };
 
-use crate::{algorithm::merge_slices_to, math::Transform};
+use crate::{algorithm::merge_slices_to, math::Xform};
 
 pub struct TransformStack {
-    stack: Vec<MaybeUninit<Transform>>,
+    stack: Vec<MaybeUninit<Xform>>,
     stack_indices: Vec<usize>,
 }
 
@@ -30,11 +30,11 @@ impl TransformStack {
         self.stack_indices.push(0);
     }
 
-    pub fn push(&mut self, xforms: &[Transform]) {
+    pub fn push(&mut self, xforms: &[Xform]) {
         assert!(!xforms.is_empty());
 
         if self.stack.is_empty() {
-            let xforms: &[MaybeUninit<Transform>] = unsafe { transmute(xforms) };
+            let xforms: &[MaybeUninit<Xform>] = unsafe { transmute(xforms) };
             self.stack.extend(xforms);
         } else {
             let sil = self.stack_indices.len();
@@ -54,7 +54,7 @@ impl TransformStack {
                 unsafe { transmute(&xfs1[i1..i2]) },
                 xforms,
                 xfs2,
-                |xf1, xf2| *xf1 * *xf2,
+                |xf1, xf2| xf2.compose(xf1),
             );
         }
 
@@ -73,7 +73,7 @@ impl TransformStack {
         self.stack_indices.pop();
     }
 
-    pub fn top(&self) -> &[Transform] {
+    pub fn top(&self) -> &[Xform] {
         let sil = self.stack_indices.len();
         let i1 = self.stack_indices[sil - 2];
         let i2 = self.stack_indices[sil - 1];
diff --git a/sub_crates/rmath/src/normal.rs b/sub_crates/rmath/src/normal.rs
index 950ac38..64160a8 100644
--- a/sub_crates/rmath/src/normal.rs
+++ b/sub_crates/rmath/src/normal.rs
@@ -56,6 +56,16 @@ impl Normal {
         self.0.c()
     }
 
+    #[inline(always)]
+    pub fn get_n(self, i: usize) -> f32 {
+        match i {
+            0 => self.x(),
+            1 => self.y(),
+            2 => self.z(),
+            _ => panic!("Out of bounds index into 3D vector."),
+        }
+    }
+
     #[inline(always)]
     #[must_use]
     pub fn set_x(self, x: f32) -> Self {
diff --git a/sub_crates/rmath/src/point.rs b/sub_crates/rmath/src/point.rs
index 669335b..f260d74 100644
--- a/sub_crates/rmath/src/point.rs
+++ b/sub_crates/rmath/src/point.rs
@@ -47,6 +47,16 @@ impl Point {
         self.0.c()
     }
 
+    #[inline(always)]
+    pub fn get_n(self, i: usize) -> f32 {
+        match i {
+            0 => self.x(),
+            1 => self.y(),
+            2 => self.z(),
+            _ => panic!("Out of bounds index into 3D vector."),
+        }
+    }
+
     #[inline(always)]
     #[must_use]
     pub fn set_x(self, x: f32) -> Self {
diff --git a/sub_crates/rmath/src/vector.rs b/sub_crates/rmath/src/vector.rs
index 8cdf424..21491a2 100644
--- a/sub_crates/rmath/src/vector.rs
+++ b/sub_crates/rmath/src/vector.rs
@@ -37,6 +37,11 @@ impl Vector {
         Self(self.0 / self.length())
     }
 
+    #[inline(always)]
+    pub fn abs(self) -> Self {
+        Self(self.0.abs())
+    }
+
     #[inline(always)]
     pub fn into_point(self) -> Point {
         Point(self.0)
@@ -62,6 +67,16 @@ impl Vector {
         self.0.c()
     }
 
+    #[inline(always)]
+    pub fn get_n(self, i: usize) -> f32 {
+        match i {
+            0 => self.x(),
+            1 => self.y(),
+            2 => self.z(),
+            _ => panic!("Out of bounds index into 3D vector."),
+        }
+    }
+
     #[inline(always)]
     #[must_use]
     pub fn set_x(self, x: f32) -> Self {
diff --git a/sub_crates/rmath/src/wide4.rs b/sub_crates/rmath/src/wide4.rs
index 6398d41..4e449e2 100644
--- a/sub_crates/rmath/src/wide4.rs
+++ b/sub_crates/rmath/src/wide4.rs
@@ -9,7 +9,7 @@ use crate::{difference_of_products, two_prod, two_sum};
 
 pub use fallback::{Bool4, Float4};
 mod fallback {
-    use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Not, Sub};
+    use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, Mul, Neg, Not, Sub};
 
     use crate::FMulAdd;
 
@@ -65,28 +65,44 @@ mod fallback {
             ])
         }
 
-        // /// Horizontal minimum.
-        // #[inline(always)]
-        // pub fn hmin(self) -> f32 {
-        //     let a = self.0[0].min(self.0[1]);
-        //     let b = self.0[2].min(self.0[3]);
-        //     a.min(b)
-        // }
+        /// Horizontal minimum.
+        #[inline(always)]
+        pub fn min_element(self) -> f32 {
+            let a = self.0[0].min(self.0[1]);
+            let b = self.0[2].min(self.0[3]);
+            a.min(b)
+        }
 
-        // /// Horizontal maximum.
-        // #[inline(always)]
-        // pub fn hmax(self) -> f32 {
-        //     let a = self.0[0].max(self.0[1]);
-        //     let b = self.0[2].max(self.0[3]);
-        //     a.max(b)
-        // }
+        /// Horizontal maximum.
+        #[inline(always)]
+        pub fn max_element(self) -> f32 {
+            let a = self.0[0].max(self.0[1]);
+            let b = self.0[2].max(self.0[3]);
+            a.max(b)
+        }
+
+        /// 1.0 / self
+        #[inline(always)]
+        pub fn recip(self) -> Self {
+            Float4::splat(1.0) / self
+        }
+
+        #[inline(always)]
+        pub fn abs(self) -> Self {
+            Float4::new(
+                self.a().abs(),
+                self.b().abs(),
+                self.c().abs(),
+                self.d().abs(),
+            )
+        }
 
         //-----------------------------------------------------
         // Comparisons.
 
         /// Less than.
         #[inline(always)]
-        pub fn lt(self, rhs: Self) -> Bool4 {
+        pub fn cmplt(self, rhs: Self) -> Bool4 {
             Bool4([
                 self.0[0] < rhs.0[0],
                 self.0[1] < rhs.0[1],
@@ -97,7 +113,7 @@ mod fallback {
 
         /// Less than or equal.
         #[inline(always)]
-        pub fn lte(self, rhs: Self) -> Bool4 {
+        pub fn cmplte(self, rhs: Self) -> Bool4 {
             Bool4([
                 self.0[0] <= rhs.0[0],
                 self.0[1] <= rhs.0[1],
@@ -108,7 +124,7 @@ mod fallback {
 
         /// Greater than.
         #[inline(always)]
-        pub fn gt(self, rhs: Self) -> Bool4 {
+        pub fn cmpgt(self, rhs: Self) -> Bool4 {
             Bool4([
                 self.0[0] > rhs.0[0],
                 self.0[1] > rhs.0[1],
@@ -119,7 +135,7 @@ mod fallback {
 
         /// Greater than or equal.
         #[inline(always)]
-        pub fn gte(self, rhs: Self) -> Bool4 {
+        pub fn cmpgte(self, rhs: Self) -> Bool4 {
             Bool4([
                 self.0[0] >= rhs.0[0],
                 self.0[1] >= rhs.0[1],
@@ -130,7 +146,7 @@ mod fallback {
 
         /// Equal.
         #[inline(always)]
-        pub fn eq(self, rhs: Self) -> Bool4 {
+        pub fn cmpeq(self, rhs: Self) -> Bool4 {
             Bool4([
                 self.0[0] == rhs.0[0],
                 self.0[1] == rhs.0[1],
@@ -232,6 +248,15 @@ mod fallback {
         }
     }
 
+    impl Index<usize> for Float4 {
+        type Output = f32;
+
+        #[inline(always)]
+        fn index(&self, idx: usize) -> &f32 {
+            &self.0[idx]
+        }
+    }
+
     impl Add for Float4 {
         type Output = Self;
 
@@ -339,18 +364,41 @@ mod fallback {
     pub struct Bool4([bool; 4]);
 
     impl Bool4 {
+        /// Creates a mask with all four lanes set to `false`.
+        #[inline(always)]
+        pub fn new_false() -> Self {
+            Self([false; 4])
+        }
+
+        /// Returns the four lanes as a plain array, in lane order.
         #[inline(always)]
         pub fn to_bools(self) -> [bool; 4] {
             self.0
         }
 
+        /// Packs the lanes into the low four bits of a `u8`, one bit
+        /// per lane.
+        ///
+        /// Note: `a` goes to the least significant bit.
         #[inline(always)]
-        pub fn to_bitmask(self) -> u8 {
+        pub fn bitmask(self) -> u8 {
             self.0[0] as u8
                 | ((self.0[1] as u8) << 1)
                 | ((self.0[2] as u8) << 2)
                 | ((self.0[3] as u8) << 3)
         }
+
+        /// Returns `true` if at least one lane is `true`.
+        #[inline(always)]
+        pub fn any(self) -> bool {
+            self.0[0] | self.0[1] | self.0[2] | self.0[3]
+        }
+
+        /// Returns `true` only if all four lanes are `true`.
+        #[inline(always)]
+        pub fn all(self) -> bool {
+            self.0[0] & self.0[1] & self.0[2] & self.0[3]
+        }
     }
 
     impl BitAnd for Bool4 {
@@ -698,6 +739,17 @@ impl Float4 {
     }
 }
 
+/// Unpacks a `Float4` into a tuple of its four lanes, ordered
+/// `(a, b, c, d)`.
+impl From<Float4> for (f32, f32, f32, f32) {
+    // Marked for inlining like the other small `Float4` trait impls in
+    // this file, so the conversion can also be inlined across crates.
+    #[inline(always)]
+    fn from(v: Float4) -> Self {
+        (v.a(), v.b(), v.c(), v.d())
+    }
+}
+
 impl AddAssign for Float4 {
     #[inline(always)]
     fn add_assign(&mut self, rhs: Self) {
@@ -743,7 +790,7 @@ impl DivAssign<f32> for Float4 {
 impl PartialEq for Float4 {
     #[inline(always)]
     fn eq(&self, rhs: &Self) -> bool {
-        Self::eq(*self, *rhs).to_bitmask() == 0b1111
+        self.cmpeq(*rhs).bitmask() == 0b1111
     }
 }
 
diff --git a/sub_crates/rmath/src/xform.rs b/sub_crates/rmath/src/xform.rs
index 1e476ec..a6479c1 100644
--- a/sub_crates/rmath/src/xform.rs
+++ b/sub_crates/rmath/src/xform.rs
@@ -184,6 +184,28 @@ pub struct XformFull {
     pub t: Float4,          // Forward translation.
 }
 
+impl XformFull {
+    /// Returns the identity transform.
+    ///
+    /// Both `m` and `m_inv` are set to the three basis vectors of the
+    /// identity matrix, and the translation `t` is zero.
+    pub fn identity() -> Self {
+        // The identity matrix is its own inverse, so the same vectors
+        // are used for both `m` and `m_inv`.
+        let unit_vectors = [
+            Float4::new(1.0, 0.0, 0.0, 0.0),
+            Float4::new(0.0, 1.0, 0.0, 0.0),
+            Float4::new(0.0, 0.0, 1.0, 0.0),
+        ];
+
+        Self {
+            m: unit_vectors,
+            m_inv: unit_vectors,
+            t: Float4::splat(0.0),
+        }
+    }
+}
+
 //-------------------------------------------------------------
 
 #[cfg(test)]
diff --git a/sub_crates/spectral_upsampling/Cargo.toml b/sub_crates/spectral_upsampling/Cargo.toml
index 88be212..939bcf1 100644
--- a/sub_crates/spectral_upsampling/Cargo.toml
+++ b/sub_crates/spectral_upsampling/Cargo.toml
@@ -10,4 +10,4 @@ name = "spectral_upsampling"
 path = "src/lib.rs"
 
 [dependencies]
-glam = "0.15"
\ No newline at end of file
+rmath = { path = "../rmath" }
\ No newline at end of file
diff --git a/sub_crates/spectral_upsampling/src/jakob.rs b/sub_crates/spectral_upsampling/src/jakob.rs
index 9a8e07c..9288dd0 100644
--- a/sub_crates/spectral_upsampling/src/jakob.rs
+++ b/sub_crates/spectral_upsampling/src/jakob.rs
@@ -6,7 +6,7 @@
 /// The provides similar color matching as full Jakob, at the expense of
 /// somewhat lower quality spectrums, and the inability to precalculate
 /// the coefficents for even more efficient evaluation later on.
-use glam::Vec4;
+use rmath::wide4::Float4;
 
 /// How many polynomial coefficients?
 const RGB2SPEC_N_COEFFS: usize = 3;
@@ -15,7 +15,7 @@ const RGB2SPEC_N_COEFFS: usize = 3;
 include!(concat!(env!("OUT_DIR"), "/jakob_table_inc.rs"));
 
 #[inline]
-pub fn rec709_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
+pub fn rec709_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 {
     small_rgb_to_spectrum_p4(
         REC709_TABLE,
         REC709_TABLE_RES,
@@ -26,7 +26,7 @@ pub fn rec709_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
 }
 
 #[inline]
-pub fn rec2020_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
+pub fn rec2020_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 {
     small_rgb_to_spectrum_p4(
         REC2020_TABLE,
         REC2020_TABLE_RES,
@@ -37,7 +37,7 @@ pub fn rec2020_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
 }
 
 #[inline]
-pub fn aces_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
+pub fn aces_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 {
     small_rgb_to_spectrum_p4(
         ACES_TABLE,
         ACES_TABLE_RES,
@@ -56,9 +56,9 @@ fn small_rgb_to_spectrum_p4(
     table: &[[(f32, f32, f32); 2]],
     table_res: usize,
     table_mid_value: f32,
-    lambdas: Vec4,
+    lambdas: Float4,
     rgb: (f32, f32, f32),
-) -> Vec4 {
+) -> Float4 {
     // Determine largest RGB component, and calculate the other two
     // components scaled for lookups.
     let (i, max_val, x, y) = if rgb.0 > rgb.1 && rgb.0 > rgb.2 {
@@ -71,7 +71,7 @@ fn small_rgb_to_spectrum_p4(
     if max_val == 0.0 {
         // If max_val is zero, just return zero.  This avoids NaN's from
         // divide by zero.  This is also correct, since it's black.
-        return Vec4::splat(0.0);
+        return Float4::splat(0.0);
     }
     let x = x * 63.0 / max_val;
     let y = y * 63.0 / max_val;
@@ -91,20 +91,20 @@ fn small_rgb_to_spectrum_p4(
 
     // Convert to SIMD format for faster interpolation.
     let a0 = [
-        Vec4::new(a0[0].0, a0[0].1, a0[0].2, 0.0),
-        Vec4::new(a0[1].0, a0[1].1, a0[1].2, 0.0),
+        Float4::new(a0[0].0, a0[0].1, a0[0].2, 0.0),
+        Float4::new(a0[1].0, a0[1].1, a0[1].2, 0.0),
     ];
     let a1 = [
-        Vec4::new(a1[0].0, a1[0].1, a1[0].2, 0.0),
-        Vec4::new(a1[1].0, a1[1].1, a1[1].2, 0.0),
+        Float4::new(a1[0].0, a1[0].1, a1[0].2, 0.0),
+        Float4::new(a1[1].0, a1[1].1, a1[1].2, 0.0),
     ];
     let a2 = [
-        Vec4::new(a2[0].0, a2[0].1, a2[0].2, 0.0),
-        Vec4::new(a2[1].0, a2[1].1, a2[1].2, 0.0),
+        Float4::new(a2[0].0, a2[0].1, a2[0].2, 0.0),
+        Float4::new(a2[1].0, a2[1].1, a2[1].2, 0.0),
     ];
     let a3 = [
-        Vec4::new(a3[0].0, a3[0].1, a3[0].2, 0.0),
-        Vec4::new(a3[1].0, a3[1].1, a3[1].2, 0.0),
+        Float4::new(a3[0].0, a3[0].1, a3[0].2, 0.0),
+        Float4::new(a3[1].0, a3[1].1, a3[1].2, 0.0),
     ];
 
     // Do interpolation.
@@ -133,22 +133,22 @@ fn small_rgb_to_spectrum_p4(
 // Coefficient -> eval functions
 
 #[inline(always)]
-fn rgb2spec_fma_4(a: Vec4, b: Vec4, c: Vec4) -> Vec4 {
+fn rgb2spec_fma_4(a: Float4, b: Float4, c: Float4) -> Float4 {
     (a * b) + c
 }
 
-fn rgb2spec_eval_4(coeff: [f32; RGB2SPEC_N_COEFFS], lambda: Vec4) -> Vec4 {
-    let co0 = Vec4::splat(coeff[0]);
-    let co1 = Vec4::splat(coeff[1]);
-    let co2 = Vec4::splat(coeff[2]);
+fn rgb2spec_eval_4(coeff: [f32; RGB2SPEC_N_COEFFS], lambda: Float4) -> Float4 {
+    let co0 = Float4::splat(coeff[0]);
+    let co1 = Float4::splat(coeff[1]);
+    let co2 = Float4::splat(coeff[2]);
 
     let x = rgb2spec_fma_4(rgb2spec_fma_4(co0, lambda, co1), lambda, co2);
 
     let y = {
         // TODO: replace this with a SIMD sqrt op.
-        let (x, y, z, w) = rgb2spec_fma_4(x, x, Vec4::splat(1.0)).into();
-        Vec4::new(x.sqrt(), y.sqrt(), z.sqrt(), w.sqrt()).recip()
+        let (x, y, z, w) = rgb2spec_fma_4(x, x, Float4::splat(1.0)).into();
+        Float4::new(x.sqrt(), y.sqrt(), z.sqrt(), w.sqrt()).recip()
     };
 
-    rgb2spec_fma_4(Vec4::splat(0.5) * x, y, Vec4::splat(0.5))
+    rgb2spec_fma_4(Float4::splat(0.5) * x, y, Float4::splat(0.5))
 }
diff --git a/sub_crates/spectral_upsampling/src/meng.rs b/sub_crates/spectral_upsampling/src/meng.rs
index f3ad3e7..d75c89d 100644
--- a/sub_crates/spectral_upsampling/src/meng.rs
+++ b/sub_crates/spectral_upsampling/src/meng.rs
@@ -6,7 +6,7 @@
 
 use std::f32;
 
-use glam::Vec4;
+use rmath::wide4::Float4;
 
 mod meng_spectra_tables;
 
@@ -174,7 +174,7 @@ pub fn spectrum_xyz_to_p(lambda: f32, xyz: (f32, f32, f32)) -> f32 {
 ///
 /// Works on 4 wavelengths at once via SIMD.
 #[inline]
-pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
+pub fn spectrum_xyz_to_p_4(lambdas: Float4, xyz: (f32, f32, f32)) -> Float4 {
     assert!(lambdas.min_element() >= SPECTRUM_SAMPLE_MIN);
     assert!(lambdas.max_element() <= SPECTRUM_SAMPLE_MAX);
 
@@ -184,7 +184,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
         if norm < f32::MAX {
             norm
         } else {
-            return Vec4::splat(0.0);
+            return Float4::splat(0.0);
         }
     };
 
@@ -197,7 +197,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
         || uv.1 < 0.0
         || uv.1 >= SPECTRUM_GRID_HEIGHT as f32
     {
-        return Vec4::splat(0.0);
+        return Float4::splat(0.0);
     }
 
     let uvi = (uv.0 as i32, uv.1 as i32);
@@ -214,11 +214,11 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
 
     // If the cell has no points, nothing we can do, so return 0.0
     if num == 0 {
-        return Vec4::splat(0.0);
+        return Float4::splat(0.0);
     }
 
     // Normalize lambda to spectrum table index range.
-    let sb: Vec4 = (lambdas - Vec4::splat(SPECTRUM_SAMPLE_MIN))
+    let sb: Float4 = (lambdas - Float4::splat(SPECTRUM_SAMPLE_MIN))
         / (SPECTRUM_SAMPLE_MAX - SPECTRUM_SAMPLE_MIN)
         * (SPECTRUM_NUM_SAMPLES as f32 - 1.0);
     debug_assert!(sb.min_element() >= 0.0);
@@ -226,7 +226,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
 
     // Get the spectral values for the vertices of the grid cell.
     // TODO: use integer SIMD intrinsics to make this part faster.
-    let mut p = [Vec4::splat(0.0); 6];
+    let mut p = [Float4::splat(0.0); 6];
     let sb0: [i32; 4] = [sb[0] as i32, sb[1] as i32, sb[2] as i32, sb[3] as i32];
     assert!(sb0[0].max(sb0[1]).max(sb0[2].max(sb0[3])) < SPECTRUM_NUM_SAMPLES);
     let sb1: [i32; 4] = [
@@ -235,27 +235,27 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
         (sb[2] as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1),
         (sb[3] as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1),
     ];
-    let sbf = sb - Vec4::new(sb0[0] as f32, sb0[1] as f32, sb0[2] as f32, sb0[3] as f32);
+    let sbf = sb - Float4::new(sb0[0] as f32, sb0[1] as f32, sb0[2] as f32, sb0[3] as f32);
     for i in 0..(num as usize) {
         debug_assert!(idx[i] >= 0);
         let spectrum = &SPECTRUM_DATA_POINTS[idx[i] as usize].spectrum;
-        let p0 = Vec4::new(
+        let p0 = Float4::new(
             spectrum[sb0[0] as usize],
             spectrum[sb0[1] as usize],
             spectrum[sb0[2] as usize],
             spectrum[sb0[3] as usize],
         );
-        let p1 = Vec4::new(
+        let p1 = Float4::new(
             spectrum[sb1[0] as usize],
             spectrum[sb1[1] as usize],
             spectrum[sb1[2] as usize],
             spectrum[sb1[3] as usize],
         );
-        p[i] = p0 * (Vec4::splat(1.0) - sbf) + p1 * sbf;
+        p[i] = p0 * (Float4::splat(1.0) - sbf) + p1 * sbf;
     }
 
     // Linearly interpolate the spectral power of the cell vertices.
-    let mut interpolated_p = Vec4::splat(0.0);
+    let mut interpolated_p = Float4::splat(0.0);
     if inside {
         // Fast path for normal inner quads:
         let uv2 = (uv.0 - uvi.0 as f32, uv.1 - uvi.1 as f32);