diff --git a/Cargo.lock b/Cargo.lock
index 308032f..15243e4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,5 +1,7 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
+version = 3
+
 [[package]]
 name = "ansi_term"
 version = "0.11.0"
@@ -245,7 +247,6 @@ name = "math3d"
 version = "0.1.0"
 dependencies = [
  "approx",
- "glam",
 ]
 
 [[package]]
diff --git a/sub_crates/math3d/Cargo.toml b/sub_crates/math3d/Cargo.toml
index dba6e93..18fd257 100644
--- a/sub_crates/math3d/Cargo.toml
+++ b/sub_crates/math3d/Cargo.toml
@@ -9,7 +9,5 @@ license = "MIT, Apache 2.0"
 name = "math3d"
 path = "src/lib.rs"
 
-# Local crate dependencies
 [dependencies]
-glam = "0.15"
 approx = "0.4"
diff --git a/sub_crates/math3d/src/lib.rs b/sub_crates/math3d/src/lib.rs
index 3c51eed..53ba427 100644
--- a/sub_crates/math3d/src/lib.rs
+++ b/sub_crates/math3d/src/lib.rs
@@ -1,28 +1,34 @@
 #![allow(dead_code)]
 
-mod normal;
-mod point;
-mod transform;
-mod vector;
+pub mod mat3x3;
+pub mod normal;
+pub mod point;
+pub mod transform;
+pub mod transform_dual;
+pub mod vector;
+pub mod wide4;
 
-pub use self::{normal::Normal, point::Point, transform::Transform, vector::Vector};
+pub use self::{
+    normal::Normal, point::Point, transform::Transform, transform_dual::TransformDual,
+    vector::Vector,
+};
 
-/// Trait for calculating dot products.
-pub trait DotProduct {
-    fn dot(self, other: Self) -> f32;
-}
+// /// Trait for calculating dot products.
+// pub trait DotProduct {
+//     fn dot(self, other: Self) -> f32;
+// }
 
-#[inline]
-pub fn dot<T: DotProduct>(a: T, b: T) -> f32 {
-    a.dot(b)
-}
+// #[inline]
+// pub fn dot<T: DotProduct>(a: T, b: T) -> f32 {
+//     a.dot(b)
+// }
 
-/// Trait for calculating cross products.
-pub trait CrossProduct {
-    fn cross(self, other: Self) -> Self;
-}
+// /// Trait for calculating cross products.
+// pub trait CrossProduct {
+//     fn cross(self, other: Self) -> Self;
+// }
 
-#[inline]
-pub fn cross<T: CrossProduct>(a: T, b: T) -> T {
-    a.cross(b)
-}
+// #[inline]
+// pub fn cross<T: CrossProduct>(a: T, b: T) -> T {
+//     a.cross(b)
+// }
diff --git a/sub_crates/math3d/src/mat3x3.rs b/sub_crates/math3d/src/mat3x3.rs
new file mode 100644
index 0000000..0be112e
--- /dev/null
+++ b/sub_crates/math3d/src/mat3x3.rs
@@ -0,0 +1,83 @@
+use std::ops::{Add, Div, Mul};
+
+use crate::wide4::f32x4;
+
+/// A 3x3 matrix.
+///
+/// Internally this is actually 4x3 to take advantage of SIMD.
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+pub(crate) struct Mat3x3(pub(crate) [f32x4; 3]);
+
+impl Mat3x3 {
+    #[inline(always)]
+    pub fn new(a: f32x4, b: f32x4, c: f32x4) -> Self {
+        Self([a, b, c])
+    }
+
+    pub fn identity() -> Self {
+        Self([
+            f32x4::new(1.0, 0.0, 0.0, 0.0),
+            f32x4::new(0.0, 1.0, 0.0, 0.0),
+            f32x4::new(0.0, 0.0, 1.0, 0.0),
+        ])
+    }
+
+    #[must_use]
+    #[inline]
+    pub fn inverse(self) -> Self {
+        todo!()
+    }
+
+    #[must_use]
+    #[inline]
+    pub fn inverse_precise(self) -> Self {
+        todo!()
+    }
+
+    #[must_use]
+    #[inline]
+    pub fn transpose(self) -> Self {
+        todo!()
+    }
+}
+
+impl Add for Mat3x3 {
+    type Output = Self;
+
+    #[inline(always)]
+    fn add(self, rhs: Self) -> Self {
+        Self([
+            self.0[0] + rhs.0[0],
+            self.0[1] + rhs.0[1],
+            self.0[2] + rhs.0[2],
+        ])
+    }
+}
+
+impl Mul for Mat3x3 {
+    type Output = Self;
+
+    #[inline]
+    fn mul(self, _rhs: Self) -> Self {
+        todo!()
+    }
+}
+
+impl Mul<f32> for Mat3x3 {
+    type Output = Self;
+
+    #[inline(always)]
+    fn mul(self, rhs: f32) -> Self {
+        Self([self.0[0] * rhs, self.0[1] * rhs, self.0[2] * rhs])
+    }
+}
+
+impl Div<f32> for Mat3x3 {
+    type Output = Self;
+
+    #[inline(always)]
+    fn div(self, rhs: f32) -> Self {
+        Self([self.0[0] / rhs, self.0[1] / rhs, self.0[2] / rhs])
+    }
+}
diff --git a/sub_crates/math3d/src/normal.rs b/sub_crates/math3d/src/normal.rs
index c5d55a7..b0cd469 100644
--- a/sub_crates/math3d/src/normal.rs
+++ b/sub_crates/math3d/src/normal.rs
@@ -1,270 +1,244 @@
 #![allow(dead_code)]
 
-use std::{
-    cmp::PartialEq,
-    ops::{Add, Div, Mul, Neg, Sub},
-};
+use std::ops::{Add, Div, Mul, Neg, Sub};
 
-use glam::Vec3A;
+use crate::wide4::f32x4;
 
-use super::{CrossProduct, DotProduct, Transform, Vector};
+use crate::Vector;
 
-/// A surface normal in 3d homogeneous space.
+/// A surface normal in 3D space.
 #[derive(Debug, Copy, Clone)]
-pub struct Normal {
-    pub co: Vec3A,
-}
+#[repr(transparent)]
+pub struct Normal(pub(crate) f32x4);
 
 impl Normal {
     #[inline(always)]
-    pub fn new(x: f32, y: f32, z: f32) -> Normal {
-        Normal {
-            co: Vec3A::new(x, y, z),
-        }
+    pub fn new(x: f32, y: f32, z: f32) -> Self {
+        Self(f32x4::new(x, y, z, 0.0))
     }
 
     #[inline(always)]
-    pub fn length(&self) -> f32 {
-        self.co.length()
+    pub fn length(self) -> f32 {
+        self.length2().sqrt()
     }
 
     #[inline(always)]
-    pub fn length2(&self) -> f32 {
-        self.co.length_squared()
+    pub fn length2(self) -> f32 {
+        let sqr = self.0 * self.0;
+        sqr.a() + sqr.b() + sqr.c()
     }
 
     #[inline(always)]
-    pub fn normalized(&self) -> Normal {
-        Normal {
-            co: self.co.normalize(),
-        }
+    #[must_use]
+    pub fn normalized(self) -> Self {
+        Self(self.0 / self.length())
     }
 
     #[inline(always)]
     pub fn into_vector(self) -> Vector {
-        Vector { co: self.co }
+        Vector(self.0)
     }
 
     #[inline(always)]
-    pub fn get_n(&self, n: usize) -> f32 {
-        match n {
-            0 => self.x(),
-            1 => self.y(),
-            2 => self.z(),
-            _ => panic!("Attempt to access dimension beyond z."),
-        }
+    pub fn x(self) -> f32 {
+        self.0.a()
     }
 
     #[inline(always)]
-    pub fn x(&self) -> f32 {
-        self.co[0]
+    pub fn y(self) -> f32 {
+        self.0.b()
     }
 
     #[inline(always)]
-    pub fn y(&self) -> f32 {
-        self.co[1]
+    pub fn z(self) -> f32 {
+        self.0.c()
     }
 
     #[inline(always)]
-    pub fn z(&self) -> f32 {
-        self.co[2]
+    #[must_use]
+    pub fn set_x(self, x: f32) -> Self {
+        Self(self.0.set_a(x))
     }
 
     #[inline(always)]
-    pub fn set_x(&mut self, x: f32) {
-        self.co[0] = x;
+    #[must_use]
+    pub fn set_y(self, y: f32) -> Self {
+        Self(self.0.set_b(y))
     }
 
     #[inline(always)]
-    pub fn set_y(&mut self, y: f32) {
-        self.co[1] = y;
-    }
-
-    #[inline(always)]
-    pub fn set_z(&mut self, z: f32) {
-        self.co[2] = z;
-    }
-}
-
-impl PartialEq for Normal {
-    #[inline(always)]
-    fn eq(&self, other: &Normal) -> bool {
-        self.co == other.co
+    #[must_use]
+    pub fn set_z(self, z: f32) -> Self {
+        Self(self.0.set_c(z))
     }
 }
 
 impl Add for Normal {
-    type Output = Normal;
+    type Output = Self;
 
     #[inline(always)]
-    fn add(self, other: Normal) -> Normal {
-        Normal {
-            co: self.co + other.co,
-        }
+    fn add(self, other: Self) -> Self {
+        Self(self.0 + other.0)
     }
 }
 
 impl Sub for Normal {
-    type Output = Normal;
+    type Output = Self;
 
     #[inline(always)]
-    fn sub(self, other: Normal) -> Normal {
-        Normal {
-            co: self.co - other.co,
-        }
+    fn sub(self, other: Self) -> Self {
+        Self(self.0 - other.0)
     }
 }
 
 impl Mul<f32> for Normal {
-    type Output = Normal;
+    type Output = Self;
 
     #[inline(always)]
-    fn mul(self, other: f32) -> Normal {
-        Normal {
-            co: self.co * other,
-        }
+    fn mul(self, other: f32) -> Self {
+        Self(self.0 * other)
     }
 }
 
-impl Mul<Transform> for Normal {
-    type Output = Normal;
+// impl Mul<Transform> for Normal {
+//     type Output = Normal;
 
-    #[inline]
-    fn mul(self, other: Transform) -> Normal {
-        Normal {
-            co: other.0.matrix3.inverse().transpose().mul_vec3a(self.co),
-        }
-    }
-}
+//     #[inline]
+//     fn mul(self, other: Transform) -> Normal {
+//         Normal {
+//             co: other.0.matrix3.inverse().transpose().mul_vec3a(self.co),
+//         }
+//     }
+// }
 
 impl Div<f32> for Normal {
-    type Output = Normal;
+    type Output = Self;
 
     #[inline(always)]
-    fn div(self, other: f32) -> Normal {
-        Normal {
-            co: self.co / other,
-        }
+    fn div(self, other: f32) -> Self {
+        Self(self.0 / other)
     }
 }
 
 impl Neg for Normal {
-    type Output = Normal;
+    type Output = Self;
 
     #[inline(always)]
-    fn neg(self) -> Normal {
-        Normal { co: self.co * -1.0 }
+    fn neg(self) -> Self {
+        Self(-self.0)
     }
 }
 
-impl DotProduct for Normal {
-    #[inline(always)]
-    fn dot(self, other: Normal) -> f32 {
-        self.co.dot(other.co)
-    }
-}
+// impl DotProduct for Normal {
+//     #[inline(always)]
+//     fn dot(self, other: Normal) -> f32 {
+//         self.co.dot(other.co)
+//     }
+// }
 
-impl CrossProduct for Normal {
-    #[inline]
-    fn cross(self, other: Normal) -> Normal {
-        Normal {
-            co: self.co.cross(other.co),
-        }
-    }
-}
+// impl CrossProduct for Normal {
+//     #[inline]
+//     fn cross(self, other: Normal) -> Normal {
+//         Normal {
+//             co: self.co.cross(other.co),
+//         }
+//     }
+// }
 
-#[cfg(test)]
-mod tests {
-    use super::super::{CrossProduct, DotProduct, Transform};
-    use super::*;
-    use approx::assert_ulps_eq;
+//-------------------------------------------------------------
 
-    #[test]
-    fn add() {
-        let v1 = Normal::new(1.0, 2.0, 3.0);
-        let v2 = Normal::new(1.5, 4.5, 2.5);
-        let v3 = Normal::new(2.5, 6.5, 5.5);
+// #[cfg(test)]
+// mod tests {
+//     use super::super::{CrossProduct, DotProduct, Transform};
+//     use super::*;
+//     use approx::assert_ulps_eq;
 
-        assert_eq!(v3, v1 + v2);
-    }
+//     #[test]
+//     fn add() {
+//         let v1 = Normal::new(1.0, 2.0, 3.0);
+//         let v2 = Normal::new(1.5, 4.5, 2.5);
+//         let v3 = Normal::new(2.5, 6.5, 5.5);
 
-    #[test]
-    fn sub() {
-        let v1 = Normal::new(1.0, 2.0, 3.0);
-        let v2 = Normal::new(1.5, 4.5, 2.5);
-        let v3 = Normal::new(-0.5, -2.5, 0.5);
+//         assert_eq!(v3, v1 + v2);
+//     }
 
-        assert_eq!(v3, v1 - v2);
-    }
+//     #[test]
+//     fn sub() {
+//         let v1 = Normal::new(1.0, 2.0, 3.0);
+//         let v2 = Normal::new(1.5, 4.5, 2.5);
+//         let v3 = Normal::new(-0.5, -2.5, 0.5);
 
-    #[test]
-    fn mul_scalar() {
-        let v1 = Normal::new(1.0, 2.0, 3.0);
-        let v2 = 2.0;
-        let v3 = Normal::new(2.0, 4.0, 6.0);
+//         assert_eq!(v3, v1 - v2);
+//     }
 
-        assert_eq!(v3, v1 * v2);
-    }
+//     #[test]
+//     fn mul_scalar() {
+//         let v1 = Normal::new(1.0, 2.0, 3.0);
+//         let v2 = 2.0;
+//         let v3 = Normal::new(2.0, 4.0, 6.0);
 
-    #[test]
-    fn mul_matrix_1() {
-        let n = Normal::new(1.0, 2.5, 4.0);
-        let m = Transform::new_from_values(
-            1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
-        );
-        let nm = n * m;
-        let nm2 = Normal::new(-4.0625, 1.78125, -0.03125);
-        for i in 0..3 {
-            assert_ulps_eq!(nm.co[i], nm2.co[i], max_ulps = 4);
-        }
-    }
+//         assert_eq!(v3, v1 * v2);
+//     }
 
-    #[test]
-    fn div() {
-        let v1 = Normal::new(1.0, 2.0, 3.0);
-        let v2 = 2.0;
-        let v3 = Normal::new(0.5, 1.0, 1.5);
+//     #[test]
+//     fn mul_matrix_1() {
+//         let n = Normal::new(1.0, 2.5, 4.0);
+//         let m = Transform::new_from_values(
+//             1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
+//         );
+//         let nm = n * m;
+//         let nm2 = Normal::new(-4.0625, 1.78125, -0.03125);
+//         for i in 0..3 {
+//             assert_ulps_eq!(nm.co[i], nm2.co[i], max_ulps = 4);
+//         }
+//     }
 
-        assert_eq!(v3, v1 / v2);
-    }
+//     #[test]
+//     fn div() {
+//         let v1 = Normal::new(1.0, 2.0, 3.0);
+//         let v2 = 2.0;
+//         let v3 = Normal::new(0.5, 1.0, 1.5);
 
-    #[test]
-    fn length() {
-        let n = Normal::new(1.0, 2.0, 3.0);
-        assert!((n.length() - 3.7416573867739413).abs() < 0.000001);
-    }
+//         assert_eq!(v3, v1 / v2);
+//     }
 
-    #[test]
-    fn length2() {
-        let n = Normal::new(1.0, 2.0, 3.0);
-        assert_eq!(n.length2(), 14.0);
-    }
+//     #[test]
+//     fn length() {
+//         let n = Normal::new(1.0, 2.0, 3.0);
+//         assert!((n.length() - 3.7416573867739413).abs() < 0.000001);
+//     }
 
-    #[test]
-    fn normalized() {
-        let n1 = Normal::new(1.0, 2.0, 3.0);
-        let n2 = Normal::new(0.2672612419124244, 0.5345224838248488, 0.8017837257372732);
-        let n3 = n1.normalized();
-        assert!((n3.x() - n2.x()).abs() < 0.000001);
-        assert!((n3.y() - n2.y()).abs() < 0.000001);
-        assert!((n3.z() - n2.z()).abs() < 0.000001);
-    }
+//     #[test]
+//     fn length2() {
+//         let n = Normal::new(1.0, 2.0, 3.0);
+//         assert_eq!(n.length2(), 14.0);
+//     }
 
-    #[test]
-    fn dot_test() {
-        let v1 = Normal::new(1.0, 2.0, 3.0);
-        let v2 = Normal::new(1.5, 4.5, 2.5);
-        let v3 = 18.0f32;
+//     #[test]
+//     fn normalized() {
+//         let n1 = Normal::new(1.0, 2.0, 3.0);
+//         let n2 = Normal::new(0.2672612419124244, 0.5345224838248488, 0.8017837257372732);
+//         let n3 = n1.normalized();
+//         assert!((n3.x() - n2.x()).abs() < 0.000001);
+//         assert!((n3.y() - n2.y()).abs() < 0.000001);
+//         assert!((n3.z() - n2.z()).abs() < 0.000001);
+//     }
 
-        assert_eq!(v3, v1.dot(v2));
-    }
+//     #[test]
+//     fn dot_test() {
+//         let v1 = Normal::new(1.0, 2.0, 3.0);
+//         let v2 = Normal::new(1.5, 4.5, 2.5);
+//         let v3 = 18.0f32;
 
-    #[test]
-    fn cross_test() {
-        let v1 = Normal::new(1.0, 0.0, 0.0);
-        let v2 = Normal::new(0.0, 1.0, 0.0);
-        let v3 = Normal::new(0.0, 0.0, 1.0);
+//         assert_eq!(v3, v1.dot(v2));
+//     }
 
-        assert_eq!(v3, v1.cross(v2));
-    }
-}
+//     #[test]
+//     fn cross_test() {
+//         let v1 = Normal::new(1.0, 0.0, 0.0);
+//         let v2 = Normal::new(0.0, 1.0, 0.0);
+//         let v3 = Normal::new(0.0, 0.0, 1.0);
+
+//         assert_eq!(v3, v1.cross(v2));
+//     }
+// }
diff --git a/sub_crates/math3d/src/point.rs b/sub_crates/math3d/src/point.rs
index 19deb56..a70d11e 100644
--- a/sub_crates/math3d/src/point.rs
+++ b/sub_crates/math3d/src/point.rs
@@ -1,109 +1,75 @@
 #![allow(dead_code)]
+use std::ops::{Add, Sub};
 
-use std::{
-    cmp::PartialEq,
-    ops::{Add, Mul, Sub},
-};
+use crate::vector::Vector;
+use crate::wide4::f32x4;
 
-use glam::Vec3A;
-
-use super::{Transform, Vector};
-
-/// A position in 3d homogeneous space.
+/// A position in 3D space.
 #[derive(Debug, Copy, Clone)]
-pub struct Point {
-    pub co: Vec3A,
-}
+#[repr(transparent)]
+pub struct Point(pub(crate) f32x4);
 
 impl Point {
     #[inline(always)]
-    pub fn new(x: f32, y: f32, z: f32) -> Point {
-        Point {
-            co: Vec3A::new(x, y, z),
-        }
+    pub fn new(x: f32, y: f32, z: f32) -> Self {
+        Self(f32x4::new(x, y, z, 0.0))
     }
 
     #[inline(always)]
-    pub fn min(&self, other: Point) -> Point {
-        let n1 = self;
-        let n2 = other;
-
-        Point {
-            co: n1.co.min(n2.co),
-        }
+    pub fn min(self, other: Self) -> Self {
+        Self(self.0.min(other.0))
     }
 
     #[inline(always)]
-    pub fn max(&self, other: Point) -> Point {
-        let n1 = self;
-        let n2 = other;
-
-        Point {
-            co: n1.co.max(n2.co),
-        }
+    pub fn max(self, other: Self) -> Self {
+        Self(self.0.max(other.0))
     }
 
     #[inline(always)]
     pub fn into_vector(self) -> Vector {
-        Vector { co: self.co }
+        Vector(self.0)
     }
 
     #[inline(always)]
-    pub fn get_n(&self, n: usize) -> f32 {
-        match n {
-            0 => self.x(),
-            1 => self.y(),
-            2 => self.z(),
-            _ => panic!("Attempt to access dimension beyond z."),
-        }
+    pub fn x(self) -> f32 {
+        self.0.a()
     }
 
     #[inline(always)]
-    pub fn x(&self) -> f32 {
-        self.co[0]
+    pub fn y(self) -> f32 {
+        self.0.b()
     }
 
     #[inline(always)]
-    pub fn y(&self) -> f32 {
-        self.co[1]
+    pub fn z(self) -> f32 {
+        self.0.c()
     }
 
     #[inline(always)]
-    pub fn z(&self) -> f32 {
-        self.co[2]
+    #[must_use]
+    pub fn set_x(self, x: f32) -> Self {
+        Self(self.0.set_a(x))
     }
 
     #[inline(always)]
-    pub fn set_x(&mut self, x: f32) {
-        self.co[0] = x;
+    #[must_use]
+    pub fn set_y(self, y: f32) -> Self {
+        Self(self.0.set_b(y))
     }
 
     #[inline(always)]
-    pub fn set_y(&mut self, y: f32) {
-        self.co[1] = y;
-    }
-
-    #[inline(always)]
-    pub fn set_z(&mut self, z: f32) {
-        self.co[2] = z;
-    }
-}
-
-impl PartialEq for Point {
-    #[inline(always)]
-    fn eq(&self, other: &Point) -> bool {
-        self.co == other.co
+    #[must_use]
+    pub fn set_z(self, z: f32) -> Self {
+        Self(self.0.set_c(z))
     }
 }
 
 impl Add<Vector> for Point {
-    type Output = Point;
+    type Output = Self;
 
     #[inline(always)]
-    fn add(self, other: Vector) -> Point {
-        Point {
-            co: self.co + other.co,
-        }
+    fn add(self, other: Vector) -> Self {
+        Self(self.0 + other.0)
     }
 }
 
@@ -111,92 +77,90 @@ impl Sub for Point {
     type Output = Vector;
 
     #[inline(always)]
-    fn sub(self, other: Point) -> Vector {
-        Vector {
-            co: self.co - other.co,
-        }
+    fn sub(self, other: Self) -> Vector {
+        Vector(self.0 - other.0)
     }
 }
 
 impl Sub<Vector> for Point {
-    type Output = Point;
+    type Output = Self;
 
     #[inline(always)]
-    fn sub(self, other: Vector) -> Point {
-        Point {
-            co: self.co - other.co,
-        }
+    fn sub(self, other: Vector) -> Self {
+        Self(self.0 - other.0)
     }
 }
 
-impl Mul<Transform> for Point {
-    type Output = Point;
+// impl Mul<Transform> for Point {
+//     type Output = Self;
 
-    #[inline]
-    fn mul(self, other: Transform) -> Point {
-        Point {
-            co: other.0.transform_point3a(self.co),
-        }
-    }
-}
+//     #[inline]
+//     fn mul(self, other: Transform) -> Self {
+//         Self {
+//             co: other.0.transform_point3a(self.0),
+//         }
+//     }
+// }
 
-#[cfg(test)]
-mod tests {
-    use super::super::{Transform, Vector};
-    use super::*;
+//-------------------------------------------------------------
 
-    #[test]
-    fn add() {
-        let p1 = Point::new(1.0, 2.0, 3.0);
-        let v1 = Vector::new(1.5, 4.5, 2.5);
-        let p2 = Point::new(2.5, 6.5, 5.5);
+// #[cfg(test)]
+// mod tests {
+//     use super::super::{Transform, Vector};
+//     use super::*;
 
-        assert_eq!(p2, p1 + v1);
-    }
+//     #[test]
+//     fn add() {
+//         let p1 = Point::new(1.0, 2.0, 3.0);
+//         let v1 = Vector::new(1.5, 4.5, 2.5);
+//         let p2 = Point::new(2.5, 6.5, 5.5);
 
-    #[test]
-    fn sub() {
-        let p1 = Point::new(1.0, 2.0, 3.0);
-        let p2 = Point::new(1.5, 4.5, 2.5);
-        let v1 = Vector::new(-0.5, -2.5, 0.5);
+//         assert_eq!(p2, p1 + v1);
+//     }
 
-        assert_eq!(v1, p1 - p2);
-    }
+//     #[test]
+//     fn sub() {
+//         let p1 = Point::new(1.0, 2.0, 3.0);
+//         let p2 = Point::new(1.5, 4.5, 2.5);
+//         let v1 = Vector::new(-0.5, -2.5, 0.5);
 
-    #[test]
-    fn mul_matrix_1() {
-        let p = Point::new(1.0, 2.5, 4.0);
-        let m = Transform::new_from_values(
-            1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
-        );
-        let pm = Point::new(15.5, 54.0, 70.0);
-        assert_eq!(p * m, pm);
-    }
+//         assert_eq!(v1, p1 - p2);
+//     }
 
-    #[test]
-    fn mul_matrix_2() {
-        let p = Point::new(1.0, 2.5, 4.0);
-        let m = Transform::new_from_values(
-            1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
-        );
-        let pm = Point::new(15.5, 54.0, 70.0);
-        assert_eq!(p * m, pm);
-    }
+//     #[test]
+//     fn mul_matrix_1() {
+//         let p = Point::new(1.0, 2.5, 4.0);
+//         let m = Transform::new_from_values(
+//             1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
+//         );
+//         let pm = Point::new(15.5, 54.0, 70.0);
+//         assert_eq!(p * m, pm);
+//     }
 
-    #[test]
-    fn mul_matrix_3() {
-        // Make sure matrix multiplication composes the way one would expect
-        let p = Point::new(1.0, 2.5, 4.0);
-        let m1 = Transform::new_from_values(
-            1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
-        );
-        let m2 =
-            Transform::new_from_values(4.0, 1.0, 2.0, 3.5, 3.0, 6.0, 5.0, 2.0, 2.0, 2.0, 4.0, 12.0);
-        println!("{:?}", m1 * m2);
+//     #[test]
+//     fn mul_matrix_2() {
+//         let p = Point::new(1.0, 2.5, 4.0);
+//         let m = Transform::new_from_values(
+//             1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
+//         );
+//         let pm = Point::new(15.5, 54.0, 70.0);
+//         assert_eq!(p * m, pm);
+//     }
 
-        let pmm1 = p * (m1 * m2);
-        let pmm2 = (p * m1) * m2;
+//     #[test]
+//     fn mul_matrix_3() {
+//         // Make sure matrix multiplication composes the way one would expect
+//         let p = Point::new(1.0, 2.5, 4.0);
+//         let m1 = Transform::new_from_values(
+//             1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
+//         );
+//         let m2 =
+//             Transform::new_from_values(4.0, 1.0, 2.0, 3.5, 3.0, 6.0, 5.0, 2.0, 2.0, 2.0, 4.0, 12.0);
+//         println!("{:?}", m1 * m2);
 
-        assert!((pmm1 - pmm2).length2() <= 0.00001); // Assert pmm1 and pmm2 are roughly equal
-    }
-}
+//         let pmm1 = p * (m1 * m2);
+//         let pmm2 = (p * m1) * m2;
+
+//         assert!((pmm1 - pmm2).length2() <= 0.00001); // Assert pmm1 and pmm2 are roughly equal
+//     }
+// }
diff --git a/sub_crates/math3d/src/transform.rs b/sub_crates/math3d/src/transform.rs
index 29cd069..67a88e5 100644
--- a/sub_crates/math3d/src/transform.rs
+++ b/sub_crates/math3d/src/transform.rs
@@ -3,30 +3,34 @@
 use std::ops::{Add, Mul};
 
 use approx::relative_eq;
-use glam::{Affine3A, Mat3, Mat4, Vec3};
 
-use super::Point;
+use crate::mat3x3::Mat3x3;
+use crate::point::Point;
+use crate::transform_dual::TransformDual;
+use crate::wide4::f32x4;
 
-/// A 4x3 affine transform matrix, used for transforms.
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub struct Transform(pub Affine3A);
+/// An affine transform.
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+pub struct Transform {
+    pub(crate) m: Mat3x3, // Scale, rotation, and shear.
+    pub(crate) t: f32x4,  // Translation.
+}
 
 impl Transform {
-    /// Creates a new identity matrix
-    #[inline]
-    pub fn new() -> Transform {
-        Transform(Affine3A::IDENTITY)
-    }
-
-    /// Creates a new matrix with the specified values:
-    /// a b c d
-    /// e f g h
-    /// i j k l
-    /// m n o p
+    /// Creates a new affine transform from the specified values:
+    ///
+    /// ```text
+    /// a d g j
+    /// b e h k
+    /// c f i l
+    /// ```
+    ///
+    /// Where j, k, and l are the xyz translation component.
     #[inline]
     #[allow(clippy::many_single_char_names)]
     #[allow(clippy::too_many_arguments)]
-    pub fn new_from_values(
+    pub fn new(
         a: f32,
         b: f32,
         c: f32,
@@ -39,16 +43,32 @@ impl Transform {
         j: f32,
         k: f32,
         l: f32,
-    ) -> Transform {
-        Transform(Affine3A::from_mat3_translation(
-            Mat3::from_cols(Vec3::new(a, e, i), Vec3::new(b, f, j), Vec3::new(c, g, k)),
-            Vec3::new(d, h, l),
-        ))
+    ) -> Self {
+        Self {
+            m: Mat3x3::new(
+                f32x4::new(a, b, c, 0.0),
+                f32x4::new(d, e, f, 0.0),
+                f32x4::new(g, h, i, 0.0),
+            ),
+            t: f32x4::new(j, k, l, 0.0),
+        }
+    }
+
+    /// Creates a new identity transform.
+    #[inline]
+    pub fn identity() -> Self {
+        Self {
+            m: Mat3x3::identity(),
+            t: f32x4::splat(0.0),
+        }
     }
 
     #[inline]
     pub fn from_location(loc: Point) -> Transform {
-        Transform(Affine3A::from_translation(loc.co.into()))
+        Self {
+            m: Mat3x3::identity(),
+            t: loc.0,
+        }
     }
 
     /// Returns whether the matrices are approximately equal to each other.
@@ -57,51 +77,57 @@ impl Transform {
     #[inline]
     pub fn aprx_eq(&self, other: Transform, epsilon: f32) -> bool {
         let mut eq = true;
-        for c in 0..3 {
-            for r in 0..3 {
-                let a = self.0.matrix3.col(c)[r];
-                let b = other.0.matrix3.col(c)[r];
-                eq &= relative_eq!(a, b, epsilon = epsilon);
-            }
-        }
-        for i in 0..3 {
-            let a = self.0.translation[i];
-            let b = other.0.translation[i];
-            eq &= relative_eq!(a, b, epsilon = epsilon);
+        for (t1, t2) in self
+            .m
+            .0
+            .iter()
+            .chain(&[self.t])
+            .zip(other.m.0.iter().chain(&[other.t]))
+        {
+            eq &= relative_eq!(t1.a(), t2.a(), epsilon = epsilon);
+            eq &= relative_eq!(t1.b(), t2.b(), epsilon = epsilon);
+            eq &= relative_eq!(t1.c(), t2.c(), epsilon = epsilon);
         }
         eq
     }
 
-    /// Returns the inverse of the Matrix
+    /// Pairs this transform with the precomputed inverse of its matrix part.
     #[inline]
-    pub fn inverse(&self) -> Transform {
-        Transform(self.0.inverse())
+    pub fn compute_dual(self) -> TransformDual {
+        TransformDual {
+            m: self.m,
+            m_inv: self.m.inverse(),
+            t: self.t,
+        }
     }
 }
 
 impl Default for Transform {
     fn default() -> Self {
-        Self::new()
+        Self::identity()
     }
 }
 
-/// Multiply two matrices together
-impl Mul for Transform {
-    type Output = Self;
+// /// Multiply two matrices together
+// impl Mul for Transform {
+//     type Output = Self;
 
-    #[inline]
-    fn mul(self, other: Self) -> Self {
-        Self(other.0 * self.0)
-    }
-}
+//     #[inline]
+//     fn mul(self, rhs: Self) -> Self {
+//         Self(rhs.0 * self.0)
+//     }
+// }
 
 /// Multiply a matrix by a f32
 impl Mul<f32> for Transform {
     type Output = Self;
 
     #[inline]
-    fn mul(self, other: f32) -> Self {
-        Self(Affine3A::from_mat4(Mat4::from(self.0) * other))
+    fn mul(self, rhs: f32) -> Self {
+        Self {
+            m: self.m * rhs,
+            t: self.t * rhs,
+        }
     }
 }
 
@@ -110,69 +136,72 @@ impl Add for Transform {
     type Output = Self;
 
     #[inline]
-    fn add(self, other: Self) -> Self {
-        Self(Affine3A::from_mat4(
-            Mat4::from(self.0) + Mat4::from(other.0),
-        ))
+    fn add(self, rhs: Self) -> Self {
+        Self {
+            m: self.m + rhs.m,
+            t: self.t + rhs.t,
+        }
     }
 }
 
-#[cfg(test)]
-mod tests {
-    use super::*;
+//-------------------------------------------------------------
 
-    #[test]
-    fn equality_test() {
-        let a = Transform::new();
-        let b = Transform::new();
-        let c =
-            Transform::new_from_values(1.1, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0);
+// #[cfg(test)]
+// mod tests {
+//     use super::*;
 
-        assert_eq!(a, b);
-        assert!(a != c);
-    }
+//     #[test]
+//     fn equality_test() {
+//         let a = Transform::new();
+//         let b = Transform::new();
+//         let c =
+//             Transform::new_from_values(1.1, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0);
 
-    #[test]
-    fn approximate_equality_test() {
-        let a = Transform::new();
-        let b = Transform::new_from_values(
-            1.000001, 0.0, 0.0, 0.0, 0.0, 1.000001, 0.0, 0.0, 0.0, 0.0, 1.000001, 0.0,
-        );
-        let c = Transform::new_from_values(
-            1.000003, 0.0, 0.0, 0.0, 0.0, 1.000003, 0.0, 0.0, 0.0, 0.0, 1.000003, 0.0,
-        );
-        let d = Transform::new_from_values(
-            -1.000001, 0.0, 0.0, 0.0, 0.0, -1.000001, 0.0, 0.0, 0.0, 0.0, -1.000001, 0.0,
-        );
+//         assert_eq!(a, b);
+//         assert!(a != c);
+//     }
 
-        assert!(a.aprx_eq(b, 0.000001));
-        assert!(!a.aprx_eq(c, 0.000001));
-        assert!(!a.aprx_eq(d, 0.000001));
-    }
+//     #[test]
+//     fn approximate_equality_test() {
+//         let a = Transform::new();
+//         let b = Transform::new_from_values(
+//             1.000001, 0.0, 0.0, 0.0, 0.0, 1.000001, 0.0, 0.0, 0.0, 0.0, 1.000001, 0.0,
+//         );
+//         let c = Transform::new_from_values(
+//             1.000003, 0.0, 0.0, 0.0, 0.0, 1.000003, 0.0, 0.0, 0.0, 0.0, 1.000003, 0.0,
+//         );
+//         let d = Transform::new_from_values(
+//             -1.000001, 0.0, 0.0, 0.0, 0.0, -1.000001, 0.0, 0.0, 0.0, 0.0, -1.000001, 0.0,
+//         );
 
-    #[test]
-    fn multiply_test() {
-        let a = Transform::new_from_values(
-            1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
-        );
-        let b = Transform::new_from_values(
-            1.0, 5.0, 9.0, 13.0, 2.0, 6.0, 10.0, 14.0, 3.0, 7.0, 11.0, 15.0,
-        );
-        let c = Transform::new_from_values(
-            97.0, 50.0, 136.0, 162.5, 110.0, 60.0, 156.0, 185.0, 123.0, 70.0, 176.0, 207.5,
-        );
+//         assert!(a.aprx_eq(b, 0.000001));
+//         assert!(!a.aprx_eq(c, 0.000001));
+//         assert!(!a.aprx_eq(d, 0.000001));
+//     }
 
-        assert_eq!(a * b, c);
-    }
+//     #[test]
+//     fn multiply_test() {
+//         let a = Transform::new_from_values(
+//             1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
+//         );
+//         let b = Transform::new_from_values(
+//             1.0, 5.0, 9.0, 13.0, 2.0, 6.0, 10.0, 14.0, 3.0, 7.0, 11.0, 15.0,
+//         );
+//         let c = Transform::new_from_values(
+//             97.0, 50.0, 136.0, 162.5, 110.0, 60.0, 156.0, 185.0, 123.0, 70.0, 176.0, 207.5,
+//         );
 
-    #[test]
-    fn inverse_test() {
-        let a = Transform::new_from_values(
-            1.0, 0.33, 0.0, -2.0, 0.0, 1.0, 0.0, 0.0, 2.1, 0.7, 1.3, 0.0,
-        );
-        let b = a.inverse();
-        let c = Transform::new();
+//         assert_eq!(a * b, c);
+//     }
 
-        assert!((dbg!(a * b)).aprx_eq(dbg!(c), 0.0000001));
-    }
-}
+//     #[test]
+//     fn inverse_test() {
+//         let a = Transform::new_from_values(
+//             1.0, 0.33, 0.0, -2.0, 0.0, 1.0, 0.0, 0.0, 2.1, 0.7, 1.3, 0.0,
+//         );
+//         let b = a.inverse();
+//         let c = Transform::new();
+
+//         assert!((dbg!(a * b)).aprx_eq(dbg!(c), 0.0000001));
+//     }
+// }
diff --git a/sub_crates/math3d/src/transform_dual.rs b/sub_crates/math3d/src/transform_dual.rs
new file mode 100644
index 0000000..d0f191e
--- /dev/null
+++ b/sub_crates/math3d/src/transform_dual.rs
@@ -0,0 +1,12 @@
+use crate::mat3x3::Mat3x3;
+use crate::wide4::f32x4;
+
+/// An affine transform that stores its matrix `m` together with the
+/// precomputed inverse `m_inv`, for reverse transforms among other things.
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+pub struct TransformDual {
+    pub(crate) m: Mat3x3,     // Scale, rotation, and shear.
+    pub(crate) m_inv: Mat3x3, // Inverse scale, rotation, and shear.
+    pub(crate) t: f32x4,      // Forward translation.
+}
diff --git a/sub_crates/math3d/src/vector.rs b/sub_crates/math3d/src/vector.rs
index 365e892..cecb4b6 100644
--- a/sub_crates/math3d/src/vector.rs
+++ b/sub_crates/math3d/src/vector.rs
@@ -1,286 +1,276 @@
 #![allow(dead_code)]
 
-use std::{
-    cmp::PartialEq,
-    ops::{Add, Div, Mul, Neg, Sub},
-};
+use std::ops::{Add, Div, Mul, Neg, Sub};
 
-use glam::Vec3A;
+use crate::normal::Normal;
+use crate::point::Point;
+use crate::wide4::f32x4;
 
-use super::{CrossProduct, DotProduct, Normal, Point, Transform};
-
-/// A direction vector in 3d homogeneous space.
+/// A direction vector in 3D space.
+///
+/// The x, y and z components are kept in the first three lanes of an
+/// `f32x4`.
 #[derive(Debug, Copy, Clone)]
-pub struct Vector {
-    pub co: Vec3A,
-}
+#[repr(transparent)]
+pub struct Vector(pub(crate) f32x4);
 
 impl Vector {
+    /// Builds a vector from `x`, `y` and `z`; the fourth lane is set to `0.0`.
     #[inline(always)]
-    pub fn new(x: f32, y: f32, z: f32) -> Vector {
-        Vector {
-            co: Vec3A::new(x, y, z),
-        }
+    pub fn new(x: f32, y: f32, z: f32) -> Self {
+        let lanes = f32x4::new(x, y, z, 0.0);
+        Self(lanes)
     }
 
+    /// Returns the Euclidean length: the square root of `length2()`.
     #[inline(always)]
-    pub fn length(&self) -> f32 {
-        self.co.length()
+    pub fn length(self) -> f32 {
+        let squared_len = self.length2();
+        squared_len.sqrt()
     }
 
+    /// Returns the squared length, `x*x + y*y + z*z`; the fourth lane is ignored.
     #[inline(always)]
-    pub fn length2(&self) -> f32 {
-        self.co.length_squared()
+    pub fn length2(self) -> f32 {
+        let squared = self.0 * self.0;
+        squared.a() + squared.b() + squared.c()
     }
 
+    /// Returns this vector divided by its own length.
+    ///
+    /// The length is not checked, so a zero-length vector is divided by zero.
     #[inline(always)]
-    pub fn normalized(&self) -> Vector {
-        Vector {
-            co: self.co.normalize(),
-        }
-    }
-
-    #[inline(always)]
-    pub fn abs(&self) -> Vector {
-        Vector {
-            co: self.co * self.co.signum(),
-        }
+    #[must_use]
+    pub fn normalized(self) -> Self {
+        let len = self.length();
+        Self(self.0 / len)
     }
 
+    /// Wraps the same `f32x4` value in a `Point`.
     #[inline(always)]
     pub fn into_point(self) -> Point {
-        Point { co: self.co }
+        Point(self.0)
     }
 
+    /// Wraps the same `f32x4` value in a `Normal`.
     #[inline(always)]
     pub fn into_normal(self) -> Normal {
-        Normal { co: self.co }
+        Normal(self.0)
     }
 
+    /// Returns the x component.
     #[inline(always)]
-    pub fn get_n(&self, n: usize) -> f32 {
-        match n {
-            0 => self.x(),
-            1 => self.y(),
-            2 => self.z(),
-            _ => panic!("Attempt to access dimension beyond z."),
-        }
+    pub fn x(self) -> f32 {
+        self.0.a()
     }
 
+    /// Returns the y component.
     #[inline(always)]
-    pub fn x(&self) -> f32 {
-        self.co[0]
+    pub fn y(self) -> f32 {
+        self.0.b()
     }
 
+    /// Returns the z component.
     #[inline(always)]
-    pub fn y(&self) -> f32 {
-        self.co[1]
+    pub fn z(self) -> f32 {
+        self.0.c()
     }
 
+    /// Returns a copy of this vector with the x component replaced by `x`.
     #[inline(always)]
-    pub fn z(&self) -> f32 {
-        self.co[2]
+    #[must_use]
+    pub fn set_x(self, x: f32) -> Self {
+        Self(self.0.set_a(x))
     }
 
+    /// Returns a copy of this vector with the y component replaced by `y`.
     #[inline(always)]
-    pub fn set_x(&mut self, x: f32) {
-        self.co[0] = x;
+    #[must_use]
+    pub fn set_y(self, y: f32) -> Self {
+        Self(self.0.set_b(y))
     }
 
+    /// Returns a copy of this vector with the z component replaced by `z`.
     #[inline(always)]
-    pub fn set_y(&mut self, y: f32) {
-        self.co[1] = y;
-    }
-
-    #[inline(always)]
-    pub fn set_z(&mut self, z: f32) {
-        self.co[2] = z;
-    }
-}
-
-impl PartialEq for Vector {
-    #[inline(always)]
-    fn eq(&self, other: &Vector) -> bool {
-        self.co == other.co
+    #[must_use]
+    pub fn set_z(self, z: f32) -> Self {
+        Self(self.0.set_c(z))
     }
 }
 
 impl Add for Vector {
-    type Output = Vector;
+    type Output = Self;
 
+    /// Lane-wise sum of the two vectors.
     #[inline(always)]
-    fn add(self, other: Vector) -> Vector {
-        Vector {
-            co: self.co + other.co,
-        }
+    fn add(self, other: Self) -> Self {
+        Self(self.0 + other.0)
     }
 }
 
 impl Sub for Vector {
-    type Output = Vector;
+    type Output = Self;
 
+    /// Lane-wise difference of the two vectors.
     #[inline(always)]
-    fn sub(self, other: Vector) -> Vector {
-        Vector {
-            co: self.co - other.co,
-        }
+    fn sub(self, other: Self) -> Self {
+        Self(self.0 - other.0)
     }
 }
 
 impl Mul<f32> for Vector {
-    type Output = Vector;
+    type Output = Self;
 
+    /// Multiplies every lane by the scalar `other`.
     #[inline(always)]
-    fn mul(self, other: f32) -> Vector {
-        Vector {
-            co: self.co * other,
-        }
+    fn mul(self, other: f32) -> Self {
+        Self(self.0 * other)
     }
 }
 
-impl Mul<Transform> for Vector {
-    type Output = Vector;
+// impl Mul<Transform> for Vector {
+//     type Output = Self;
 
-    #[inline]
-    fn mul(self, other: Transform) -> Vector {
-        Vector {
-            co: other.0.transform_vector3a(self.co),
-        }
-    }
-}
+//     #[inline]
+//     fn mul(self, other: Transform) -> Self {
+//         Self(other.0.transform_vector3a(self.0))
+//     }
+// }
 
 impl Div<f32> for Vector {
-    type Output = Vector;
+    type Output = Self;
 
+    /// Divides every lane by the scalar `other`.
     #[inline(always)]
-    fn div(self, other: f32) -> Vector {
-        Vector {
-            co: self.co / other,
-        }
+    fn div(self, other: f32) -> Self {
+        Self(self.0 / other)
     }
 }
 
 impl Neg for Vector {
-    type Output = Vector;
+    type Output = Self;
 
+    /// Negates every lane.
     #[inline(always)]
-    fn neg(self) -> Vector {
-        Vector { co: self.co * -1.0 }
+    fn neg(self) -> Self {
+        Self(-self.0)
     }
 }
 
-impl DotProduct for Vector {
-    #[inline(always)]
-    fn dot(self, other: Vector) -> f32 {
-        self.co.dot(other.co)
-    }
-}
+// impl DotProduct for Vector {
+//     #[inline(always)]
+//     fn dot(self, other: Self) -> f32 {
+//         self.co.dot(other.co)
+//     }
+// }
 
-impl CrossProduct for Vector {
-    #[inline]
-    fn cross(self, other: Vector) -> Vector {
-        Vector {
-            co: self.co.cross(other.co),
-        }
-    }
-}
+// impl CrossProduct for Vector {
+//     #[inline]
+//     fn cross(self, other: Self) -> Self {
+//         Self {
+//             co: self.co.cross(other.co),
+//         }
+//     }
+// }
 
-#[cfg(test)]
-mod tests {
-    use super::super::{CrossProduct, DotProduct, Transform};
-    use super::*;
+//-------------------------------------------------------------
 
-    #[test]
-    fn add() {
-        let v1 = Vector::new(1.0, 2.0, 3.0);
-        let v2 = Vector::new(1.5, 4.5, 2.5);
-        let v3 = Vector::new(2.5, 6.5, 5.5);
+// #[cfg(test)]
+// mod tests {
+//     use super::super::{CrossProduct, DotProduct, Transform};
+//     use super::*;
 
-        assert_eq!(v3, v1 + v2);
-    }
+//     #[test]
+//     fn add() {
+//         let v1 = Vector::new(1.0, 2.0, 3.0);
+//         let v2 = Vector::new(1.5, 4.5, 2.5);
+//         let v3 = Vector::new(2.5, 6.5, 5.5);
 
-    #[test]
-    fn sub() {
-        let v1 = Vector::new(1.0, 2.0, 3.0);
-        let v2 = Vector::new(1.5, 4.5, 2.5);
-        let v3 = Vector::new(-0.5, -2.5, 0.5);
+//         assert_eq!(v3, v1 + v2);
+//     }
 
-        assert_eq!(v3, v1 - v2);
-    }
+//     #[test]
+//     fn sub() {
+//         let v1 = Vector::new(1.0, 2.0, 3.0);
+//         let v2 = Vector::new(1.5, 4.5, 2.5);
+//         let v3 = Vector::new(-0.5, -2.5, 0.5);
 
-    #[test]
-    fn mul_scalar() {
-        let v1 = Vector::new(1.0, 2.0, 3.0);
-        let v2 = 2.0;
-        let v3 = Vector::new(2.0, 4.0, 6.0);
+//         assert_eq!(v3, v1 - v2);
+//     }
 
-        assert_eq!(v3, v1 * v2);
-    }
+//     #[test]
+//     fn mul_scalar() {
+//         let v1 = Vector::new(1.0, 2.0, 3.0);
+//         let v2 = 2.0;
+//         let v3 = Vector::new(2.0, 4.0, 6.0);
 
-    #[test]
-    fn mul_matrix_1() {
-        let v = Vector::new(1.0, 2.5, 4.0);
-        let m = Transform::new_from_values(
-            1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
-        );
-        assert_eq!(v * m, Vector::new(14.0, 46.0, 58.0));
-    }
+//         assert_eq!(v3, v1 * v2);
+//     }
 
-    #[test]
-    fn mul_matrix_2() {
-        let v = Vector::new(1.0, 2.5, 4.0);
-        let m = Transform::new_from_values(
-            1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
-        );
-        assert_eq!(v * m, Vector::new(14.0, 46.0, 58.0));
-    }
+//     #[test]
+//     fn mul_matrix_1() {
+//         let v = Vector::new(1.0, 2.5, 4.0);
+//         let m = Transform::new_from_values(
+//             1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
+//         );
+//         assert_eq!(v * m, Vector::new(14.0, 46.0, 58.0));
+//     }
 
-    #[test]
-    fn div() {
-        let v1 = Vector::new(1.0, 2.0, 3.0);
-        let v2 = 2.0;
-        let v3 = Vector::new(0.5, 1.0, 1.5);
+//     #[test]
+//     fn mul_matrix_2() {
+//         let v = Vector::new(1.0, 2.5, 4.0);
+//         let m = Transform::new_from_values(
+//             1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
+//         );
+//         assert_eq!(v * m, Vector::new(14.0, 46.0, 58.0));
+//     }
 
-        assert_eq!(v3, v1 / v2);
-    }
+//     #[test]
+//     fn div() {
+//         let v1 = Vector::new(1.0, 2.0, 3.0);
+//         let v2 = 2.0;
+//         let v3 = Vector::new(0.5, 1.0, 1.5);
 
-    #[test]
-    fn length() {
-        let v = Vector::new(1.0, 2.0, 3.0);
-        assert!((v.length() - 3.7416573867739413).abs() < 0.000001);
-    }
+//         assert_eq!(v3, v1 / v2);
+//     }
 
-    #[test]
-    fn length2() {
-        let v = Vector::new(1.0, 2.0, 3.0);
-        assert_eq!(v.length2(), 14.0);
-    }
+//     #[test]
+//     fn length() {
+//         let v = Vector::new(1.0, 2.0, 3.0);
+//         assert!((v.length() - 3.7416573867739413).abs() < 0.000001);
+//     }
 
-    #[test]
-    fn normalized() {
-        let v1 = Vector::new(1.0, 2.0, 3.0);
-        let v2 = Vector::new(0.2672612419124244, 0.5345224838248488, 0.8017837257372732);
-        let v3 = v1.normalized();
-        assert!((v3.x() - v2.x()).abs() < 0.000001);
-        assert!((v3.y() - v2.y()).abs() < 0.000001);
-        assert!((v3.z() - v2.z()).abs() < 0.000001);
-    }
+//     #[test]
+//     fn length2() {
+//         let v = Vector::new(1.0, 2.0, 3.0);
+//         assert_eq!(v.length2(), 14.0);
+//     }
 
-    #[test]
-    fn dot_test() {
-        let v1 = Vector::new(1.0, 2.0, 3.0);
-        let v2 = Vector::new(1.5, 4.5, 2.5);
-        let v3 = 18.0f32;
+//     #[test]
+//     fn normalized() {
+//         let v1 = Vector::new(1.0, 2.0, 3.0);
+//         let v2 = Vector::new(0.2672612419124244, 0.5345224838248488, 0.8017837257372732);
+//         let v3 = v1.normalized();
+//         assert!((v3.x() - v2.x()).abs() < 0.000001);
+//         assert!((v3.y() - v2.y()).abs() < 0.000001);
+//         assert!((v3.z() - v2.z()).abs() < 0.000001);
+//     }
 
-        assert_eq!(v3, v1.dot(v2));
-    }
+//     #[test]
+//     fn dot_test() {
+//         let v1 = Vector::new(1.0, 2.0, 3.0);
+//         let v2 = Vector::new(1.5, 4.5, 2.5);
+//         let v3 = 18.0f32;
 
-    #[test]
-    fn cross_test() {
-        let v1 = Vector::new(1.0, 0.0, 0.0);
-        let v2 = Vector::new(0.0, 1.0, 0.0);
-        let v3 = Vector::new(0.0, 0.0, 1.0);
+//         assert_eq!(v3, v1.dot(v2));
+//     }
 
-        assert_eq!(v3, v1.cross(v2));
-    }
-}
+//     #[test]
+//     fn cross_test() {
+//         let v1 = Vector::new(1.0, 0.0, 0.0);
+//         let v2 = Vector::new(0.0, 1.0, 0.0);
+//         let v3 = Vector::new(0.0, 0.0, 1.0);
+
+//         assert_eq!(v3, v1.cross(v2));
+//     }
+// }
diff --git a/sub_crates/math3d/src/wide4.rs b/sub_crates/math3d/src/wide4.rs
new file mode 100644
index 0000000..3001e2f
--- /dev/null
+++ b/sub_crates/math3d/src/wide4.rs
@@ -0,0 +1,328 @@
+use std::ops::{AddAssign, DivAssign, MulAssign, SubAssign};
+
+pub use fallback::f32x4;
+mod fallback {
+    use std::ops::{Add, Div, Mul, Neg, Sub};
+
+    /// Four `f32` lanes held in a 16-byte-aligned array.
+    ///
+    /// Every operation defined here works on the lanes one at a time in
+    /// plain scalar code.
+    #[allow(non_camel_case_types)]
+    #[derive(Debug, Copy, Clone)]
+    #[repr(C, align(16))]
+    pub struct f32x4 {
+        n: [f32; 4],
+    }
+
+    impl f32x4 {
+        /// Builds an `f32x4` whose lanes are `a`, `b`, `c` and `d`, in that order.
+        #[inline(always)]
+        pub fn new(a: f32, b: f32, c: f32, d: f32) -> Self {
+            let n = [a, b, c, d];
+            Self { n }
+        }
+
+        /// Builds an `f32x4` with every lane set to `n`.
+        #[inline(always)]
+        pub fn splat(n: f32) -> Self {
+            Self { n: [n; 4] }
+        }
+
+        /// Lane-wise fused multiply-add.
+        ///
+        /// Each lane is `(self * a) + b`, computed through `f32::mul_add` so
+        /// that only one rounding error is introduced.
+        #[inline(always)]
+        pub fn mul_add(self, a: Self, b: Self) -> Self {
+            let [s0, s1, s2, s3] = self.n;
+            let [a0, a1, a2, a3] = a.n;
+            let [b0, b1, b2, b3] = b.n;
+            Self {
+                n: [
+                    s0.mul_add(a0, b0),
+                    s1.mul_add(a1, b1),
+                    s2.mul_add(a2, b2),
+                    s3.mul_add(a3, b3),
+                ],
+            }
+        }
+
+        /// Lane-wise minimum of `self` and `a`, using `f32::min` per lane.
+        #[inline(always)]
+        pub fn min(self, a: Self) -> Self {
+            let [s0, s1, s2, s3] = self.n;
+            let [a0, a1, a2, a3] = a.n;
+            Self {
+                n: [s0.min(a0), s1.min(a1), s2.min(a2), s3.min(a3)],
+            }
+        }
+
+        /// Lane-wise maximum of `self` and `a`, using `f32::max` per lane.
+        #[inline(always)]
+        pub fn max(self, a: Self) -> Self {
+            let [s0, s1, s2, s3] = self.n;
+            let [a0, a1, a2, a3] = a.n;
+            Self {
+                n: [s0.max(a0), s1.max(a1), s2.max(a2), s3.max(a3)],
+            }
+        }
+
+        // /// Horizontal minimum.
+        // #[inline(always)]
+        // pub fn hmin(self) -> f32 {
+        //     let a = self.n[0].min(self.n[1]);
+        //     let b = self.n[2].min(self.n[3]);
+        //     a.min(b)
+        // }
+
+        // /// Horizontal maximum.
+        // #[inline(always)]
+        // pub fn hmax(self) -> f32 {
+        //     let a = self.n[0].max(self.n[1]);
+        //     let b = self.n[2].max(self.n[3]);
+        //     a.max(b)
+        // }
+
+        //-----------------------------------------------------
+        // Individual components.
+
+        /// Returns the first lane.
+        #[inline(always)]
+        pub fn a(self) -> f32 {
+            let [lane, _, _, _] = self.n;
+            lane
+        }
+
+        /// Returns the second lane.
+        #[inline(always)]
+        pub fn b(self) -> f32 {
+            let [_, lane, _, _] = self.n;
+            lane
+        }
+
+        /// Returns the third lane.
+        #[inline(always)]
+        pub fn c(self) -> f32 {
+            let [_, _, lane, _] = self.n;
+            lane
+        }
+
+        /// Returns the fourth lane.
+        #[inline(always)]
+        pub fn d(self) -> f32 {
+            let [_, _, _, lane] = self.n;
+            lane
+        }
+
+        /// Returns a copy of `self` with the first lane replaced by `n`.
+        #[inline(always)]
+        #[must_use]
+        pub fn set_a(mut self, n: f32) -> Self {
+            self.n[0] = n;
+            self
+        }
+
+        /// Returns a copy of `self` with the second lane replaced by `n`.
+        #[inline(always)]
+        #[must_use]
+        pub fn set_b(mut self, n: f32) -> Self {
+            self.n[1] = n;
+            self
+        }
+
+        /// Returns a copy of `self` with the third lane replaced by `n`.
+        #[inline(always)]
+        #[must_use]
+        pub fn set_c(mut self, n: f32) -> Self {
+            self.n[2] = n;
+            self
+        }
+
+        /// Returns a copy of `self` with the fourth lane replaced by `n`.
+        #[inline(always)]
+        #[must_use]
+        pub fn set_d(mut self, n: f32) -> Self {
+            self.n[3] = n;
+            self
+        }
+
+        //-----------------------------------------------------
+        // Shuffles.
+
+        /// Returns an `f32x4` with the first lane copied into all four lanes.
+        #[inline(always)]
+        pub fn aaaa(self) -> Self {
+            Self::splat(self.n[0])
+        }
+
+        /// Returns an `f32x4` with the second lane copied into all four lanes.
+        #[inline(always)]
+        pub fn bbbb(self) -> Self {
+            Self::splat(self.n[1])
+        }
+
+        /// Returns an `f32x4` with the third lane copied into all four lanes.
+        #[inline(always)]
+        pub fn cccc(self) -> Self {
+            Self::splat(self.n[2])
+        }
+
+        /// Returns an `f32x4` with the fourth lane copied into all four lanes.
+        #[inline(always)]
+        pub fn dddd(self) -> Self {
+            Self::splat(self.n[3])
+        }
+    }
+
+    impl Add for f32x4 {
+        type Output = Self;
+
+        /// Lane-wise sum.
+        #[inline(always)]
+        fn add(self, rhs: Self) -> Self {
+            let [l0, l1, l2, l3] = self.n;
+            let [r0, r1, r2, r3] = rhs.n;
+            Self {
+                n: [l0 + r0, l1 + r1, l2 + r2, l3 + r3],
+            }
+        }
+    }
+
+    impl Sub for f32x4 {
+        type Output = Self;
+
+        /// Lane-wise difference.
+        #[inline(always)]
+        fn sub(self, rhs: Self) -> Self {
+            let [l0, l1, l2, l3] = self.n;
+            let [r0, r1, r2, r3] = rhs.n;
+            Self {
+                n: [l0 - r0, l1 - r1, l2 - r2, l3 - r3],
+            }
+        }
+    }
+
+    impl Mul for f32x4 {
+        type Output = Self;
+
+        /// Lane-wise product.
+        #[inline(always)]
+        fn mul(self, rhs: Self) -> Self {
+            let [l0, l1, l2, l3] = self.n;
+            let [r0, r1, r2, r3] = rhs.n;
+            Self {
+                n: [l0 * r0, l1 * r1, l2 * r2, l3 * r3],
+            }
+        }
+    }
+
+    impl Mul<f32> for f32x4 {
+        type Output = Self;
+
+        /// Multiplies every lane by the scalar `rhs`.
+        #[inline(always)]
+        fn mul(self, rhs: f32) -> Self {
+            let [l0, l1, l2, l3] = self.n;
+            Self {
+                n: [l0 * rhs, l1 * rhs, l2 * rhs, l3 * rhs],
+            }
+        }
+    }
+
+    impl Div for f32x4 {
+        type Output = Self;
+
+        /// Lane-wise quotient.
+        #[inline(always)]
+        fn div(self, rhs: Self) -> Self {
+            let [l0, l1, l2, l3] = self.n;
+            let [r0, r1, r2, r3] = rhs.n;
+            Self {
+                n: [l0 / r0, l1 / r1, l2 / r2, l3 / r3],
+            }
+        }
+    }
+
+    impl Div<f32> for f32x4 {
+        type Output = Self;
+
+        /// Divides every lane by the scalar `rhs`.
+        #[inline(always)]
+        fn div(self, rhs: f32) -> Self {
+            let [l0, l1, l2, l3] = self.n;
+            Self {
+                n: [l0 / rhs, l1 / rhs, l2 / rhs, l3 / rhs],
+            }
+        }
+    }
+
+    impl Neg for f32x4 {
+        type Output = Self;
+
+        /// Negates every lane.
+        #[inline(always)]
+        fn neg(self) -> Self {
+            let [l0, l1, l2, l3] = self.n;
+            Self {
+                n: [-l0, -l1, -l2, -l3],
+            }
+        }
+    }
+}
+
+//-------------------------------------------------------------
+
+impl AddAssign for f32x4 {
+    /// Replaces `self` with `self + rhs`, lane by lane.
+    #[inline(always)]
+    fn add_assign(&mut self, rhs: Self) {
+        let sum = *self + rhs;
+        *self = sum;
+    }
+}
+
+impl SubAssign for f32x4 {
+    /// Replaces `self` with `self - rhs`, lane by lane.
+    #[inline(always)]
+    fn sub_assign(&mut self, rhs: Self) {
+        let difference = *self - rhs;
+        *self = difference;
+    }
+}
+
+impl MulAssign for f32x4 {
+    /// Replaces `self` with `self * rhs`, lane by lane.
+    #[inline(always)]
+    fn mul_assign(&mut self, rhs: Self) {
+        let product = *self * rhs;
+        *self = product;
+    }
+}
+
+impl MulAssign<f32> for f32x4 {
+    /// Replaces `self` with every lane multiplied by the scalar `rhs`.
+    #[inline(always)]
+    fn mul_assign(&mut self, rhs: f32) {
+        let scaled = *self * rhs;
+        *self = scaled;
+    }
+}
+
+impl DivAssign for f32x4 {
+    /// Replaces `self` with `self / rhs`, lane by lane.
+    #[inline(always)]
+    fn div_assign(&mut self, rhs: Self) {
+        let quotient = *self / rhs;
+        *self = quotient;
+    }
+}
+
+impl DivAssign<f32> for f32x4 {
+    /// Replaces `self` with every lane divided by the scalar `rhs`.
+    #[inline(always)]
+    fn div_assign(&mut self, rhs: f32) {
+        let scaled = *self / rhs;
+        *self = scaled;
+    }
+}