diff --git a/sub_crates/rmath/src/wide4/mod.rs b/sub_crates/rmath/src/wide4/mod.rs
index 9e4d72c..7fcdeb6 100644
--- a/sub_crates/rmath/src/wide4/mod.rs
+++ b/sub_crates/rmath/src/wide4/mod.rs
@@ -748,4 +748,38 @@ mod tests {
         assert_eq!(all ^ all, none);
         assert_eq!(none ^ none, none);
     }
+
+    #[test]
+    fn matches_fallback() {
+        // Lane extractors, so values of both types compare as plain arrays.
+        fn lanes(v: Float4) -> [f32; 4] {
+            [v.a(), v.b(), v.c(), v.d()]
+        }
+        fn fb_lanes(v: fallback::Float4) -> [f32; 4] {
+            [v.a(), v.b(), v.c(), v.d()]
+        }
+        // The same three inputs, built once as `Float4` and once as `fallback::Float4`.
+        let x = Float4::new(1.53245, 5.4234523, -424.432, 0.0004231);
+        let y = Float4::new(74.63, -9.65436, 3.0, -1003.3);
+        let z = Float4::new(-0.4216, -132.52, 8.9452, 42.0);
+        let fx = fallback::Float4::new(1.53245, 5.4234523, -424.432, 0.0004231);
+        let fy = fallback::Float4::new(74.63, -9.65436, 3.0, -1003.3);
+        let fz = fallback::Float4::new(-0.4216, -132.52, 8.9452, 42.0);
+
+        // Construction first, then each operation, must agree exactly lane for lane.
+        assert_eq!(lanes(x), fb_lanes(fx));
+        assert_eq!(lanes(y), fb_lanes(fy));
+        assert_eq!(lanes(z), fb_lanes(fz));
+        assert_eq!(lanes(x + y), fb_lanes(fx + fy));
+        assert_eq!(lanes(x - y), fb_lanes(fx - fy));
+        assert_eq!(lanes(x * y), fb_lanes(fx * fy));
+        assert_eq!(lanes(x / y), fb_lanes(fx / fy));
+        assert_eq!(lanes(x.mul_add(y, z)), fb_lanes(fx.mul_add(fy, fz)));
+        assert_eq!(lanes(x.min(y)), fb_lanes(fx.min(fy)));
+        assert_eq!(lanes(x.max(y)), fb_lanes(fx.max(fy)));
+        assert_eq!(x.min_element(), fx.min_element());
+        assert_eq!(x.max_element(), fx.max_element());
+        assert_eq!(lanes(x.recip()), fb_lanes(fx.recip()));
+        assert_eq!(lanes(x.abs()), fb_lanes(fx.abs()));
+    }
 }
diff --git a/sub_crates/rmath/src/wide4/sse.rs b/sub_crates/rmath/src/wide4/sse.rs
index 6f45e27..a8511d0 100644
--- a/sub_crates/rmath/src/wide4/sse.rs
+++ b/sub_crates/rmath/src/wide4/sse.rs
@@ -3,8 +3,8 @@ use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, Mul, Neg, Not, Sub};
 use std::arch::x86_64::{
     __m128, _mm_add_ps, _mm_and_ps, _mm_castsi128_ps, _mm_cmpeq_ps, _mm_cmpge_ps, _mm_cmpgt_ps,
     _mm_cmple_ps, _mm_cmplt_ps, _mm_div_ps, _mm_fmadd_ps, _mm_max_ps, _mm_min_ps, _mm_movemask_ps,
-    _mm_mul_ps, _mm_or_ps, _mm_rcp_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_set_ps,
-    _mm_setzero_ps, _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
+    _mm_mul_ps, _mm_or_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_set_ps, _mm_setzero_ps,
+    _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
 };
 
 use crate::FMulAdd;
@@ -77,7 +77,10 @@ impl Float4 {
     /// 1.0 / self
     #[inline(always)]
     pub fn recip(self) -> Self {
-        Self(unsafe { _mm_rcp_ps(self.0) })
+        // The reciprocal intrinsic (`_mm_rcp_ps`) is not precise enough, so
+        // it is deliberately not used here; the result is computed with an
+        // ordinary division instead.
+        Self::splat(1.0) / self
     }
 
     #[inline(always)]