Fix bug resulting in cracks between triangles in some cases.

2022-08-03 10:23:26 -07:00 · 2022-08-03 10:23:26 -07:00 · 15cd261026
commit 15cd261026
parent 9569f772f0
2 changed files with 40 additions and 3 deletions
--- a/sub_crates/rmath/src/wide4/mod.rs
+++ b/sub_crates/rmath/src/wide4/mod.rs
@@ -748,4 +748,38 @@ mod tests {
        assert_eq!(all ^ all, none);
        assert_eq!(none ^ none, none);
    }
+
+    #[test]
+    fn matches_fallback() { // `Float4` must produce the same lanes as `fallback::Float4`.
+        fn tf1(n: Float4) -> [f32; 4] { // Unpack the four lanes into a comparable array.
+            [n.a(), n.b(), n.c(), n.d()]
+        }
+        fn tf2(n: fallback::Float4) -> [f32; 4] { // Same unpacking for the fallback type.
+            [n.a(), n.b(), n.c(), n.d()]
+        }
+
+        let a1 = Float4::new(1.53245, 5.4234523, -424.432, 0.0004231); // Mixed signs/magnitudes.
+        let b1 = Float4::new(74.63, -9.65436, 3.0, -1003.3);
+        let c1 = Float4::new(-0.4216, -132.52, 8.9452, 42.0);
+
+        let a2 = fallback::Float4::new(1.53245, 5.4234523, -424.432, 0.0004231); // Same literals.
+        let b2 = fallback::Float4::new(74.63, -9.65436, 3.0, -1003.3);
+        let c2 = fallback::Float4::new(-0.4216, -132.52, 8.9452, 42.0);
+
+        assert_eq!(tf1(a1), tf2(a2)); // Construction and lane accessors agree.
+        assert_eq!(tf1(b1), tf2(b2));
+        assert_eq!(tf1(c1), tf2(c2));
+
+        assert_eq!(tf1(a1 + b1), tf2(a2 + b2)); // From here on: exact `==` per lane, no tolerance.
+        assert_eq!(tf1(a1 - b1), tf2(a2 - b2));
+        assert_eq!(tf1(a1 * b1), tf2(a2 * b2));
+        assert_eq!(tf1(a1 / b1), tf2(a2 / b2));
+        assert_eq!(tf1(a1.mul_add(b1, c1)), tf2(a2.mul_add(b2, c2)));
+        assert_eq!(tf1(a1.min(b1)), tf2(a2.min(b2)));
+        assert_eq!(tf1(a1.max(b1)), tf2(a2.max(b2)));
+        assert_eq!(a1.min_element(), a2.min_element()); // Reductions are compared directly.
+        assert_eq!(a1.max_element(), a2.max_element());
+        assert_eq!(tf1(a1.recip()), tf2(a2.recip())); // Presumably fails for an approximate recip.
+        assert_eq!(tf1(a1.abs()), tf2(a2.abs()));
+    }
 }
--- a/sub_crates/rmath/src/wide4/sse.rs
+++ b/sub_crates/rmath/src/wide4/sse.rs
@@ -3,8 +3,8 @@ use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, Mul, Neg, Not, Sub};
 use std::arch::x86_64::{
    __m128, _mm_add_ps, _mm_and_ps, _mm_castsi128_ps, _mm_cmpeq_ps, _mm_cmpge_ps, _mm_cmpgt_ps,
    _mm_cmple_ps, _mm_cmplt_ps, _mm_div_ps, _mm_fmadd_ps, _mm_max_ps, _mm_min_ps, _mm_movemask_ps,
-    _mm_mul_ps, _mm_or_ps, _mm_rcp_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_set_ps,
-    _mm_setzero_ps, _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
+    _mm_mul_ps, _mm_or_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_set_ps, _mm_setzero_ps,
+    _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
 };

 use crate::FMulAdd;
@@ -77,7 +77,10 @@ impl Float4 {
    /// 1.0 / self
    #[inline(always)]
    pub fn recip(self) -> Self {
-        Self(unsafe { _mm_rcp_ps(self.0) })
+        // `_mm_rcp_ps` only approximates the reciprocal, which is not precise enough here.
+        // Old version, for reference: Self(unsafe { std::arch::x86_64::_mm_rcp_ps(self.0) })
+
+        Self::splat(1.0) / self
    }

    #[inline(always)]