use std::ops::{AddAssign, DivAssign, MulAssign, SubAssign}; use approx::relative_eq; use crate::{difference_of_products, two_prod, two_sum}; pub use fallback::Float4; mod fallback { use std::ops::{Add, Div, Mul, Neg, Sub}; use crate::FMulAdd; #[derive(Debug, Copy, Clone)] #[repr(C, align(16))] pub struct Float4 { n: [f32; 4], } impl Float4 { /// Create a new `Float4` with the given components. #[inline(always)] pub fn new(a: f32, b: f32, c: f32, d: f32) -> Self { Self { n: [a, b, c, d] } } /// Create a new `Float4` with all elements set to `n`. #[inline(always)] pub fn splat(n: f32) -> Self { Self { n: [n, n, n, n] } } /// Component-wise fused multiply-add. /// /// `(self * a) + b` with only one rounding error. #[inline(always)] pub fn mul_add(self, a: Self, b: Self) -> Self { Self { n: [ self.n[0].mul_add(a.n[0], b.n[0]), self.n[1].mul_add(a.n[1], b.n[1]), self.n[2].mul_add(a.n[2], b.n[2]), self.n[3].mul_add(a.n[3], b.n[3]), ], } } /// Vertical minimum. #[inline(always)] pub fn min(self, a: Self) -> Self { Self { n: [ self.n[0].min(a.n[0]), self.n[1].min(a.n[1]), self.n[2].min(a.n[2]), self.n[3].min(a.n[3]), ], } } /// Vertical maximum. #[inline(always)] pub fn max(self, a: Self) -> Self { Self { n: [ self.n[0].max(a.n[0]), self.n[1].max(a.n[1]), self.n[2].max(a.n[2]), self.n[3].max(a.n[3]), ], } } // /// Horizontal minimum. // #[inline(always)] // pub fn hmin(self) -> f32 { // let a = self.n[0].min(self.n[1]); // let b = self.n[2].min(self.n[3]); // a.min(b) // } // /// Horizontal maximum. // #[inline(always)] // pub fn hmax(self) -> f32 { // let a = self.n[0].max(self.n[1]); // let b = self.n[2].max(self.n[3]); // a.max(b) // } //----------------------------------------------------- // Individual components. #[inline(always)] pub fn a(self) -> f32 { self.n[0] } #[inline(always)] pub fn b(self) -> f32 { self.n[1] } #[inline(always)] pub fn c(self) -> f32 { self.n[2] } #[inline(always)] pub fn d(self) -> f32 { self.n[3] } #[inline(always)] #[must_use] pub fn set_a(self, n: f32) -> Self { Self { n: [n, self.n[1], self.n[2], self.n[3]], } } #[inline(always)] #[must_use] pub fn set_b(self, n: f32) -> Self { Self { n: [self.n[0], n, self.n[2], self.n[3]], } } #[inline(always)] #[must_use] pub fn set_c(self, n: f32) -> Self { Self { n: [self.n[0], self.n[1], n, self.n[3]], } } #[inline(always)] #[must_use] pub fn set_d(self, n: f32) -> Self { Self { n: [self.n[0], self.n[1], self.n[2], n], } } //----------------------------------------------------- // Shuffles. #[inline(always)] pub fn aaaa(self) -> Self { let a = self.n[0]; Self { n: [a, a, a, a] } } #[inline(always)] pub fn bbbb(self) -> Self { let b = self.n[1]; Self { n: [b, b, b, b] } } #[inline(always)] pub fn cccc(self) -> Self { let c = self.n[2]; Self { n: [c, c, c, c] } } #[inline(always)] pub fn dddd(self) -> Self { let d = self.n[3]; Self { n: [d, d, d, d] } } #[inline(always)] pub fn bcad(self) -> Self { let a = self.n[0]; let b = self.n[1]; let c = self.n[2]; let d = self.n[3]; Self { n: [b, c, a, d] } } #[inline(always)] pub fn cabd(self) -> Self { let a = self.n[0]; let b = self.n[1]; let c = self.n[2]; let d = self.n[3]; Self { n: [c, a, b, d] } } } impl Add for Float4 { type Output = Self; #[inline(always)] fn add(self, rhs: Self) -> Self { Self { n: [ self.n[0] + rhs.n[0], self.n[1] + rhs.n[1], self.n[2] + rhs.n[2], self.n[3] + rhs.n[3], ], } } } impl Sub for Float4 { type Output = Self; #[inline(always)] fn sub(self, rhs: Self) -> Self { Self { n: [ self.n[0] - rhs.n[0], self.n[1] - rhs.n[1], self.n[2] - rhs.n[2], self.n[3] - rhs.n[3], ], } } } impl Mul for Float4 { type Output = Self; #[inline(always)] fn mul(self, rhs: Self) -> Self { Self { n: [ self.n[0] * rhs.n[0], self.n[1] * rhs.n[1], self.n[2] * rhs.n[2], self.n[3] * rhs.n[3], ], } } } impl Mul for Float4 { type Output = Self; #[inline(always)] fn mul(self, rhs: f32) -> Self { Self { n: [ self.n[0] * rhs, self.n[1] * rhs, self.n[2] * rhs, self.n[3] * rhs, ], } } } impl Div for Float4 { type Output = Self; #[inline(always)] fn div(self, rhs: Self) -> Self { Self { n: [ self.n[0] / rhs.n[0], self.n[1] / rhs.n[1], self.n[2] / rhs.n[2], self.n[3] / rhs.n[3], ], } } } impl Div for Float4 { type Output = Self; #[inline(always)] fn div(self, rhs: f32) -> Self { Self { n: [ self.n[0] / rhs, self.n[1] / rhs, self.n[2] / rhs, self.n[3] / rhs, ], } } } impl Neg for Float4 { type Output = Self; #[inline(always)] fn neg(self) -> Self { Self { n: [-self.n[0], -self.n[1], -self.n[2], -self.n[3]], } } } impl FMulAdd for Float4 { fn fma(self, b: Self, c: Self) -> Self { self.mul_add(b, c) } } } //------------------------------------------------------------- // Float4 impls that don't depend on its inner representation. impl Float4 { /// 3D dot product (only uses the first 3 components). #[inline(always)] pub fn dot_3(a: Self, b: Self) -> f32 { let (p, p_err) = two_prod(a, b); // Products. let (x, x_err) = (p.a(), p_err.a()); let (y, y_err) = (p.b(), p_err.b()); let (z, z_err) = (p.c(), p_err.c()); // Sums. let (s1, s1_err) = two_sum(x, y); let err1 = x_err + (y_err + s1_err); let (s2, s2_err) = two_sum(s1, z); let err2 = z_err + (err1 + s2_err); // Final result with rounding error compensation. s2 + err2 } /// 3D dot product (only uses the first 3 components). /// /// Faster but less precise version. #[inline(always)] pub fn dot_3_fast(a: Self, b: Self) -> f32 { let c = a * b; c.a() + c.b() + c.c() } #[inline(always)] pub fn transpose_3x3(m: [Self; 3]) -> [Self; 3] { [ // The fourth component in each row below is arbitrary, // but in this case chosen so that it matches the // behavior of the SSE version of transpose_3x3. Self::new(m[0].a(), m[1].a(), m[2].a(), m[2].d()), Self::new(m[0].b(), m[1].b(), m[2].b(), m[2].d()), Self::new(m[0].c(), m[1].c(), m[2].c(), m[2].d()), ] } /// Invert a 3x3 matrix. /// /// Returns `None` if not invertible. #[inline] pub fn invert_3x3(m: [Self; 3]) -> Option<[Self; 3]> { let m0_bca = m[0].bcad(); let m1_bca = m[1].bcad(); let m2_bca = m[2].bcad(); let m0_cab = m[0].cabd(); let m1_cab = m[1].cabd(); let m2_cab = m[2].cabd(); let abc = difference_of_products(m1_bca, m2_cab, m1_cab, m2_bca); let def = difference_of_products(m2_bca, m0_cab, m2_cab, m0_bca); let ghi = difference_of_products(m0_bca, m1_cab, m0_cab, m1_bca); let det = Self::dot_3( Self::new(abc.a(), def.a(), ghi.a(), 0.0), Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0), ); if det == 0.0 { None } else { Some(Self::transpose_3x3([abc / det, def / det, ghi / det])) } } /// Invert a 3x3 matrix. Faster but less precise version. /// /// Returns `None` if not invertible. #[inline] pub fn invert_3x3_fast(m: [Self; 3]) -> Option<[Self; 3]> { let m0_bca = m[0].bcad(); let m1_bca = m[1].bcad(); let m2_bca = m[2].bcad(); let m0_cab = m[0].cabd(); let m1_cab = m[1].cabd(); let m2_cab = m[2].cabd(); let abc = (m1_bca * m2_cab) - (m1_cab * m2_bca); let def = (m2_bca * m0_cab) - (m2_cab * m0_bca); let ghi = (m0_bca * m1_cab) - (m0_cab * m1_bca); let det = Self::dot_3_fast( Self::new(abc.a(), def.a(), ghi.a(), 0.0), Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0), ); if det == 0.0 { None } else { Some(Self::transpose_3x3([abc / det, def / det, ghi / det])) } } /// Multiplies a 3D vector with a 3x3 matrix. #[inline] pub fn vec_mul_3x3(self, m: &[Self; 3]) -> Self { let x = self.aaaa(); let y = self.bbbb(); let z = self.cccc(); // Products. let (a, a_err) = two_prod(x, m[0]); let (b, b_err) = two_prod(y, m[1]); let (c, c_err) = two_prod(z, m[2]); // Sums. let (s1, s1_err) = two_sum(a, b); let err1 = a_err + (b_err + s1_err); let (s2, s2_err) = two_sum(c, s1); let err2 = c_err + (err1 + s2_err); s2 + err2 } /// Multiplies a 3D vector with a 3x3 matrix. /// /// Faster but less precise version. #[inline] pub fn vec_mul_3x3_fast(self, m: &[Self; 3]) -> Self { let x = self.aaaa(); let y = self.bbbb(); let z = self.cccc(); (x * m[0]) + (y * m[1]) + (z * m[2]) } /// Transforms a 3d point by an affine transform. /// /// `m` is the 3x3 part of the affine transform, `t` is the translation part. #[inline] pub fn vec_mul_affine(self, m: &[Self; 3], t: Self) -> Self { let x = self.aaaa(); let y = self.bbbb(); let z = self.cccc(); // Products. let (a, a_err) = two_prod(x, m[0]); let (b, b_err) = two_prod(y, m[1]); let (c, c_err) = two_prod(z, m[2]); // Sums. let (s1, s1_err) = two_sum(a, b); let err1 = a_err + (b_err + s1_err); let (s2, s2_err) = two_sum(c, s1); let err2 = c_err + (err1 + s2_err); let (s3, s3_err) = two_sum(t, s2); let err3 = err2 + s3_err; s3 + err3 } /// Transforms a 3d point by an affine transform. /// /// Faster but less precise version. #[inline] pub fn vec_mul_affine_fast(self, m: &[Self; 3], t: Self) -> Self { let x = self.aaaa(); let y = self.bbbb(); let z = self.cccc(); (x * m[0]) + (y * m[1]) + (z * m[2]) + t } /// Transforms a 3d point by an affine transform, except it applies /// the translation part before the 3x3 part. /// /// This is primarily useful for performing efficient inverse transforms by /// passing an inverted 3x3 part and a negated translation part. /// /// `m` is the 3x3 part of the affine transform, `t` is the translation part. #[inline] pub fn vec_mul_affine_rev(self, m: &[Self; 3], t: Self) -> Self { let (v, v_err) = two_sum(self, t); let (x, x_err) = (v.aaaa(), v_err.aaaa()); let (y, y_err) = (v.bbbb(), v_err.bbbb()); let (z, z_err) = (v.cccc(), v_err.cccc()); // Products. let ((a, a_err1), a_err2) = (two_prod(x, m[0]), x_err * m[0]); let ((b, b_err1), b_err2) = (two_prod(y, m[1]), y_err * m[1]); let ((c, c_err1), c_err2) = (two_prod(z, m[2]), z_err * m[2]); let a_err = a_err1 + a_err2; let b_err = b_err1 + b_err2; let c_err = c_err1 + c_err2; // Sums. let (s1, s1_err) = two_sum(a, b); let err1 = a_err + (b_err + s1_err); let (s2, s2_err) = two_sum(c, s1); let err2 = c_err + (err1 + s2_err); let (s3, s3_err) = two_sum(t, s2); let err3 = err2 + s3_err; s3 + err3 } /// Transforms a 3d point by an affine transform, except it applies /// the translation part before the 3x3 part. /// /// Faster but less precise version. #[inline] pub fn vec_mul_affine_rev_fast(self, m: &[Self; 3], t: Self) -> Self { let v = self + t; let x = v.aaaa(); let y = v.bbbb(); let z = v.cccc(); (x * m[0]) + (y * m[1]) + (z * m[2]) } /// Returns whether the `Float4`s are approximately equal to each /// other. /// /// Each corresponding element cannot have a relative error exceeding /// `epsilon`. pub(crate) fn aprx_eq(a: Self, b: Self, epsilon: f32) -> bool { let mut eq = true; eq &= relative_eq!(a.a(), b.a(), epsilon = epsilon); eq &= relative_eq!(a.b(), b.b(), epsilon = epsilon); eq &= relative_eq!(a.c(), b.c(), epsilon = epsilon); eq &= relative_eq!(a.d(), b.d(), epsilon = epsilon); eq } } impl AddAssign for Float4 { #[inline(always)] fn add_assign(&mut self, rhs: Self) { *self = *self + rhs; } } impl SubAssign for Float4 { #[inline(always)] fn sub_assign(&mut self, rhs: Self) { *self = *self - rhs; } } impl MulAssign for Float4 { #[inline(always)] fn mul_assign(&mut self, rhs: Self) { *self = *self * rhs; } } impl MulAssign for Float4 { #[inline(always)] fn mul_assign(&mut self, rhs: f32) { *self = *self * rhs; } } impl DivAssign for Float4 { #[inline(always)] fn div_assign(&mut self, rhs: Self) { *self = *self / rhs; } } impl DivAssign for Float4 { #[inline(always)] fn div_assign(&mut self, rhs: f32) { *self = *self / rhs; } }