584 lines
15 KiB
Rust
584 lines
15 KiB
Rust
use std::ops::{AddAssign, DivAssign, MulAssign, SubAssign};
|
|
|
|
use approx::relative_eq;
|
|
|
|
use crate::{difference_of_products, two_prod, two_sum};
|
|
|
|
pub use fallback::Float4;
|
|
mod fallback {
|
|
use std::ops::{Add, Div, Mul, Neg, Sub};
|
|
|
|
use crate::FMulAdd;
|
|
|
|
/// Four `f32` lanes backed by a plain array.
///
/// `repr(C, align(16))` pins the field layout and forces 16-byte alignment.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(16))]
pub struct Float4 {
    /// Lane storage, in `a`, `b`, `c`, `d` order.
    n: [f32; 4],
}

impl Float4 {
    /// Builds a `Float4` from its four components, given in lane order.
    #[inline(always)]
    pub fn new(a: f32, b: f32, c: f32, d: f32) -> Self {
        let n = [a, b, c, d];
        Self { n }
    }

    /// Builds a `Float4` whose four lanes all hold `n`.
    #[inline(always)]
    pub fn splat(n: f32) -> Self {
        Self { n: [n; 4] }
    }

    /// Lane-wise fused multiply-add.
    ///
    /// Each lane is `(self * a) + b`, computed through `f32::mul_add` so the
    /// result is rounded only once.
    #[inline(always)]
    pub fn mul_add(self, a: Self, b: Self) -> Self {
        let [s0, s1, s2, s3] = self.n;
        let [m0, m1, m2, m3] = a.n;
        let [t0, t1, t2, t3] = b.n;
        Self::new(
            s0.mul_add(m0, t0),
            s1.mul_add(m1, t1),
            s2.mul_add(m2, t2),
            s3.mul_add(m3, t3),
        )
    }

    /// Lane-wise (vertical) minimum of `self` and `a`, using `f32::min`.
    #[inline(always)]
    pub fn min(self, a: Self) -> Self {
        let [l0, l1, l2, l3] = self.n;
        let [r0, r1, r2, r3] = a.n;
        Self::new(l0.min(r0), l1.min(r1), l2.min(r2), l3.min(r3))
    }

    /// Lane-wise (vertical) maximum of `self` and `a`, using `f32::max`.
    #[inline(always)]
    pub fn max(self, a: Self) -> Self {
        let [l0, l1, l2, l3] = self.n;
        let [r0, r1, r2, r3] = a.n;
        Self::new(l0.max(r0), l1.max(r1), l2.max(r2), l3.max(r3))
    }

    // Horizontal reductions, currently disabled:
    //
    // /// Horizontal minimum.
    // #[inline(always)]
    // pub fn hmin(self) -> f32 {
    //     let a = self.n[0].min(self.n[1]);
    //     let b = self.n[2].min(self.n[3]);
    //     a.min(b)
    // }

    // /// Horizontal maximum.
    // #[inline(always)]
    // pub fn hmax(self) -> f32 {
    //     let a = self.n[0].max(self.n[1]);
    //     let b = self.n[2].max(self.n[3]);
    //     a.max(b)
    // }

    //-----------------------------------------------------
    // Individual components.

    /// Returns the first lane.
    #[inline(always)]
    pub fn a(self) -> f32 {
        let [a, _, _, _] = self.n;
        a
    }

    /// Returns the second lane.
    #[inline(always)]
    pub fn b(self) -> f32 {
        let [_, b, _, _] = self.n;
        b
    }

    /// Returns the third lane.
    #[inline(always)]
    pub fn c(self) -> f32 {
        let [_, _, c, _] = self.n;
        c
    }

    /// Returns the fourth lane.
    #[inline(always)]
    pub fn d(self) -> f32 {
        let [_, _, _, d] = self.n;
        d
    }

    /// Returns a copy of `self` with the first lane replaced by `n`.
    #[inline(always)]
    #[must_use]
    pub fn set_a(self, n: f32) -> Self {
        let [_, b, c, d] = self.n;
        Self::new(n, b, c, d)
    }

    /// Returns a copy of `self` with the second lane replaced by `n`.
    #[inline(always)]
    #[must_use]
    pub fn set_b(self, n: f32) -> Self {
        let [a, _, c, d] = self.n;
        Self::new(a, n, c, d)
    }

    /// Returns a copy of `self` with the third lane replaced by `n`.
    #[inline(always)]
    #[must_use]
    pub fn set_c(self, n: f32) -> Self {
        let [a, b, _, d] = self.n;
        Self::new(a, b, n, d)
    }

    /// Returns a copy of `self` with the fourth lane replaced by `n`.
    #[inline(always)]
    #[must_use]
    pub fn set_d(self, n: f32) -> Self {
        let [a, b, c, _] = self.n;
        Self::new(a, b, c, n)
    }

    //-----------------------------------------------------
    // Shuffles.

    /// Broadcasts the first lane to all four lanes.
    #[inline(always)]
    pub fn aaaa(self) -> Self {
        Self::splat(self.a())
    }

    /// Broadcasts the second lane to all four lanes.
    #[inline(always)]
    pub fn bbbb(self) -> Self {
        Self::splat(self.b())
    }

    /// Broadcasts the third lane to all four lanes.
    #[inline(always)]
    pub fn cccc(self) -> Self {
        Self::splat(self.c())
    }

    /// Broadcasts the fourth lane to all four lanes.
    #[inline(always)]
    pub fn dddd(self) -> Self {
        Self::splat(self.d())
    }

    /// Rotates the first three lanes to `[b, c, a]`; the fourth stays put.
    #[inline(always)]
    pub fn bcad(self) -> Self {
        let [a, b, c, d] = self.n;
        Self::new(b, c, a, d)
    }

    /// Rotates the first three lanes to `[c, a, b]`; the fourth stays put.
    #[inline(always)]
    pub fn cabd(self) -> Self {
        let [a, b, c, d] = self.n;
        Self::new(c, a, b, d)
    }
}
|
|
|
|
impl Add for Float4 {
|
|
type Output = Self;
|
|
|
|
#[inline(always)]
|
|
fn add(self, rhs: Self) -> Self {
|
|
Self {
|
|
n: [
|
|
self.n[0] + rhs.n[0],
|
|
self.n[1] + rhs.n[1],
|
|
self.n[2] + rhs.n[2],
|
|
self.n[3] + rhs.n[3],
|
|
],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Sub for Float4 {
|
|
type Output = Self;
|
|
|
|
#[inline(always)]
|
|
fn sub(self, rhs: Self) -> Self {
|
|
Self {
|
|
n: [
|
|
self.n[0] - rhs.n[0],
|
|
self.n[1] - rhs.n[1],
|
|
self.n[2] - rhs.n[2],
|
|
self.n[3] - rhs.n[3],
|
|
],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Mul for Float4 {
|
|
type Output = Self;
|
|
|
|
#[inline(always)]
|
|
fn mul(self, rhs: Self) -> Self {
|
|
Self {
|
|
n: [
|
|
self.n[0] * rhs.n[0],
|
|
self.n[1] * rhs.n[1],
|
|
self.n[2] * rhs.n[2],
|
|
self.n[3] * rhs.n[3],
|
|
],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Mul<f32> for Float4 {
|
|
type Output = Self;
|
|
|
|
#[inline(always)]
|
|
fn mul(self, rhs: f32) -> Self {
|
|
Self {
|
|
n: [
|
|
self.n[0] * rhs,
|
|
self.n[1] * rhs,
|
|
self.n[2] * rhs,
|
|
self.n[3] * rhs,
|
|
],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Div for Float4 {
|
|
type Output = Self;
|
|
|
|
#[inline(always)]
|
|
fn div(self, rhs: Self) -> Self {
|
|
Self {
|
|
n: [
|
|
self.n[0] / rhs.n[0],
|
|
self.n[1] / rhs.n[1],
|
|
self.n[2] / rhs.n[2],
|
|
self.n[3] / rhs.n[3],
|
|
],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Div<f32> for Float4 {
|
|
type Output = Self;
|
|
|
|
#[inline(always)]
|
|
fn div(self, rhs: f32) -> Self {
|
|
Self {
|
|
n: [
|
|
self.n[0] / rhs,
|
|
self.n[1] / rhs,
|
|
self.n[2] / rhs,
|
|
self.n[3] / rhs,
|
|
],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Neg for Float4 {
|
|
type Output = Self;
|
|
|
|
#[inline(always)]
|
|
fn neg(self) -> Self {
|
|
Self {
|
|
n: [-self.n[0], -self.n[1], -self.n[2], -self.n[3]],
|
|
}
|
|
}
|
|
}
|
|
|
|
impl FMulAdd for Float4 {
|
|
fn fma(self, b: Self, c: Self) -> Self {
|
|
self.mul_add(b, c)
|
|
}
|
|
}
|
|
}
|
|
|
|
//-------------------------------------------------------------
|
|
// Float4 impls that don't depend on its inner representation.
|
|
|
|
impl Float4 {
|
|
/// 3D dot product (only uses the first 3 components).
|
|
#[inline(always)]
|
|
pub fn dot_3(a: Self, b: Self) -> f32 {
|
|
let (p, p_err) = two_prod(a, b);
|
|
|
|
// Products.
|
|
let (x, x_err) = (p.a(), p_err.a());
|
|
let (y, y_err) = (p.b(), p_err.b());
|
|
let (z, z_err) = (p.c(), p_err.c());
|
|
|
|
// Sums.
|
|
let (s1, s1_err) = two_sum(x, y);
|
|
let err1 = x_err + (y_err + s1_err);
|
|
|
|
let (s2, s2_err) = two_sum(s1, z);
|
|
let err2 = z_err + (err1 + s2_err);
|
|
|
|
// Final result with rounding error compensation.
|
|
s2 + err2
|
|
}
|
|
|
|
/// 3D dot product (only uses the first 3 components).
|
|
///
|
|
/// Faster but less precise version.
|
|
#[inline(always)]
|
|
pub fn dot_3_fast(a: Self, b: Self) -> f32 {
|
|
let c = a * b;
|
|
c.a() + c.b() + c.c()
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn transpose_3x3(m: [Self; 3]) -> [Self; 3] {
|
|
[
|
|
// The fourth component in each row below is arbitrary,
|
|
// but in this case chosen so that it matches the
|
|
// behavior of the SSE version of transpose_3x3.
|
|
Self::new(m[0].a(), m[1].a(), m[2].a(), m[2].d()),
|
|
Self::new(m[0].b(), m[1].b(), m[2].b(), m[2].d()),
|
|
Self::new(m[0].c(), m[1].c(), m[2].c(), m[2].d()),
|
|
]
|
|
}
|
|
|
|
/// Invert a 3x3 matrix.
|
|
///
|
|
/// Returns `None` if not invertible.
|
|
#[inline]
|
|
pub fn invert_3x3(m: [Self; 3]) -> Option<[Self; 3]> {
|
|
let m0_bca = m[0].bcad();
|
|
let m1_bca = m[1].bcad();
|
|
let m2_bca = m[2].bcad();
|
|
let m0_cab = m[0].cabd();
|
|
let m1_cab = m[1].cabd();
|
|
let m2_cab = m[2].cabd();
|
|
let abc = difference_of_products(m1_bca, m2_cab, m1_cab, m2_bca);
|
|
let def = difference_of_products(m2_bca, m0_cab, m2_cab, m0_bca);
|
|
let ghi = difference_of_products(m0_bca, m1_cab, m0_cab, m1_bca);
|
|
|
|
let det = Self::dot_3(
|
|
Self::new(abc.a(), def.a(), ghi.a(), 0.0),
|
|
Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0),
|
|
);
|
|
|
|
if det == 0.0 {
|
|
None
|
|
} else {
|
|
Some(Self::transpose_3x3([abc / det, def / det, ghi / det]))
|
|
}
|
|
}
|
|
|
|
/// Invert a 3x3 matrix. Faster but less precise version.
|
|
///
|
|
/// Returns `None` if not invertible.
|
|
#[inline]
|
|
pub fn invert_3x3_fast(m: [Self; 3]) -> Option<[Self; 3]> {
|
|
let m0_bca = m[0].bcad();
|
|
let m1_bca = m[1].bcad();
|
|
let m2_bca = m[2].bcad();
|
|
let m0_cab = m[0].cabd();
|
|
let m1_cab = m[1].cabd();
|
|
let m2_cab = m[2].cabd();
|
|
let abc = (m1_bca * m2_cab) - (m1_cab * m2_bca);
|
|
let def = (m2_bca * m0_cab) - (m2_cab * m0_bca);
|
|
let ghi = (m0_bca * m1_cab) - (m0_cab * m1_bca);
|
|
|
|
let det = Self::dot_3_fast(
|
|
Self::new(abc.a(), def.a(), ghi.a(), 0.0),
|
|
Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0),
|
|
);
|
|
|
|
if det == 0.0 {
|
|
None
|
|
} else {
|
|
Some(Self::transpose_3x3([abc / det, def / det, ghi / det]))
|
|
}
|
|
}
|
|
|
|
/// Multiplies a 3D vector with a 3x3 matrix.
|
|
#[inline]
|
|
pub fn vec_mul_3x3(self, m: &[Self; 3]) -> Self {
|
|
let x = self.aaaa();
|
|
let y = self.bbbb();
|
|
let z = self.cccc();
|
|
|
|
// Products.
|
|
let (a, a_err) = two_prod(x, m[0]);
|
|
let (b, b_err) = two_prod(y, m[1]);
|
|
let (c, c_err) = two_prod(z, m[2]);
|
|
|
|
// Sums.
|
|
let (s1, s1_err) = two_sum(a, b);
|
|
let err1 = a_err + (b_err + s1_err);
|
|
|
|
let (s2, s2_err) = two_sum(c, s1);
|
|
let err2 = c_err + (err1 + s2_err);
|
|
|
|
s2 + err2
|
|
}
|
|
|
|
/// Multiplies a 3D vector with a 3x3 matrix.
|
|
///
|
|
/// Faster but less precise version.
|
|
#[inline]
|
|
pub fn vec_mul_3x3_fast(self, m: &[Self; 3]) -> Self {
|
|
let x = self.aaaa();
|
|
let y = self.bbbb();
|
|
let z = self.cccc();
|
|
|
|
(x * m[0]) + (y * m[1]) + (z * m[2])
|
|
}
|
|
|
|
/// Transforms a 3d point by an affine transform.
|
|
///
|
|
/// `m` is the 3x3 part of the affine transform, `t` is the translation part.
|
|
#[inline]
|
|
pub fn vec_mul_affine(self, m: &[Self; 3], t: Self) -> Self {
|
|
let x = self.aaaa();
|
|
let y = self.bbbb();
|
|
let z = self.cccc();
|
|
|
|
// Products.
|
|
let (a, a_err) = two_prod(x, m[0]);
|
|
let (b, b_err) = two_prod(y, m[1]);
|
|
let (c, c_err) = two_prod(z, m[2]);
|
|
|
|
// Sums.
|
|
let (s1, s1_err) = two_sum(a, b);
|
|
let err1 = a_err + (b_err + s1_err);
|
|
|
|
let (s2, s2_err) = two_sum(c, s1);
|
|
let err2 = c_err + (err1 + s2_err);
|
|
|
|
let (s3, s3_err) = two_sum(t, s2);
|
|
let err3 = err2 + s3_err;
|
|
|
|
s3 + err3
|
|
}
|
|
|
|
/// Transforms a 3d point by an affine transform.
|
|
///
|
|
/// Faster but less precise version.
|
|
#[inline]
|
|
pub fn vec_mul_affine_fast(self, m: &[Self; 3], t: Self) -> Self {
|
|
let x = self.aaaa();
|
|
let y = self.bbbb();
|
|
let z = self.cccc();
|
|
|
|
(x * m[0]) + (y * m[1]) + (z * m[2]) + t
|
|
}
|
|
|
|
/// Transforms a 3d point by an affine transform, except it applies
|
|
/// the translation part before the 3x3 part.
|
|
///
|
|
/// This is primarily useful for performing efficient inverse transforms by
|
|
/// passing an inverted 3x3 part and a negated translation part.
|
|
///
|
|
/// `m` is the 3x3 part of the affine transform, `t` is the translation part.
|
|
#[inline]
|
|
pub fn vec_mul_affine_rev(self, m: &[Self; 3], t: Self) -> Self {
|
|
let (v, v_err) = two_sum(self, t);
|
|
|
|
let (x, x_err) = (v.aaaa(), v_err.aaaa());
|
|
let (y, y_err) = (v.bbbb(), v_err.bbbb());
|
|
let (z, z_err) = (v.cccc(), v_err.cccc());
|
|
|
|
// Products.
|
|
let ((a, a_err1), a_err2) = (two_prod(x, m[0]), x_err * m[0]);
|
|
let ((b, b_err1), b_err2) = (two_prod(y, m[1]), y_err * m[1]);
|
|
let ((c, c_err1), c_err2) = (two_prod(z, m[2]), z_err * m[2]);
|
|
let a_err = a_err1 + a_err2;
|
|
let b_err = b_err1 + b_err2;
|
|
let c_err = c_err1 + c_err2;
|
|
|
|
// Sums.
|
|
let (s1, s1_err) = two_sum(a, b);
|
|
let err1 = a_err + (b_err + s1_err);
|
|
|
|
let (s2, s2_err) = two_sum(c, s1);
|
|
let err2 = c_err + (err1 + s2_err);
|
|
|
|
let (s3, s3_err) = two_sum(t, s2);
|
|
let err3 = err2 + s3_err;
|
|
|
|
s3 + err3
|
|
}
|
|
|
|
/// Transforms a 3d point by an affine transform, except it applies
|
|
/// the translation part before the 3x3 part.
|
|
///
|
|
/// Faster but less precise version.
|
|
#[inline]
|
|
pub fn vec_mul_affine_rev_fast(self, m: &[Self; 3], t: Self) -> Self {
|
|
let v = self + t;
|
|
|
|
let x = v.aaaa();
|
|
let y = v.bbbb();
|
|
let z = v.cccc();
|
|
|
|
(x * m[0]) + (y * m[1]) + (z * m[2])
|
|
}
|
|
|
|
/// Returns whether the `Float4`s are approximately equal to each
|
|
/// other.
|
|
///
|
|
/// Each corresponding element cannot have a relative error exceeding
|
|
/// `epsilon`.
|
|
pub(crate) fn aprx_eq(a: Self, b: Self, epsilon: f32) -> bool {
|
|
let mut eq = true;
|
|
eq &= relative_eq!(a.a(), b.a(), epsilon = epsilon);
|
|
eq &= relative_eq!(a.b(), b.b(), epsilon = epsilon);
|
|
eq &= relative_eq!(a.c(), b.c(), epsilon = epsilon);
|
|
eq &= relative_eq!(a.d(), b.d(), epsilon = epsilon);
|
|
eq
|
|
}
|
|
}
|
|
|
|
impl AddAssign for Float4 {
|
|
#[inline(always)]
|
|
fn add_assign(&mut self, rhs: Self) {
|
|
*self = *self + rhs;
|
|
}
|
|
}
|
|
|
|
impl SubAssign for Float4 {
|
|
#[inline(always)]
|
|
fn sub_assign(&mut self, rhs: Self) {
|
|
*self = *self - rhs;
|
|
}
|
|
}
|
|
|
|
impl MulAssign for Float4 {
|
|
#[inline(always)]
|
|
fn mul_assign(&mut self, rhs: Self) {
|
|
*self = *self * rhs;
|
|
}
|
|
}
|
|
|
|
impl MulAssign<f32> for Float4 {
|
|
#[inline(always)]
|
|
fn mul_assign(&mut self, rhs: f32) {
|
|
*self = *self * rhs;
|
|
}
|
|
}
|
|
|
|
impl DivAssign for Float4 {
|
|
#[inline(always)]
|
|
fn div_assign(&mut self, rhs: Self) {
|
|
*self = *self / rhs;
|
|
}
|
|
}
|
|
|
|
impl DivAssign<f32> for Float4 {
|
|
#[inline(always)]
|
|
fn div_assign(&mut self, rhs: f32) {
|
|
*self = *self / rhs;
|
|
}
|
|
}
|