Whole bunch of cleanup on RMath.

This commit is contained in:
Nathan Vegdahl 2022-07-17 16:37:15 -07:00
parent e2044e6579
commit 6dbdcba91a
9 changed files with 573 additions and 114 deletions

View File

@ -4,6 +4,7 @@
mod normal;
mod point;
mod sealed;
mod utils;
mod vector;
pub mod wide4;
@ -11,7 +12,9 @@ mod xform;
use std::ops::{Add, Mul, Neg, Sub};
pub use self::{normal::Normal, point::Point, vector::Vector, xform::Xform, xform::XformFull};
pub use self::{
normal::Normal, point::Point, vector::Vector, xform::AsXform, xform::Xform, xform::XformFull,
};
/// Trait for calculating dot products.
pub trait DotProduct {
@ -111,3 +114,16 @@ where
let delta = sum - a;
(sum, (a - (sum - delta)) + (b - delta))
}
/// Subtracts `b` from `a`, additionally reporting the rounding error of
/// that subtraction so it can be folded into later operations.
///
/// Returns `(diff, rounding_err)`, where `diff` is the plain `a - b` and
/// `rounding_err` is the compensation term recovered from it.
#[inline(always)]
fn two_diff<T>(a: T, b: T) -> (T, T)
where
    T: Copy + Add<Output = T> + Sub<Output = T>,
{
    let diff = a - b;

    // The negated part of `b` that actually made it into `diff`.
    let delta = diff - a;

    // What was lost from `a` and from `b`, respectively.
    let a_virtual = diff - delta;
    let a_lost = a - a_virtual;
    let b_lost = b + delta;

    (diff, a_lost - b_lost)
}

View File

@ -4,7 +4,7 @@ use std::cmp::PartialEq;
use std::ops::{Add, Div, Mul, Neg, Sub};
use crate::wide4::Float4;
use crate::xform::XformFull;
use crate::xform::{AsXform, XformFull};
use crate::Vector;
use crate::{CrossProduct, DotProduct};
@ -87,20 +87,37 @@ impl Normal {
//-------------
// Transforms.
/// Forward-transform the normal.
#[inline(always)]
pub fn xform(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(xform.m_inv)))
Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(&xform.inv_m)))
}
pub fn xform_inv(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(xform.m)))
/// Applies the inverse of `xform` to the normal.
///
/// This multiplies by the transpose of the *forward* 3x3 matrix, so
/// no inverted matrix is needed and any `AsXform` type is accepted.
#[inline(always)]
pub fn xform_inv<T: AsXform>(self, xform: &T) -> Self {
    let fwd = xform.as_xform();
    let m_transposed = Float4::transpose_3x3(&fwd.m);
    Self(self.0.vec_mul_3x3(&m_transposed))
}
/// Faster but less precise version of `xform()`.
#[inline(always)]
pub fn xform_fast(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3_fast(&Float4::transpose_3x3(xform.m_inv)))
Self(
self.0
.vec_mul_3x3_fast(&Float4::transpose_3x3(&xform.inv_m)),
)
}
pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3_fast(&Float4::transpose_3x3(xform.m)))
/// Like `xform_inv()`, but trades precision for speed by using the
/// `_fast` 3x3 multiply.
#[inline(always)]
pub fn xform_inv_fast<T: AsXform>(self, xform: &T) -> Self {
    let fwd = xform.as_xform();
    let m_transposed = Float4::transpose_3x3(&fwd.m);
    Self(self.0.vec_mul_3x3_fast(&m_transposed))
}
}
@ -218,7 +235,7 @@ mod tests {
fn xform() {
let n = Normal::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.into_full()
.to_full()
.unwrap();
assert_eq!(n.xform(&m), Normal::new(-4.0625, 1.78125, -0.03125));
@ -229,7 +246,7 @@ mod tests {
fn xform_fast() {
let n = Normal::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.into_full()
.to_full()
.unwrap();
assert_eq!(n.xform_fast(&m), Normal::new(-4.0625, 1.78125, -0.03125));

View File

@ -4,7 +4,7 @@ use std::ops::{Add, Sub};
use crate::vector::Vector;
use crate::wide4::Float4;
use crate::xform::XformFull;
use crate::xform::{AsXform, XformFull};
/// A position in 3D space.
#[derive(Debug, Copy, Clone)]
@ -78,20 +78,30 @@ impl Point {
//-------------
// Transforms.
pub fn xform(self, xform: &XformFull) -> Self {
/// Applies the forward transform to the point.
///
/// Uses both the 3x3 matrix and the translation of the forward
/// transform obtained through `AsXform`.
#[inline(always)]
pub fn xform<T: AsXform>(self, xform: &T) -> Self {
    let fwd = xform.as_xform();
    let moved = self.0.vec_mul_affine(&fwd.m, fwd.t);
    Self(moved)
}
/// Inverse-transform the point.
#[inline(always)]
pub fn xform_inv(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_affine_rev(&xform.m_inv, -xform.t))
Self(self.0.vec_mul_affine_rev(&xform.inv_m, xform.fwd.t))
}
pub fn xform_fast(self, xform: &XformFull) -> Self {
/// Like `xform()`, but trades precision for speed by using the
/// `_fast` affine multiply.
#[inline(always)]
pub fn xform_fast<T: AsXform>(self, xform: &T) -> Self {
    let fwd = xform.as_xform();
    let moved = self.0.vec_mul_affine_fast(&fwd.m, fwd.t);
    Self(moved)
}
/// Faster but less precise version of `xform_inv()`.
#[inline(always)]
pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_affine_rev_fast(&xform.m_inv, -xform.t))
Self(self.0.vec_mul_affine_rev_fast(&xform.inv_m, xform.fwd.t))
}
}
@ -158,7 +168,7 @@ mod tests {
fn xform() {
let p = Point::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.into_full()
.to_full()
.unwrap();
assert_eq!(p.xform(&m), Point::new(15.5, 54.0, 70.0));
assert_eq!(p.xform(&m).xform_inv(&m), p);
@ -168,7 +178,7 @@ mod tests {
fn xform_fast() {
let p = Point::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.into_full()
.to_full()
.unwrap();
assert_eq!(p.xform_fast(&m), Point::new(15.5, 54.0, 70.0));
assert_eq!(p.xform_fast(&m).xform_inv_fast(&m), p);

View File

@ -0,0 +1,5 @@
/// Marker supertrait for sealing other traits.
///
/// Even though this is marked as public, this module isn't, and
/// therefore this trait is not available outside the crate. Traits
/// that require `Sealed` as a supertrait (such as `AsXform`) can
/// consequently only be implemented by types inside this crate.
pub trait Sealed {}

View File

@ -6,7 +6,7 @@ use std::ops::{Add, Div, Mul, Neg, Sub};
use crate::normal::Normal;
use crate::point::Point;
use crate::wide4::Float4;
use crate::xform::XformFull;
use crate::xform::{AsXform, XformFull};
use crate::{CrossProduct, DotProduct};
/// A direction vector in 3D space.
@ -98,20 +98,28 @@ impl Vector {
//-------------
// Transforms.
pub fn xform(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3(&xform.m))
/// Applies the forward transform to the vector.
///
/// Only the 3x3 matrix of the forward transform is used; the
/// translation is not applied.
#[inline(always)]
pub fn xform<T: AsXform>(self, xform: &T) -> Self {
    let fwd = xform.as_xform();
    Self(self.0.vec_mul_3x3(&fwd.m))
}
/// Inverse-transform the vector.
#[inline(always)]
pub fn xform_inv(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3(&xform.m_inv))
Self(self.0.vec_mul_3x3(&xform.inv_m))
}
pub fn xform_fast(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3_fast(&xform.m))
/// Like `xform()`, but trades precision for speed by using the
/// `_fast` 3x3 multiply.
#[inline(always)]
pub fn xform_fast<T: AsXform>(self, xform: &T) -> Self {
    let fwd = xform.as_xform();
    Self(self.0.vec_mul_3x3_fast(&fwd.m))
}
/// Faster but less precise version of `xform_inv()`.
#[inline(always)]
pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3_fast(&xform.m_inv))
Self(self.0.vec_mul_3x3_fast(&xform.inv_m))
}
}
@ -229,7 +237,7 @@ mod tests {
fn xform() {
let v = Vector::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.into_full()
.to_full()
.unwrap();
assert_eq!(v.xform(&m), Vector::new(14.0, 46.0, 58.0));
@ -240,7 +248,7 @@ mod tests {
fn xform_fast() {
let v = Vector::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.into_full()
.to_full()
.unwrap();
assert_eq!(v.xform_fast(&m), Vector::new(14.0, 46.0, 58.0));

View File

@ -38,22 +38,40 @@ impl Float4 {
/// Vertical minimum.
#[inline(always)]
pub fn min(self, a: Self) -> Self {
// Custom min to match behavior of SSE.
#[inline(always)]
pub fn minf(a: f32, b: f32) -> f32 {
if a < b {
a
} else {
b
}
}
Self([
self.0[0].min(a.0[0]),
self.0[1].min(a.0[1]),
self.0[2].min(a.0[2]),
self.0[3].min(a.0[3]),
minf(self.0[0], a.0[0]),
minf(self.0[1], a.0[1]),
minf(self.0[2], a.0[2]),
minf(self.0[3], a.0[3]),
])
}
/// Vertical maximum.
#[inline(always)]
pub fn max(self, a: Self) -> Self {
// Custom max to match behavior of SSE.
#[inline(always)]
pub fn maxf(a: f32, b: f32) -> f32 {
if a > b {
a
} else {
b
}
}
Self([
self.0[0].max(a.0[0]),
self.0[1].max(a.0[1]),
self.0[2].max(a.0[2]),
self.0[3].max(a.0[3]),
maxf(self.0[0], a.0[0]),
maxf(self.0[1], a.0[1]),
maxf(self.0[2], a.0[2]),
maxf(self.0[3], a.0[3]),
])
}
@ -352,11 +370,16 @@ impl FMulAdd for Float4 {
//=============================================================
// Bool4
#[derive(Debug, Copy, Clone)]
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct Bool4([bool; 4]);
impl Bool4 {
#[inline(always)]
pub fn new(a: bool, b: bool, c: bool, d: bool) -> Self {
Bool4([a, b, c, d])
}
#[inline(always)]
pub fn new_false() -> Self {
Self([false, false, false, false])

View File

@ -1,15 +1,14 @@
use std::{
cmp::PartialEq,
cmp::{Eq, PartialEq},
ops::{AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, SubAssign},
};
use crate::utils::ulps_eq;
use crate::{difference_of_products, two_prod, two_sum};
use crate::{difference_of_products, two_diff, two_prod, two_sum};
//-------------------------------------------------------------
// Which implementation to use.
#[cfg(not(any(target_arch = "x86_64")))]
mod fallback;
#[cfg(not(any(target_arch = "x86_64")))]
pub use fallback::{Bool4, Float4};
@ -43,9 +42,7 @@ impl Float4 {
s2 + err2
}
/// 3D dot product (only uses the first 3 components).
///
/// Faster but less precise version.
/// Faster but less precise version of `dot_3()`.
#[inline(always)]
pub fn dot_3_fast(a: Self, b: Self) -> f32 {
let c = a * b;
@ -58,16 +55,14 @@ impl Float4 {
difference_of_products(a.bcad(), b.cabd(), a.cabd(), b.bcad())
}
/// 3D cross product (only uses the first 3 components).
///
/// Faster but less precise version.
/// Faster but less precise version of `cross_3()`.
#[inline(always)]
pub fn cross_3_fast(a: Self, b: Self) -> Self {
(a.bcad() * b.cabd()) - (a.cabd() * b.bcad())
}
#[inline(always)]
pub fn transpose_3x3(m: [Self; 3]) -> [Self; 3] {
pub fn transpose_3x3(m: &[Self; 3]) -> [Self; 3] {
[
// The fourth component in each row below is arbitrary,
// but in this case chosen so that it matches the
@ -82,7 +77,7 @@ impl Float4 {
///
/// Returns `None` if not invertible.
#[inline]
pub fn invert_3x3(m: [Self; 3]) -> Option<[Self; 3]> {
pub fn invert_3x3(m: &[Self; 3]) -> Option<[Self; 3]> {
let m0_bca = m[0].bcad();
let m1_bca = m[1].bcad();
let m2_bca = m[2].bcad();
@ -101,15 +96,13 @@ impl Float4 {
if det == 0.0 {
None
} else {
Some(Self::transpose_3x3([abc / det, def / det, ghi / det]))
Some(Self::transpose_3x3(&[abc / det, def / det, ghi / det]))
}
}
/// Invert a 3x3 matrix. Faster but less precise version.
///
/// Returns `None` if not invertible.
/// Faster but less precise version of `invert_3x3()`.
#[inline]
pub fn invert_3x3_fast(m: [Self; 3]) -> Option<[Self; 3]> {
pub fn invert_3x3_fast(m: &[Self; 3]) -> Option<[Self; 3]> {
let m0_bca = m[0].bcad();
let m1_bca = m[1].bcad();
let m2_bca = m[2].bcad();
@ -128,7 +121,7 @@ impl Float4 {
if det == 0.0 {
None
} else {
Some(Self::transpose_3x3([abc / det, def / det, ghi / det]))
Some(Self::transpose_3x3(&[abc / det, def / det, ghi / det]))
}
}
@ -154,9 +147,7 @@ impl Float4 {
s2 + err2
}
/// Multiplies a 3D vector with a 3x3 matrix.
///
/// Faster but less precise version.
/// Faster but less precise version of `vec_mul_3x3()`.
#[inline]
pub fn vec_mul_3x3_fast(self, m: &[Self; 3]) -> Self {
let x = self.aaaa();
@ -193,9 +184,7 @@ impl Float4 {
s3 + err3
}
/// Transforms a 3d point by an affine transform.
///
/// Faster but less precise version.
/// Faster but less precise version of `vec_mul_affine()`.
#[inline]
pub fn vec_mul_affine_fast(self, m: &[Self; 3], t: Self) -> Self {
let x = self.aaaa();
@ -205,25 +194,26 @@ impl Float4 {
(x * m[0]) + (y * m[1]) + (z * m[2]) + t
}
/// Transforms a 3d point by an affine transform, except it applies
/// the translation part before the 3x3 part.
/// Transforms a 3d point by an affine transform, except it does
/// `(vec - t) * inv_m` instead of `vec * m + t`.
///
/// This is primarily useful for performing efficient inverse transforms by
/// passing an inverted 3x3 part and a negated translation part.
/// This is useful for performing efficient inverse transforms while
/// only having to invert the 3x3 part of the transform itself.
///
/// `m` is the 3x3 part of the affine transform, `t` is the translation part.
/// `inv_m` is the inverse 3x3 part of the affine transform, `t` is
/// the forward translation part.
#[inline]
pub fn vec_mul_affine_rev(self, m: &[Self; 3], t: Self) -> Self {
let (v, v_err) = two_sum(self, t);
pub fn vec_mul_affine_rev(self, inv_m: &[Self; 3], t: Self) -> Self {
let (v, v_err) = two_diff(self, t);
let (x, x_err) = (v.aaaa(), v_err.aaaa());
let (y, y_err) = (v.bbbb(), v_err.bbbb());
let (z, z_err) = (v.cccc(), v_err.cccc());
// Products.
let ((a, a_err1), a_err2) = (two_prod(x, m[0]), x_err * m[0]);
let ((b, b_err1), b_err2) = (two_prod(y, m[1]), y_err * m[1]);
let ((c, c_err1), c_err2) = (two_prod(z, m[2]), z_err * m[2]);
let ((a, a_err1), a_err2) = (two_prod(x, inv_m[0]), x_err * inv_m[0]);
let ((b, b_err1), b_err2) = (two_prod(y, inv_m[1]), y_err * inv_m[1]);
let ((c, c_err1), c_err2) = (two_prod(z, inv_m[2]), z_err * inv_m[2]);
let a_err = a_err1 + a_err2;
let b_err = b_err1 + b_err2;
let c_err = c_err1 + c_err2;
@ -238,19 +228,16 @@ impl Float4 {
s2 + err2
}
/// Transforms a 3d point by an affine transform, except it applies
/// the translation part before the 3x3 part.
///
/// Faster but less precise version.
/// Faster but less precise version of `vec_mul_affine_rev()`.
#[inline]
pub fn vec_mul_affine_rev_fast(self, m: &[Self; 3], t: Self) -> Self {
let v = self + t;
pub fn vec_mul_affine_rev_fast(self, inv_m: &[Self; 3], t: Self) -> Self {
let v = self - t;
let x = v.aaaa();
let y = v.bbbb();
let z = v.cccc();
(x * m[0]) + (y * m[1]) + (z * m[2])
(x * inv_m[0]) + (y * inv_m[1]) + (z * inv_m[2])
}
/// Returns whether the `Float4`s are approximately equal to each
@ -290,9 +277,7 @@ impl Float4 {
)
}
/// Transforms one affine transform by another.
///
/// Faster but less precise version.
/// Faster but less precise version of `affine_mul_affine()`.
#[inline]
pub fn affine_mul_affine_fast(
m1: &[Self; 3],
@ -389,12 +374,34 @@ impl BitXorAssign for Bool4 {
}
}
impl PartialEq for Bool4 {
    /// Two masks compare equal when their lane bitmasks are identical.
    #[inline(always)]
    fn eq(&self, other: &Self) -> bool {
        let (lhs_bits, rhs_bits) = (self.bitmask(), other.bitmask());
        lhs_bits == rhs_bits
    }
}
// Equality is defined purely by comparing bitmasks (see the `PartialEq`
// impl), so it is a full equivalence relation and `Eq` can be claimed.
impl Eq for Bool4 {}
impl std::fmt::Debug for Bool4 {
    /// Writes `Bool4(`, then the four lanes as a debug list of `bool`s
    /// (as returned by `to_bools()`), then a closing `)`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let lanes = self.to_bools();

        f.write_str("Bool4(")?;
        f.debug_list().entries(lanes.iter()).finish()?;
        f.write_str(")")
    }
}
//-------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
//------------
// Float4
#[test]
fn approximate_equality_test() {
let a = Float4::new(1.0, 2.0, 3.0, 4.0);
@ -420,6 +427,26 @@ mod tests {
assert_eq!(v[3], 3.0);
}
#[test]
fn get() {
let v = Float4::new(0.0, 1.0, 2.0, 3.0);
assert_eq!(v.a(), 0.0);
assert_eq!(v.b(), 1.0);
assert_eq!(v.c(), 2.0);
assert_eq!(v.d(), 3.0);
}
#[test]
fn set() {
let v = Float4::new(0.0, 1.0, 2.0, 3.0);
assert_eq!(v.set_a(9.0), Float4::new(9.0, 1.0, 2.0, 3.0));
assert_eq!(v.set_b(9.0), Float4::new(0.0, 9.0, 2.0, 3.0));
assert_eq!(v.set_c(9.0), Float4::new(0.0, 1.0, 9.0, 3.0));
assert_eq!(v.set_d(9.0), Float4::new(0.0, 1.0, 2.0, 9.0));
}
#[test]
fn shuffle() {
let v = Float4::new(0.0, 1.0, 2.0, 3.0);
@ -434,10 +461,291 @@ mod tests {
}
#[test]
fn bitmask() {
let v1 = Float4::new(0.0, 1.0, 2.0, 3.0);
let v2 = Float4::new(9.0, 1.0, 9.0, 3.0);
fn abs() {
let v1 = Float4::new(-1.0, 2.0, -3.0, 4.0);
let v2 = Float4::new(1.0, -2.0, 3.0, -4.0);
assert_eq!(v1.cmpeq(v2).bitmask(), 0b1010);
let r = Float4::new(1.0, 2.0, 3.0, 4.0);
assert_eq!(v1.abs(), r);
assert_eq!(v2.abs(), r);
}
#[test]
fn neg() {
let v1 = Float4::new(-1.0, 2.0, -3.0, 4.0);
let v2 = Float4::new(1.0, -2.0, 3.0, -4.0);
assert_eq!(-v1, v2);
assert_eq!(-v2, v1);
}
#[test]
fn cmp_ops() {
let a = Float4::new(1.0, 2.0, -2.0, 0.0);
let b = Float4::new(1.0, -2.0, 2.0, -0.0);
assert_eq!(a.cmplt(b), Bool4::new(false, false, true, false));
assert_eq!(a.cmplte(b), Bool4::new(true, false, true, true));
assert_eq!(a.cmpgt(b), Bool4::new(false, true, false, false));
assert_eq!(a.cmpgte(b), Bool4::new(true, true, false, true));
assert_eq!(a.cmpeq(b), Bool4::new(true, false, false, true));
}
#[test]
fn min_max() {
let a = Float4::new(1.0, 2.0, -2.0, 4.0);
let b = Float4::new(1.0, -2.0, 2.0, 5.0);
assert_eq!(a.min(b), Float4::new(1.0, -2.0, -2.0, 4.0));
assert_eq!(a.max(b), Float4::new(1.0, 2.0, 2.0, 5.0));
let c = Float4::new(std::f32::INFINITY, 2.0, std::f32::NAN, 4.0);
let d = Float4::new(1.0, -std::f32::INFINITY, 2.0, std::f32::NAN);
let r_min = c.min(d);
let r_max = c.max(d);
assert_eq!(r_min.a(), 1.0);
assert_eq!(r_min.b(), -std::f32::INFINITY);
assert_eq!(r_min.c(), 2.0);
assert!(r_min.d().is_nan());
assert_eq!(r_max.a(), std::f32::INFINITY);
assert_eq!(r_max.b(), 2.0);
assert_eq!(r_max.c(), 2.0);
assert!(r_max.d().is_nan());
}
#[test]
fn dot_3() {
let v1 = Float4::new(1.0, 2.0, -3.0, 0.0);
let v2 = Float4::new(4.0, -5.0, 6.0, 0.0);
assert_eq!(Float4::dot_3(v1, v2), -24.0);
assert_eq!(Float4::dot_3_fast(v1, v2), -24.0);
}
#[test]
fn cross_3() {
let v1 = Float4::new(1.0, 2.0, -3.0, 0.0);
let v2 = Float4::new(4.0, -5.0, 6.0, 0.0);
let r = Float4::new(-3.0, -18.0, -13.0, 0.0);
assert_eq!(Float4::cross_3(v1, v2), r);
assert_eq!(Float4::cross_3(v2, v1), -r);
assert_eq!(Float4::cross_3_fast(v1, v2), r);
assert_eq!(Float4::cross_3_fast(v2, v1), -r);
}
#[test]
fn transpose_3x3() {
let m1 = [
Float4::new(1.0, 4.0, 7.0, 0.0),
Float4::new(2.0, 5.0, 8.0, 0.0),
Float4::new(3.0, 6.0, 9.0, 0.0),
];
let m2 = [
Float4::new(1.0, 2.0, 3.0, 0.0),
Float4::new(4.0, 5.0, 6.0, 0.0),
Float4::new(7.0, 8.0, 9.0, 0.0),
];
assert_eq!(Float4::transpose_3x3(&m1), m2);
assert_eq!(Float4::transpose_3x3(&m2), m1);
}
#[test]
fn invert_3x3() {
let m = [
Float4::new(1.0, 3.0, 9.0, 0.0),
Float4::new(2.0, 6.0, 2.0, 0.0),
Float4::new(2.0, 7.0, 11.0, 0.0),
];
let inv_m = [
Float4::new(3.25, 1.875, -3.0, 0.0),
Float4::new(-1.125, -0.4375, 1.0, 0.0),
Float4::new(0.125, -0.0625, 0.0, 0.0),
];
assert_eq!(Float4::invert_3x3(&m).unwrap(), inv_m);
assert_eq!(Float4::invert_3x3(&inv_m).unwrap(), m);
assert_eq!(Float4::invert_3x3_fast(&m).unwrap(), inv_m);
assert_eq!(Float4::invert_3x3_fast(&inv_m).unwrap(), m);
}
#[test]
fn vec_mul_3x3() {
let v = Float4::new(1.0, 2.5, 4.0, 0.0);
let m = [
Float4::new(1.0, 3.0, 9.0, 0.0),
Float4::new(2.0, 6.0, 2.0, 0.0),
Float4::new(2.0, 7.0, 11.0, 0.0),
];
let r = Float4::new(14.0, 46.0, 58.0, 0.0);
assert_eq!(v.vec_mul_3x3(&m), r);
assert_eq!(v.vec_mul_3x3_fast(&m), r);
}
#[test]
fn vec_mul_affine() {
let p = Float4::new(1.0, 2.5, 4.0, 0.0);
let xform = (
[
Float4::new(1.0, 3.0, 9.0, 0.0),
Float4::new(2.0, 6.0, 2.0, 0.0),
Float4::new(2.0, 7.0, 11.0, 0.0),
],
Float4::new(1.5, 8.0, 12.0, 0.0),
);
let r = Float4::new(15.5, 54.0, 70.0, 0.0);
assert_eq!(p.vec_mul_affine(&xform.0, xform.1), r);
}
#[test]
fn vec_mul_affine_rev() {
let p = Float4::new(15.5, 54.0, 70.0, 0.0);
let inv_m = [
Float4::new(3.25, 1.875, -3.0, 0.0),
Float4::new(-1.125, -0.4375, 1.0, 0.0),
Float4::new(0.125, -0.0625, 0.0, 0.0),
];
let t = Float4::new(1.5, 8.0, 12.0, 0.0);
let r = Float4::new(1.0, 2.5, 4.0, 0.0);
assert_eq!(p.vec_mul_affine_rev(&inv_m, t), r);
assert_eq!(p.vec_mul_affine_rev_fast(&inv_m, t), r);
}
#[test]
fn affine_mul_affine() {
let a = (
[
Float4::new(1.0, 3.0, 9.0, 0.0),
Float4::new(2.0, 6.0, 2.0, 0.0),
Float4::new(2.0, 7.0, 11.0, 0.0),
],
Float4::new(1.5, 8.0, 12.0, 0.0),
);
let b = (
[
Float4::new(1.0, 2.0, 3.0, 0.0),
Float4::new(5.0, 6.0, 7.0, 0.0),
Float4::new(9.0, 10.0, 11.0, 0.0),
],
Float4::new(13.0, 14.0, 15.0, 0.0),
);
let r = (
[
Float4::new(97.0, 110.0, 123.0, 0.0),
Float4::new(50.0, 60.0, 70.0, 0.0),
Float4::new(136.0, 156.0, 176.0, 0.0),
],
Float4::new(162.5, 185.0, 207.5, 0.0),
);
assert_eq!(Float4::affine_mul_affine(&a.0, a.1, &b.0, b.1), r);
assert_eq!(Float4::affine_mul_affine_fast(&a.0, a.1, &b.0, b.1), r);
}
//------------
// Bool4
#[test]
fn bitmask() {
assert_eq!(Bool4::new(true, false, false, false).bitmask(), 0b0001);
assert_eq!(Bool4::new(false, true, false, false).bitmask(), 0b0010);
assert_eq!(Bool4::new(false, false, true, false).bitmask(), 0b0100);
assert_eq!(Bool4::new(false, false, false, true).bitmask(), 0b1000);
assert_eq!(Bool4::new(false, true, false, true).bitmask(), 0b1010);
assert_eq!(Bool4::new(true, false, true, false).bitmask(), 0b0101);
}
#[test]
fn to_bools() {
assert_eq!(
Bool4::new(true, false, false, false).to_bools(),
[true, false, false, false]
);
assert_eq!(
Bool4::new(false, true, false, false).to_bools(),
[false, true, false, false]
);
assert_eq!(
Bool4::new(false, false, true, false).to_bools(),
[false, false, true, false]
);
assert_eq!(
Bool4::new(false, false, false, true).to_bools(),
[false, false, false, true]
);
assert_eq!(
Bool4::new(false, true, false, true).to_bools(),
[false, true, false, true]
);
assert_eq!(
Bool4::new(true, false, true, false).to_bools(),
[true, false, true, false]
);
}
#[test]
fn any() {
assert_eq!(Bool4::new(true, false, false, false).any(), true);
assert_eq!(Bool4::new(false, true, false, false).any(), true);
assert_eq!(Bool4::new(false, false, true, false).any(), true);
assert_eq!(Bool4::new(false, false, false, true).any(), true);
assert_eq!(Bool4::new(false, false, false, false).any(), false);
}
#[test]
fn all() {
assert_eq!(Bool4::new(false, true, true, true).all(), false);
assert_eq!(Bool4::new(true, false, true, true).all(), false);
assert_eq!(Bool4::new(true, true, false, true).all(), false);
assert_eq!(Bool4::new(true, true, true, false).all(), false);
assert_eq!(Bool4::new(true, true, true, true).all(), true);
}
#[test]
fn boolean_ops() {
let all = Bool4::new(true, true, true, true);
let none = Bool4::new(false, false, false, false);
let a = Bool4::new(true, false, true, false);
let b = Bool4::new(false, true, false, true);
// Not.
assert_eq!(!a, b);
assert_eq!(!b, a);
assert_eq!(!all, none);
assert_eq!(!none, all);
// And.
assert_eq!(a & b, none);
assert_eq!(all & none, none);
assert_eq!(all & all, all);
assert_eq!(none & none, none);
// Or.
assert_eq!(a | b, all);
assert_eq!(all | none, all);
assert_eq!(all | all, all);
assert_eq!(none | none, none);
// Xor.
assert_eq!(a ^ b, all);
assert_eq!(all ^ none, all);
assert_eq!(all ^ all, none);
assert_eq!(none ^ none, none);
}
}

View File

@ -3,8 +3,8 @@ use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, Mul, Neg, Not, Sub};
use std::arch::x86_64::{
__m128, _mm_add_ps, _mm_and_ps, _mm_castsi128_ps, _mm_cmpeq_ps, _mm_cmpge_ps, _mm_cmpgt_ps,
_mm_cmple_ps, _mm_cmplt_ps, _mm_div_ps, _mm_fmadd_ps, _mm_max_ps, _mm_min_ps, _mm_movemask_ps,
_mm_mul_ps, _mm_or_ps, _mm_rcp_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_ps, _mm_setzero_ps,
_mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
_mm_mul_ps, _mm_or_ps, _mm_rcp_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_set_ps,
_mm_setzero_ps, _mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
};
use crate::FMulAdd;
@ -277,7 +277,10 @@ impl Neg for Float4 {
#[inline(always)]
fn neg(self) -> Self {
Self(unsafe { _mm_mul_ps(self.0, _mm_set1_ps(-1.0)) })
Self(unsafe {
let abs_mask = _mm_castsi128_ps(_mm_set1_epi32(1 << 31));
_mm_xor_ps(self.0, abs_mask)
})
}
}
@ -291,11 +294,26 @@ impl FMulAdd for Float4 {
//=============================================================
// Bool4
#[derive(Debug, Copy, Clone)]
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct Bool4(__m128);
impl Bool4 {
/// Builds a mask from four individual lane values.
///
/// Each `true` becomes a lane with all 32 bits set and each `false` a
/// lane with all bits clear. `a` is passed as the last argument of
/// `_mm_set_epi32`, i.e. it ends up in the lowest lane.
#[inline(always)]
pub fn new(a: bool, b: bool, c: bool, d: bool) -> Self {
    // Expands a bool into an all-ones (`-1`) or all-zeros 32-bit lane.
    #[inline(always)]
    fn lane(flag: bool) -> i32 {
        if flag {
            -1
        } else {
            0
        }
    }

    // SAFETY: `_mm_set_epi32` only needs SSE2, which is part of the
    // x86_64 baseline that this implementation is built for.
    let ints = unsafe { _mm_set_epi32(lane(d), lane(c), lane(b), lane(a)) };

    // SAFETY: this is a pure bit-level reinterpretation of the register
    // contents with the same SSE2 requirement as above.
    Bool4(unsafe { _mm_castsi128_ps(ints) })
}
#[inline(always)]
pub fn new_false() -> Self {
Self(unsafe { _mm_setzero_ps() })

View File

@ -3,14 +3,33 @@
use std::ops::{Add, Mul};
use crate::point::Point;
use crate::sealed::Sealed;
use crate::wide4::Float4;
/// An affine transform.
/// A forward affine transform.
///
/// Use this for working with transforms that still need to be
/// manipulated or composed with other transforms, or for storing
/// transforms more compactly.
///
/// Note: slightly counter-intuitively, even though this can perform
/// forward (but not inverse) transforms on points and vectors, it is
/// capable of *inverse* (but not forward) transforms on surface normals.
/// This is because forward transforms on surface normals require the
/// inverse transform matrix.
///
/// Convert to an [`XformFull`] for a larger-format type capable of
/// efficiently performing both forward and inverse transforms on all
/// types, but which is effectively "frozen" in terms of further
/// manipulation of the transform itself.
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct Xform {
pub m: [Float4; 3], // Linear matrix.
pub t: Float4, // Translation.
/// Rotation/scale/shear matrix.
pub m: [Float4; 3],
/// Translation.
pub t: Float4,
}
impl Xform {
@ -87,29 +106,26 @@ impl Xform {
eq
}
/// Computes a "full" version of the transform, which can do both
/// forward and inverse transforms.
/// Compute the "full" version of the transform.
#[inline]
pub fn into_full(self) -> Option<XformFull> {
if let Some(m_inv) = Float4::invert_3x3(self.m) {
pub fn to_full(&self) -> Option<XformFull> {
if let Some(inv_m) = Float4::invert_3x3(&self.m) {
Some(XformFull {
m: self.m,
m_inv: m_inv,
t: self.t,
fwd: *self,
inv_m: inv_m,
})
} else {
None
}
}
/// Faster but less precise version of `compute_full()`.
/// Faster but less precise version of `to_full()`.
#[inline]
pub fn into_full_fast(self) -> Option<XformFull> {
if let Some(m_inv) = Float4::invert_3x3_fast(self.m) {
pub fn to_full_fast(&self) -> Option<XformFull> {
if let Some(inv_m) = Float4::invert_3x3_fast(&self.m) {
Some(XformFull {
m: self.m,
m_inv: m_inv,
t: self.t,
fwd: *self,
inv_m: inv_m,
})
} else {
None
@ -172,36 +188,74 @@ impl Add for Xform {
}
}
// An `Xform` already is a forward transform, so it hands out itself.
impl AsXform for Xform {
/// Returns `self` unchanged.
#[inline(always)]
fn as_xform(&self) -> &Xform {
self
}
}
// Required by the `Sealed` supertrait bound on `AsXform`.
impl Sealed for Xform {}
//-------------------------------------------------------------
/// An affine transform with precomputed data for performing reverse
/// transforms, among other things.
/// A combined forward/inverse affine transform.
///
/// Unlike [`Xform`], this can perform both forward and inverse
/// transforms on all types. However, it also takes up more space and
/// is effectively "frozen" in terms of further manipulation. Prefer
/// [`Xform`] when manipulating or composing transforms, and also
/// when storing transforms if space is a consideration.
///
/// Note: only the 3x3 part of the transform is stored inverted. This
/// is because it's both trivial and more numerically stable to reuse
/// the forward translation vector to do inverse transforms, as
/// `(point - fwd.t) * inv_m`.
#[derive(Debug, Copy, Clone)]
#[repr(C)]
pub struct XformFull {
pub m: [Float4; 3], // Forward linear matrix.
pub m_inv: [Float4; 3], // Inverse linear matrix.
pub t: Float4, // Forward translation.
/// Forward transform.
pub fwd: Xform,
/// Inverse rotation/scale/shear matrix.
pub inv_m: [Float4; 3],
}
impl XformFull {
pub fn identity() -> Self {
Self {
fwd: Xform {
m: [
Float4::new(1.0, 0.0, 0.0, 0.0),
Float4::new(0.0, 1.0, 0.0, 0.0),
Float4::new(0.0, 0.0, 1.0, 0.0),
],
m_inv: [
t: Float4::splat(0.0),
},
inv_m: [
Float4::new(1.0, 0.0, 0.0, 0.0),
Float4::new(0.0, 1.0, 0.0, 0.0),
Float4::new(0.0, 0.0, 1.0, 0.0),
],
t: Float4::splat(0.0),
}
}
}
// Lets an `XformFull` be passed wherever only the forward transform is
// needed.
impl AsXform for XformFull {
/// Returns a reference to the stored forward transform (`fwd`).
#[inline(always)]
fn as_xform(&self) -> &Xform {
&self.fwd
}
}
// Required by the `Sealed` supertrait bound on `AsXform`.
impl Sealed for XformFull {}
//-------------------------------------------------------------
/// Access to the forward [`Xform`] of a transform type.
///
/// Implemented for [`Xform`] (which returns itself) and [`XformFull`]
/// (which returns its `fwd` field), so that operations needing only the
/// forward transform can accept either type.
///
/// The `Sealed` supertrait comes from a private module, so this trait
/// cannot be implemented outside the crate.
pub trait AsXform: Sealed {
/// Borrows the forward transform.
fn as_xform(&self) -> &Xform;
}
//-------------------------------------------------------------
#[cfg(test)]