Implement dot products and 3x3 matrix inversion.

Both precise and fast versions.  But untested, so might be
incorrect!
This commit is contained in:
Nathan Vegdahl 2022-07-14 15:30:30 -07:00
parent 8a695a7694
commit c398387b55
5 changed files with 302 additions and 72 deletions

View File

@ -8,17 +8,26 @@ mod vector;
pub mod wide4;
mod xform;
use std::ops::{Add, Mul, Neg, Sub};
pub use self::{normal::Normal, point::Point, vector::Vector, xform::Xform, xform::XformFull};
// /// Trait for calculating dot products.
// pub trait DotProduct {
// fn dot(self, other: Self) -> f32;
// }
/// Trait for calculating dot products.
pub trait DotProduct {
fn dot(self, other: Self) -> f32;
// #[inline]
// pub fn dot<T: DotProduct>(a: T, b: T) -> f32 {
// a.dot(b)
// }
fn dot_fast(self, other: Self) -> f32;
}
#[inline(always)]
pub fn dot<T: DotProduct>(a: T, b: T) -> f32 {
a.dot(b)
}
#[inline(always)]
pub fn dot_fast<T: DotProduct>(a: T, b: T) -> f32 {
a.dot_fast(b)
}
// /// Trait for calculating cross products.
// pub trait CrossProduct {
@ -29,3 +38,68 @@ pub use self::{normal::Normal, point::Point, vector::Vector, xform::Xform, xform
// pub fn cross<T: CrossProduct>(a: T, b: T) -> T {
// a.cross(b)
// }
//-------------------------------------------------------------
/// Trait representing types that can do fused multiply-add.
trait FMulAdd {
/// `(self * b) + c` with only one floating point rounding error.
fn fma(self, b: Self, c: Self) -> Self;
}
impl FMulAdd for f32 {
fn fma(self, b: Self, c: Self) -> Self {
self.mul_add(b, c)
}
}
/// `(a * b) - (c * d)` but done with high precision via floating point tricks.
///
/// See https://pharr.org/matt/blog/2019/11/03/difference-of-floats
#[inline(always)]
fn difference_of_products<T>(a: T, b: T, c: T, d: T) -> T
where
T: Copy + FMulAdd + Add<Output = T> + Mul<Output = T> + Neg<Output = T>,
{
let cd = c * d;
let dop = a.fma(b, -cd);
let err = (-c).fma(d, cd);
dop + err
}
/// `(a * b) + (c * d)` but done with high precision via floating point tricks.
#[inline(always)]
fn sum_of_products<T>(a: T, b: T, c: T, d: T) -> T
where
T: Copy + FMulAdd + Add<Output = T> + Mul<Output = T> + Neg<Output = T>,
{
let cd = c * d;
let sop = a.fma(b, cd);
let err = c.fma(d, -cd);
sop + err
}
/// `a * b` but also returns a rounding error for precise composition
/// with other operations.
#[inline(always)]
fn two_prod<T>(a: T, b: T) -> (T, T)
// (product, rounding_err)
where
T: Copy + FMulAdd + Mul<Output = T> + Neg<Output = T>,
{
let ab = a * b;
(ab, a.fma(b, -ab))
}
/// `a + b` but also returns a rounding error for precise composition
/// with other operations.
#[inline(always)]
fn two_sum<T>(a: T, b: T) -> (T, T)
// (sum, rounding_err)
where
T: Copy + Add<Output = T> + Sub<Output = T>,
{
let sum = a + b;
let delta = sum - a;
(sum, (a - (sum - delta)) + (b - delta))
}

View File

@ -3,7 +3,7 @@
use std::ops::{Add, Div, Mul, Neg, Sub};
use crate::wide4::Float4;
use crate::DotProduct;
use crate::Vector;
/// A surface normal in 3D space.
@ -100,17 +100,6 @@ impl Mul<f32> for Normal {
}
}
// impl Mul<Transform> for Normal {
// type Output = Normal;
// #[inline]
// fn mul(self, other: Transform) -> Normal {
// Normal {
// co: other.0.matrix3.inverse().transpose().mul_vec3a(self.co),
// }
// }
// }
impl Div<f32> for Normal {
type Output = Self;
@ -129,12 +118,17 @@ impl Neg for Normal {
}
}
// impl DotProduct for Normal {
// #[inline(always)]
// fn dot(self, other: Normal) -> f32 {
// self.co.dot(other.co)
// }
// }
impl DotProduct for Normal {
#[inline(always)]
fn dot(self, other: Self) -> f32 {
Float4::dot_3(self.0, other.0)
}
#[inline(always)]
fn dot_fast(self, other: Self) -> f32 {
Float4::dot_3_fast(self.0, other.0)
}
}
// impl CrossProduct for Normal {
// #[inline]

View File

@ -5,6 +5,7 @@ use std::ops::{Add, Div, Mul, Neg, Sub};
use crate::normal::Normal;
use crate::point::Point;
use crate::wide4::Float4;
use crate::DotProduct;
/// A direction vector in 3D space.
#[derive(Debug, Copy, Clone)]
@ -105,15 +106,6 @@ impl Mul<f32> for Vector {
}
}
// impl Mul<Transform> for Vector {
// type Output = Self;
// #[inline]
// fn mul(self, other: Transform) -> Self {
// Self(other.0.transform_vector3a(self.0))
// }
// }
impl Div<f32> for Vector {
type Output = Self;
@ -132,12 +124,17 @@ impl Neg for Vector {
}
}
// impl DotProduct for Vector {
// #[inline(always)]
// fn dot(self, other: Self) -> f32 {
// self.co.dot(other.co)
// }
// }
impl DotProduct for Vector {
#[inline(always)]
fn dot(self, other: Self) -> f32 {
Float4::dot_3(self.0, other.0)
}
#[inline(always)]
fn dot_fast(self, other: Self) -> f32 {
Float4::dot_3_fast(self.0, other.0)
}
}
// impl CrossProduct for Vector {
// #[inline]

View File

@ -1,9 +1,13 @@
use std::ops::{AddAssign, DivAssign, MulAssign, SubAssign};
use crate::{difference_of_products, two_prod, two_sum};
pub use fallback::Float4;
mod fallback {
use std::ops::{Add, Div, Mul, Neg, Sub};
use crate::FMulAdd;
#[allow(non_camel_case_types)]
#[derive(Debug, Copy, Clone)]
#[repr(C, align(16))]
@ -81,31 +85,6 @@ mod fallback {
// a.max(b)
// }
//-----------------------------------------------------
// For matrix stuff.
#[inline(always)]
pub fn transpose_3x3(m: [Self; 3]) -> [Self; 3] {
[
// The fourth component in each row below is arbitrary,
// but in this case chosen so that it matches the
// behavior of the SSE version of transpose_3x3.
Self::new(m[0].a(), m[1].a(), m[2].a(), m[2].d()),
Self::new(m[0].b(), m[1].b(), m[2].b(), m[2].d()),
Self::new(m[0].c(), m[1].c(), m[2].c(), m[2].d()),
]
}
#[inline]
pub fn invert_3x3(_m: [Self; 3]) -> [Self; 3] {
todo!()
}
#[inline]
pub fn invert_3x3_precise(_m: [Self; 3]) -> [Self; 3] {
todo!()
}
//-----------------------------------------------------
// Individual components.
@ -187,6 +166,24 @@ mod fallback {
let d = self.n[3];
Self { n: [d, d, d, d] }
}
#[inline(always)]
pub fn bcad(self) -> Self {
let a = self.n[0];
let b = self.n[1];
let c = self.n[2];
let d = self.n[3];
Self { n: [b, c, a, d] }
}
#[inline(always)]
pub fn cabd(self) -> Self {
let a = self.n[0];
let b = self.n[1];
let c = self.n[2];
let d = self.n[3];
Self { n: [c, a, b, d] }
}
}
impl Add for Float4 {
@ -295,9 +292,168 @@ mod fallback {
}
}
}
impl FMulAdd for Float4 {
fn fma(self, b: Self, c: Self) -> Self {
self.mul_add(b, c)
}
}
}
//-------------------------------------------------------------
// Float4 impls that don't depend on its inner representation.
impl Float4 {
/// 3D dot product (only uses the first 3 components).
#[inline(always)]
pub fn dot_3(a: Self, b: Self) -> f32 {
let (p, p_err) = two_prod(a, b);
// Products.
let (x, x_err) = (p.a(), p_err.a());
let (y, y_err) = (p.b(), p_err.b());
let (z, z_err) = (p.c(), p_err.c());
// Sums.
let (s1, s1_err) = two_sum(x, y);
let err1 = x_err + (y_err + s1_err);
let (s2, s2_err) = two_sum(s1, z);
let err2 = z_err + (err1 + s2_err);
// Final result with rounding error compensation.
s2 + err2
}
/// 3D dot product (only uses the first 3 components).
///
/// Faster but less precise version.
#[inline(always)]
pub fn dot_3_fast(a: Self, b: Self) -> f32 {
let c = a * b;
c.a() + c.b() + c.c()
}
#[inline(always)]
pub fn transpose_3x3(m: [Self; 3]) -> [Self; 3] {
[
// The fourth component in each row below is arbitrary,
// but in this case chosen so that it matches the
// behavior of the SSE version of transpose_3x3.
Self::new(m[0].a(), m[1].a(), m[2].a(), m[2].d()),
Self::new(m[0].b(), m[1].b(), m[2].b(), m[2].d()),
Self::new(m[0].c(), m[1].c(), m[2].c(), m[2].d()),
]
}
/// Invert a 3x3 matrix.
///
/// Returns `None` if not invertible.
#[inline]
pub fn invert_3x3(m: [Self; 3]) -> Option<[Self; 3]> {
// let a = difference_of_products(m[1].b(), m[2].c(), m[1].c(), m[2].b());
// let b = difference_of_products(m[1].c(), m[2].a(), m[1].a(), m[2].c());
// let c = difference_of_products(m[1].a(), m[2].b(), m[1].b(), m[2].a());
// let d = difference_of_products(m[2].b(), m[0].c(), m[2].c(), m[0].b());
// let e = difference_of_products(m[2].c(), m[0].a(), m[2].a(), m[0].c());
// let f = difference_of_products(m[2].a(), m[0].b(), m[2].b(), m[0].a());
// let g = difference_of_products(m[0].b(), m[1].c(), m[0].c(), m[1].b());
// let h = difference_of_products(m[0].c(), m[1].a(), m[0].a(), m[1].c());
// let i = difference_of_products(m[0].a(), m[1].b(), m[0].b(), m[1].a());
let m0_bca = m[0].bcad();
let m1_bca = m[1].bcad();
let m2_bca = m[2].bcad();
let m0_cab = m[0].cabd();
let m1_cab = m[1].cabd();
let m2_cab = m[2].cabd();
let abc = difference_of_products(m1_bca, m2_cab, m1_cab, m2_bca);
let def = difference_of_products(m2_bca, m0_cab, m2_cab, m0_bca);
let ghi = difference_of_products(m0_bca, m1_cab, m0_cab, m1_bca);
// TODO: use precise inner product.
let det = Self::dot_3(
Self::new(abc.a(), def.a(), ghi.a(), 0.0),
Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0),
);
if det == 0.0 {
None
} else {
Some(Self::transpose_3x3([abc / det, def / det, ghi / det]))
}
}
/// Invert a 3x3 matrix. Faster but less precise version.
///
/// Returns `None` if not invertible.
#[inline]
pub fn invert_3x3_fast(m: [Self; 3]) -> Option<[Self; 3]> {
// let a = (m[1].b() * m[2].c()) - (m[1].c() * m[2].b());
// let b = (m[1].c() * m[2].a()) - (m[1].a() * m[2].c());
// let c = (m[1].a() * m[2].b()) - (m[1].b() * m[2].a());
// let e = (m[2].c() * m[0].a()) - (m[2].a() * m[0].c());
// let f = (m[2].a() * m[0].b()) - (m[2].b() * m[0].a());
// let g = (m[0].b() * m[1].c()) - (m[0].c() * m[1].b());
// let h = (m[0].c() * m[1].a()) - (m[0].a() * m[1].c());
// let i = (m[0].a() * m[1].b()) - (m[0].b() * m[1].a());
let m0_bca = m[0].bcad();
let m1_bca = m[1].bcad();
let m2_bca = m[2].bcad();
let m0_cab = m[0].cabd();
let m1_cab = m[1].cabd();
let m2_cab = m[2].cabd();
let abc = (m1_bca * m2_cab) - (m1_cab * m2_bca);
let def = (m2_bca * m0_cab) - (m2_cab * m0_bca);
let ghi = (m0_bca * m1_cab) - (m0_cab * m1_bca);
let det = Self::dot_3(
Self::new(abc.a(), def.a(), ghi.a(), 0.0),
Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0),
);
if det == 0.0 {
None
} else {
Some(Self::transpose_3x3([abc / det, def / det, ghi / det]))
}
}
/// Multiplies a 3D vector with a 3x3 matrix.
#[inline]
pub fn vec3_mul_3x3(_v: Self, _m: &[Self; 3]) -> Self {
todo!()
}
/// Multiplies a 3D vector with a 3x3 matrix.
///
/// Faster but less precise version.
#[inline]
pub fn vec3_mul_3x3_fast(_v: Self, _m: &[Self; 3]) -> Self {
todo!()
}
/// Transforms a 3d point by an affine transform.
///
/// `m` is the 3x3 part of the affine transform, `t` is the translation part.
#[inline]
pub fn pnt3_mul_affine(_p: Self, _m: &[Self; 3], _t: Self) -> Self {
todo!()
}
/// Transforms a 3d point by an affine transform, except it applies
/// the translation part before the 3x3 part.
///
/// This is primarily useful for performing efficient inverse transforms by
/// passing an inverted 3x3 part and a negated translation part.
///
/// `m` is the 3x3 part of the affine transform, `t` is the translation part.
#[inline]
pub fn pnt3_mul_affine_rev(_p: Self, _m: &[Self; 3], _t: Self) -> Self {
todo!()
}
}
impl AddAssign for Float4 {
#[inline(always)]

View File

@ -96,22 +96,31 @@ impl Xform {
eq
}
/// Returns the dual transform, which can do inverse transforms.
/// Computes a "full" version of the transform, which can do both
/// forward and inverse transforms.
#[inline]
pub fn compute_dual(self) -> XformFull {
pub fn compute_full(self) -> XformFull {
XformFull {
m: self.m,
m_inv: Float4::invert_3x3(self.m),
m_inv: Float4::invert_3x3(self.m).unwrap_or([
Float4::new(1.0, 0.0, 0.0, 0.0),
Float4::new(0.0, 1.0, 0.0, 0.0),
Float4::new(0.0, 0.0, 1.0, 0.0),
]),
t: self.t,
}
}
/// Slower but precise version of `compute_dual()`.
/// Faster but less precise version of `compute_full()`.
#[inline]
pub fn compute_dual_precise(self) -> XformFull {
pub fn compute_full_fast(self) -> XformFull {
XformFull {
m: self.m,
m_inv: Float4::invert_3x3_precise(self.m),
m_inv: Float4::invert_3x3_fast(self.m).unwrap_or([
Float4::new(1.0, 0.0, 0.0, 0.0),
Float4::new(0.0, 1.0, 0.0, 0.0),
Float4::new(0.0, 0.0, 1.0, 0.0),
]),
t: self.t,
}
}