diff --git a/Cargo.lock b/Cargo.lock index af82969..5b1e70c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -560,6 +560,10 @@ dependencies = [ [[package]] name = "rmath" version = "0.1.0" +dependencies = [ + "bencher", + "rand 0.6.5", +] [[package]] name = "rustc-serialize" diff --git a/sub_crates/rmath/Cargo.toml b/sub_crates/rmath/Cargo.toml index 15a9d3a..30d7d10 100644 --- a/sub_crates/rmath/Cargo.toml +++ b/sub_crates/rmath/Cargo.toml @@ -8,3 +8,11 @@ license = "MIT, Apache 2.0" [lib] name = "rmath" path = "src/lib.rs" + +[dev-dependencies] +bencher = "0.1.5" +rand = "0.6" + +[[bench]] +name = "bench" +harness = false \ No newline at end of file diff --git a/sub_crates/rmath/benches/bench.rs b/sub_crates/rmath/benches/bench.rs new file mode 100644 index 0000000..138f604 --- /dev/null +++ b/sub_crates/rmath/benches/bench.rs @@ -0,0 +1,202 @@ +use bencher::{benchmark_group, benchmark_main, black_box, Bencher}; +use rand::{rngs::SmallRng, FromEntropy, Rng}; +use rmath::{CrossProduct, DotProduct, Normal, Point, Vector, Xform, XformFull}; + +//---- + +fn vector_cross_10000(bench: &mut Bencher) { + let mut rng = SmallRng::from_entropy(); + bench.iter(|| { + let v1 = Vector::new(rng.gen::(), rng.gen::(), rng.gen::()); + let v2 = Vector::new(rng.gen::(), rng.gen::(), rng.gen::()); + for _ in 0..10000 { + black_box(black_box(v1).cross(black_box(v2))); + } + }); +} + +fn vector_dot_10000(bench: &mut Bencher) { + let mut rng = SmallRng::from_entropy(); + bench.iter(|| { + let v1 = Vector::new(rng.gen::(), rng.gen::(), rng.gen::()); + let v2 = Vector::new(rng.gen::(), rng.gen::(), rng.gen::()); + for _ in 0..10000 { + black_box(black_box(v1).dot(black_box(v2))); + } + }); +} + +fn xform_vector_mul_10000(bench: &mut Bencher) { + let mut rng = SmallRng::from_entropy(); + bench.iter(|| { + let v = Vector::new(rng.gen::(), rng.gen::(), rng.gen::()); + let x = Xform::new( + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + ); + for _ in 0..10000 { + black_box(black_box(v).xform(black_box(&x))); + } + }); +} + +fn xform_point_mul_10000(bench: &mut Bencher) { + let mut rng = SmallRng::from_entropy(); + bench.iter(|| { + let p = Point::new(rng.gen::(), rng.gen::(), rng.gen::()); + let x = Xform::new( + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + ); + for _ in 0..10000 { + black_box(black_box(p).xform(black_box(&x))); + } + }); +} + +fn xform_point_mul_inv_10000(bench: &mut Bencher) { + let mut rng = SmallRng::from_entropy(); + bench.iter(|| { + let p = Point::new(rng.gen::(), rng.gen::(), rng.gen::()); + let x = Xform::new( + 1.0, + rng.gen::(), + rng.gen::(), + rng.gen::(), + 1.0, + rng.gen::(), + rng.gen::(), + rng.gen::(), + 1.0, + rng.gen::(), + rng.gen::(), + rng.gen::(), + ) + .to_full() + .unwrap(); + for _ in 0..10000 { + black_box(black_box(p).xform_inv(black_box(&x))); + } + }); +} + +fn xform_normal_mul_10000(bench: &mut Bencher) { + let mut rng = SmallRng::from_entropy(); + bench.iter(|| { + let n = Normal::new(rng.gen::(), rng.gen::(), rng.gen::()); + let x = Xform::new( + 1.0, + rng.gen::(), + rng.gen::(), + rng.gen::(), + 1.0, + rng.gen::(), + rng.gen::(), + rng.gen::(), + 1.0, + rng.gen::(), + rng.gen::(), + rng.gen::(), + ) + .to_full() + .unwrap(); + for _ in 0..10000 { + black_box(black_box(n).xform(black_box(&x))); + } + }); +} + +fn xform_xform_mul_10000(bench: &mut Bencher) { + let mut rng = SmallRng::from_entropy(); + bench.iter(|| { + let x1 = Xform::new( + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + ); + let x2 = Xform::new( + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + ); + for _ in 0..10000 { + black_box(black_box(x1).compose(black_box(&x2))); + } + }); +} + +fn xform_to_xformfull_10000(bench: &mut Bencher) { + let mut rng = SmallRng::from_entropy(); + bench.iter(|| { + let x = Xform::new( + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + rng.gen::(), + ); + for _ in 0..10000 { + black_box(black_box(x).to_full()); + } + }); +} + +//---- + +benchmark_group!( + benches, + vector_cross_10000, + vector_dot_10000, + xform_vector_mul_10000, + xform_point_mul_10000, + xform_point_mul_inv_10000, + xform_normal_mul_10000, + xform_xform_mul_10000, + xform_to_xformfull_10000, +); +benchmark_main!(benches); diff --git a/sub_crates/rmath/examples/precision.rs b/sub_crates/rmath/examples/precision.rs new file mode 100644 index 0000000..e0f1990 --- /dev/null +++ b/sub_crates/rmath/examples/precision.rs @@ -0,0 +1,268 @@ +use rand::{rngs::SmallRng, FromEntropy, Rng}; + +use rmath::{utils::ulp_diff, wide4::Float4}; + +type D4 = [f64; 4]; + +fn main() { + let mut rng = SmallRng::from_entropy(); + + // Convenience functions for generating random Float4's. + let mut rf4 = || { + let mut rf = || { + let range = 268435456.0; + let n = rng.gen::(); + ((n * range * 2.0) - range) as f32 + }; + Float4::new(rf(), rf(), rf(), rf()) + }; + + // Dot product test. + println!("Dot product:"); + { + let mut max_ulp_diff = 0u32; + for _ in 0..10000000 { + let v1 = rf4(); + let v2 = rf4(); + + let dpa = Float4::dot_3(v1, v2); + let dpb = dot_3(f4_to_d4(v1), f4_to_d4(v2)); + + let ud = ulp_diff(dpa, dpb as f32); + max_ulp_diff = max_ulp_diff.max(ud); + } + + println!(" Max error (ulps):\n {:?}\n", max_ulp_diff); + } + + // Cross product test. + println!("Cross product:"); + { + let mut max_ulp_diff = [0u32; 4]; + for _ in 0..10000000 { + let v1 = rf4(); + let v2 = rf4(); + + let v3a = Float4::cross_3(v1, v2); + let v3b = cross_3(f4_to_d4(v1), f4_to_d4(v2)); + + let ud = ulp_diff_f4d4(v3a, v3b); + for i in 0..4 { + max_ulp_diff[i] = max_ulp_diff[i].max(ud[i]); + } + } + + println!(" Max error (ulps):\n {:?}\n", max_ulp_diff); + } + + // Matrix inversion test. + println!("Matrix inversion:"); + { + let mut max_ulp_diff = [[0u32; 4]; 3]; + let mut det_ulp_hist = [0u32; 9]; + for _ in 0..2000000 { + let m = [rf4(), rf4(), rf4()]; + let ima = Float4::invert_3x3_w_det(&m); + let imb = invert_3x3([f4_to_d4(m[0]), f4_to_d4(m[1]), f4_to_d4(m[2])]); + + if let (Some((ima, deta)), Some((imb, detb))) = (ima, imb) { + let det_ulp_diff = ulp_diff(deta, detb as f32); + let mut hist_upper = 0; + for i in 0..det_ulp_hist.len() { + if det_ulp_diff <= hist_upper { + det_ulp_hist[i] += 1; + break; + } + if hist_upper == 0 { + hist_upper += 1; + } else { + hist_upper *= 10; + } + } + + if det_ulp_diff == 0 { + for i in 0..3 { + let ud = ulp_diff_f4d4(ima[i], imb[i]); + for j in 0..4 { + max_ulp_diff[i][j] = max_ulp_diff[i][j].max(ud[j]); + } + } + } + } + } + + println!( + " Max error when determinant has 0-ulp error (ulps):\n {:?}", + max_ulp_diff + ); + + let total: u32 = det_ulp_hist.iter().sum(); + let mut ulp = 0; + let mut sum = 0; + println!(" Determinant error distribution:"); + for h in det_ulp_hist.iter() { + sum += *h; + println!( + " {:.8}% <= {} ulps", + sum as f64 / total as f64 * 100.0, + ulp + ); + if ulp == 0 { + ulp += 1; + } else { + ulp *= 10; + } + } + println!(); + } +} + +//------------------------------------------------------------- + +fn f4_to_d4(v: Float4) -> D4 { + [v.a() as f64, v.b() as f64, v.c() as f64, v.d() as f64] +} + +fn ulp_diff_f4d4(a: Float4, b: D4) -> [u32; 4] { + [ + ulp_diff(a.a(), b[0] as f32), + ulp_diff(a.b(), b[1] as f32), + ulp_diff(a.c(), b[2] as f32), + ulp_diff(a.d(), b[3] as f32), + ] +} + +//------------------------------------------------------------- + +fn dot_3(a: D4, b: D4) -> f64 { + // Products. + let (x, x_err) = two_prod(a[0], b[0]); + let (y, y_err) = two_prod(a[1], b[1]); + let (z, z_err) = two_prod(a[2], b[2]); + + // Sums. + let (s1, s1_err) = two_sum(x, y); + let err1 = x_err + (y_err + s1_err); + + let (s2, s2_err) = two_sum(s1, z); + let err2 = z_err + (err1 + s2_err); + + // Final result with rounding error compensation. + s2 + err2 +} + +fn cross_3(a: D4, b: D4) -> D4 { + [ + difference_of_products(a[1], b[2], a[2], b[1]), + difference_of_products(a[2], b[0], a[0], b[2]), + difference_of_products(a[0], b[1], a[1], b[0]), + difference_of_products(a[3], b[3], a[3], b[3]), + ] +} + +fn invert_3x3(m: [D4; 3]) -> Option<([D4; 3], f64)> { + let m0_bca = [m[0][1], m[0][2], m[0][0], m[0][3]]; + let m1_bca = [m[1][1], m[1][2], m[1][0], m[1][3]]; + let m2_bca = [m[2][1], m[2][2], m[2][0], m[2][3]]; + let m0_cab = [m[0][2], m[0][0], m[0][1], m[0][3]]; + let m1_cab = [m[1][2], m[1][0], m[1][1], m[1][3]]; + let m2_cab = [m[2][2], m[2][0], m[2][1], m[2][3]]; + + let abc = [ + difference_of_products(m1_bca[0], m2_cab[0], m1_cab[0], m2_bca[0]), + difference_of_products(m1_bca[1], m2_cab[1], m1_cab[1], m2_bca[1]), + difference_of_products(m1_bca[2], m2_cab[2], m1_cab[2], m2_bca[2]), + difference_of_products(m1_bca[3], m2_cab[3], m1_cab[3], m2_bca[3]), + ]; + let def = [ + difference_of_products(m2_bca[0], m0_cab[0], m2_cab[0], m0_bca[0]), + difference_of_products(m2_bca[1], m0_cab[1], m2_cab[1], m0_bca[1]), + difference_of_products(m2_bca[2], m0_cab[2], m2_cab[2], m0_bca[2]), + difference_of_products(m2_bca[3], m0_cab[3], m2_cab[3], m0_bca[3]), + ]; + let ghi = [ + difference_of_products(m0_bca[0], m1_cab[0], m0_cab[0], m1_bca[0]), + difference_of_products(m0_bca[1], m1_cab[1], m0_cab[1], m1_bca[1]), + difference_of_products(m0_bca[2], m1_cab[2], m0_cab[2], m1_bca[2]), + difference_of_products(m0_bca[3], m1_cab[3], m0_cab[3], m1_bca[3]), + ]; + + let det = dot_3( + [abc[0], def[0], ghi[0], 0.0], + [m[0][0], m[1][0], m[2][0], 0.0], + ); + + if det == 0.0 { + None + } else { + Some(( + [ + [abc[0] / det, def[0] / det, ghi[0] / det, 0.0], + [abc[1] / det, def[1] / det, ghi[1] / det, 0.0], + [abc[2] / det, def[2] / det, ghi[2] / det, 0.0], + ], + // [ + // [abc[0], def[0], ghi[0], 0.0], + // [abc[1], def[1], ghi[1], 0.0], + // [abc[2], def[2], ghi[2], 0.0], + // ], + det, + )) + } +} + +fn rel_diff(a: f64, b: f64) -> f64 { + (a - b).abs() / a.abs().max(b.abs()) +} + +//------------------------------------------------------------- + +/// `(a * b) - (c * d)` but done with high precision via floating point tricks. +/// +/// See https://pharr.org/matt/blog/2019/11/03/difference-of-floats +#[inline(always)] +fn difference_of_products(a: f64, b: f64, c: f64, d: f64) -> f64 { + let cd = c * d; + let dop = a.mul_add(b, -cd); + let err = (-c).mul_add(d, cd); + dop + err +} + +/// `(a * b) + (c * d)` but done with high precision via floating point tricks. +#[inline(always)] +fn sum_of_products(a: f64, b: f64, c: f64, d: f64) -> f64 { + let cd = c * d; + let sop = a.mul_add(b, cd); + let err = c.mul_add(d, -cd); + sop + err +} + +/// `a * b` but also returns a rounding error for precise composition +/// with other operations. +#[inline(always)] +fn two_prod(a: f64, b: f64) -> (f64, f64) +// (product, rounding_err) +{ + let ab = a * b; + (ab, a.mul_add(b, -ab)) +} + +/// `a + b` but also returns a rounding error for precise composition +/// with other operations. +#[inline(always)] +fn two_sum(a: f64, b: f64) -> (f64, f64) +// (sum, rounding_err) +{ + let sum = a + b; + let delta = sum - a; + (sum, (a - (sum - delta)) + (b - delta)) +} + +#[inline(always)] +fn two_diff(a: f64, b: f64) -> (f64, f64) +// (diff, rounding_err) +{ + let diff = a - b; + let delta = diff - a; + (diff, (a - (diff - delta)) - (b + delta)) +} diff --git a/sub_crates/rmath/src/lib.rs b/sub_crates/rmath/src/lib.rs index 7d8cf7e..3be2d82 100644 --- a/sub_crates/rmath/src/lib.rs +++ b/sub_crates/rmath/src/lib.rs @@ -5,7 +5,7 @@ mod normal; mod point; mod sealed; -mod utils; +pub mod utils; mod vector; pub mod wide4; mod xform; diff --git a/sub_crates/rmath/src/utils.rs b/sub_crates/rmath/src/utils.rs index 454da83..051e956 100644 --- a/sub_crates/rmath/src/utils.rs +++ b/sub_crates/rmath/src/utils.rs @@ -2,7 +2,7 @@ const TOP_BIT: u32 = 1 << 31; /// Compute how different two floats are in ulps. #[inline(always)] -pub(crate) fn ulp_diff(a: f32, b: f32) -> u32 { +pub fn ulp_diff(a: f32, b: f32) -> u32 { let a = a.to_bits(); let b = b.to_bits(); @@ -20,7 +20,7 @@ pub(crate) fn ulp_diff(a: f32, b: f32) -> u32 { /// Checks if two floats are approximately equal, within `max_ulps`. #[inline(always)] -pub(crate) fn ulps_eq(a: f32, b: f32, max_ulps: u32) -> bool { +pub fn ulps_eq(a: f32, b: f32, max_ulps: u32) -> bool { !a.is_nan() && !b.is_nan() && (ulp_diff(a, b) <= max_ulps) } diff --git a/sub_crates/rmath/src/wide4/mod.rs b/sub_crates/rmath/src/wide4/mod.rs index 7fcdeb6..eca076a 100644 --- a/sub_crates/rmath/src/wide4/mod.rs +++ b/sub_crates/rmath/src/wide4/mod.rs @@ -100,6 +100,33 @@ impl Float4 { } } + /// Invert a 3x3 matrix, and also return the computed determinant. + /// + /// Returns `None` if not invertible. + #[inline] + pub fn invert_3x3_w_det(m: &[Self; 3]) -> Option<([Self; 3], f32)> { + let m0_bca = m[0].bcad(); + let m1_bca = m[1].bcad(); + let m2_bca = m[2].bcad(); + let m0_cab = m[0].cabd(); + let m1_cab = m[1].cabd(); + let m2_cab = m[2].cabd(); + let abc = difference_of_products(m1_bca, m2_cab, m1_cab, m2_bca); + let def = difference_of_products(m2_bca, m0_cab, m2_cab, m0_bca); + let ghi = difference_of_products(m0_bca, m1_cab, m0_cab, m1_bca); + + let det = Self::dot_3( + Self::new(abc.a(), def.a(), ghi.a(), 0.0), + Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0), + ); + + if det == 0.0 { + None + } else { + Some((Self::transpose_3x3(&[abc / det, def / det, ghi / det]), det)) + } + } + /// Faster but less precise version of `invert_3x3()`. #[inline] pub fn invert_3x3_fast(m: &[Self; 3]) -> Option<[Self; 3]> {