Benchmarks and precision tests for RMath sub-crate.

This commit is contained in:
Nathan Vegdahl 2022-08-03 17:10:27 -07:00
parent 167d70b8df
commit 1f7c412e25
7 changed files with 512 additions and 3 deletions

4
Cargo.lock generated
View File

@ -560,6 +560,10 @@ dependencies = [
[[package]] [[package]]
name = "rmath" name = "rmath"
version = "0.1.0" version = "0.1.0"
dependencies = [
"bencher",
"rand 0.6.5",
]
[[package]] [[package]]
name = "rustc-serialize" name = "rustc-serialize"

View File

@ -8,3 +8,11 @@ license = "MIT, Apache 2.0"
[lib] [lib]
name = "rmath" name = "rmath"
path = "src/lib.rs" path = "src/lib.rs"
[dev-dependencies]
bencher = "0.1.5"
rand = "0.6"
[[bench]]
name = "bench"
harness = false

View File

@ -0,0 +1,202 @@
use bencher::{benchmark_group, benchmark_main, black_box, Bencher};
use rand::{rngs::SmallRng, FromEntropy, Rng};
use rmath::{CrossProduct, DotProduct, Normal, Point, Vector, Xform, XformFull};
//----
fn vector_cross_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let v1 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let v2 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
for _ in 0..10000 {
black_box(black_box(v1).cross(black_box(v2)));
}
});
}
fn vector_dot_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let v1 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let v2 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
for _ in 0..10000 {
black_box(black_box(v1).dot(black_box(v2)));
}
});
}
fn xform_vector_mul_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let v = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let x = Xform::new(
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
);
for _ in 0..10000 {
black_box(black_box(v).xform(black_box(&x)));
}
});
}
fn xform_point_mul_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let p = Point::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let x = Xform::new(
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
);
for _ in 0..10000 {
black_box(black_box(p).xform(black_box(&x)));
}
});
}
fn xform_point_mul_inv_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let p = Point::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let x = Xform::new(
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
)
.to_full()
.unwrap();
for _ in 0..10000 {
black_box(black_box(p).xform_inv(black_box(&x)));
}
});
}
fn xform_normal_mul_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let n = Normal::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let x = Xform::new(
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
)
.to_full()
.unwrap();
for _ in 0..10000 {
black_box(black_box(n).xform(black_box(&x)));
}
});
}
fn xform_xform_mul_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let x1 = Xform::new(
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
);
let x2 = Xform::new(
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
);
for _ in 0..10000 {
black_box(black_box(x1).compose(black_box(&x2)));
}
});
}
fn xform_to_xformfull_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let x = Xform::new(
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
);
for _ in 0..10000 {
black_box(black_box(x).to_full());
}
});
}
//----
benchmark_group!(
benches,
vector_cross_10000,
vector_dot_10000,
xform_vector_mul_10000,
xform_point_mul_10000,
xform_point_mul_inv_10000,
xform_normal_mul_10000,
xform_xform_mul_10000,
xform_to_xformfull_10000,
);
benchmark_main!(benches);

View File

@ -0,0 +1,268 @@
use rand::{rngs::SmallRng, FromEntropy, Rng};
use rmath::{utils::ulp_diff, wide4::Float4};
type D4 = [f64; 4];
fn main() {
let mut rng = SmallRng::from_entropy();
// Convenience functions for generating random Float4's.
let mut rf4 = || {
let mut rf = || {
let range = 268435456.0;
let n = rng.gen::<f64>();
((n * range * 2.0) - range) as f32
};
Float4::new(rf(), rf(), rf(), rf())
};
// Dot product test.
println!("Dot product:");
{
let mut max_ulp_diff = 0u32;
for _ in 0..10000000 {
let v1 = rf4();
let v2 = rf4();
let dpa = Float4::dot_3(v1, v2);
let dpb = dot_3(f4_to_d4(v1), f4_to_d4(v2));
let ud = ulp_diff(dpa, dpb as f32);
max_ulp_diff = max_ulp_diff.max(ud);
}
println!(" Max error (ulps):\n {:?}\n", max_ulp_diff);
}
// Cross product test.
println!("Cross product:");
{
let mut max_ulp_diff = [0u32; 4];
for _ in 0..10000000 {
let v1 = rf4();
let v2 = rf4();
let v3a = Float4::cross_3(v1, v2);
let v3b = cross_3(f4_to_d4(v1), f4_to_d4(v2));
let ud = ulp_diff_f4d4(v3a, v3b);
for i in 0..4 {
max_ulp_diff[i] = max_ulp_diff[i].max(ud[i]);
}
}
println!(" Max error (ulps):\n {:?}\n", max_ulp_diff);
}
// Matrix inversion test.
println!("Matrix inversion:");
{
let mut max_ulp_diff = [[0u32; 4]; 3];
let mut det_ulp_hist = [0u32; 9];
for _ in 0..2000000 {
let m = [rf4(), rf4(), rf4()];
let ima = Float4::invert_3x3_w_det(&m);
let imb = invert_3x3([f4_to_d4(m[0]), f4_to_d4(m[1]), f4_to_d4(m[2])]);
if let (Some((ima, deta)), Some((imb, detb))) = (ima, imb) {
let det_ulp_diff = ulp_diff(deta, detb as f32);
let mut hist_upper = 0;
for i in 0..det_ulp_hist.len() {
if det_ulp_diff <= hist_upper {
det_ulp_hist[i] += 1;
break;
}
if hist_upper == 0 {
hist_upper += 1;
} else {
hist_upper *= 10;
}
}
if det_ulp_diff == 0 {
for i in 0..3 {
let ud = ulp_diff_f4d4(ima[i], imb[i]);
for j in 0..4 {
max_ulp_diff[i][j] = max_ulp_diff[i][j].max(ud[j]);
}
}
}
}
}
println!(
" Max error when determinant has 0-ulp error (ulps):\n {:?}",
max_ulp_diff
);
let total: u32 = det_ulp_hist.iter().sum();
let mut ulp = 0;
let mut sum = 0;
println!(" Determinant error distribution:");
for h in det_ulp_hist.iter() {
sum += *h;
println!(
" {:.8}% <= {} ulps",
sum as f64 / total as f64 * 100.0,
ulp
);
if ulp == 0 {
ulp += 1;
} else {
ulp *= 10;
}
}
println!();
}
}
//-------------------------------------------------------------
fn f4_to_d4(v: Float4) -> D4 {
[v.a() as f64, v.b() as f64, v.c() as f64, v.d() as f64]
}
fn ulp_diff_f4d4(a: Float4, b: D4) -> [u32; 4] {
[
ulp_diff(a.a(), b[0] as f32),
ulp_diff(a.b(), b[1] as f32),
ulp_diff(a.c(), b[2] as f32),
ulp_diff(a.d(), b[3] as f32),
]
}
//-------------------------------------------------------------
fn dot_3(a: D4, b: D4) -> f64 {
// Products.
let (x, x_err) = two_prod(a[0], b[0]);
let (y, y_err) = two_prod(a[1], b[1]);
let (z, z_err) = two_prod(a[2], b[2]);
// Sums.
let (s1, s1_err) = two_sum(x, y);
let err1 = x_err + (y_err + s1_err);
let (s2, s2_err) = two_sum(s1, z);
let err2 = z_err + (err1 + s2_err);
// Final result with rounding error compensation.
s2 + err2
}
fn cross_3(a: D4, b: D4) -> D4 {
[
difference_of_products(a[1], b[2], a[2], b[1]),
difference_of_products(a[2], b[0], a[0], b[2]),
difference_of_products(a[0], b[1], a[1], b[0]),
difference_of_products(a[3], b[3], a[3], b[3]),
]
}
fn invert_3x3(m: [D4; 3]) -> Option<([D4; 3], f64)> {
let m0_bca = [m[0][1], m[0][2], m[0][0], m[0][3]];
let m1_bca = [m[1][1], m[1][2], m[1][0], m[1][3]];
let m2_bca = [m[2][1], m[2][2], m[2][0], m[2][3]];
let m0_cab = [m[0][2], m[0][0], m[0][1], m[0][3]];
let m1_cab = [m[1][2], m[1][0], m[1][1], m[1][3]];
let m2_cab = [m[2][2], m[2][0], m[2][1], m[2][3]];
let abc = [
difference_of_products(m1_bca[0], m2_cab[0], m1_cab[0], m2_bca[0]),
difference_of_products(m1_bca[1], m2_cab[1], m1_cab[1], m2_bca[1]),
difference_of_products(m1_bca[2], m2_cab[2], m1_cab[2], m2_bca[2]),
difference_of_products(m1_bca[3], m2_cab[3], m1_cab[3], m2_bca[3]),
];
let def = [
difference_of_products(m2_bca[0], m0_cab[0], m2_cab[0], m0_bca[0]),
difference_of_products(m2_bca[1], m0_cab[1], m2_cab[1], m0_bca[1]),
difference_of_products(m2_bca[2], m0_cab[2], m2_cab[2], m0_bca[2]),
difference_of_products(m2_bca[3], m0_cab[3], m2_cab[3], m0_bca[3]),
];
let ghi = [
difference_of_products(m0_bca[0], m1_cab[0], m0_cab[0], m1_bca[0]),
difference_of_products(m0_bca[1], m1_cab[1], m0_cab[1], m1_bca[1]),
difference_of_products(m0_bca[2], m1_cab[2], m0_cab[2], m1_bca[2]),
difference_of_products(m0_bca[3], m1_cab[3], m0_cab[3], m1_bca[3]),
];
let det = dot_3(
[abc[0], def[0], ghi[0], 0.0],
[m[0][0], m[1][0], m[2][0], 0.0],
);
if det == 0.0 {
None
} else {
Some((
[
[abc[0] / det, def[0] / det, ghi[0] / det, 0.0],
[abc[1] / det, def[1] / det, ghi[1] / det, 0.0],
[abc[2] / det, def[2] / det, ghi[2] / det, 0.0],
],
// [
// [abc[0], def[0], ghi[0], 0.0],
// [abc[1], def[1], ghi[1], 0.0],
// [abc[2], def[2], ghi[2], 0.0],
// ],
det,
))
}
}
fn rel_diff(a: f64, b: f64) -> f64 {
(a - b).abs() / a.abs().max(b.abs())
}
//-------------------------------------------------------------
/// `(a * b) - (c * d)` but done with high precision via floating point tricks.
///
/// See https://pharr.org/matt/blog/2019/11/03/difference-of-floats
#[inline(always)]
fn difference_of_products(a: f64, b: f64, c: f64, d: f64) -> f64 {
let cd = c * d;
let dop = a.mul_add(b, -cd);
let err = (-c).mul_add(d, cd);
dop + err
}
/// `(a * b) + (c * d)` but done with high precision via floating point tricks.
#[inline(always)]
fn sum_of_products(a: f64, b: f64, c: f64, d: f64) -> f64 {
let cd = c * d;
let sop = a.mul_add(b, cd);
let err = c.mul_add(d, -cd);
sop + err
}
/// `a * b` but also returns a rounding error for precise composition
/// with other operations.
#[inline(always)]
fn two_prod(a: f64, b: f64) -> (f64, f64)
// (product, rounding_err)
{
let ab = a * b;
(ab, a.mul_add(b, -ab))
}
/// `a + b` but also returns a rounding error for precise composition
/// with other operations.
#[inline(always)]
fn two_sum(a: f64, b: f64) -> (f64, f64)
// (sum, rounding_err)
{
let sum = a + b;
let delta = sum - a;
(sum, (a - (sum - delta)) + (b - delta))
}
#[inline(always)]
fn two_diff(a: f64, b: f64) -> (f64, f64)
// (diff, rounding_err)
{
let diff = a - b;
let delta = diff - a;
(diff, (a - (diff - delta)) - (b + delta))
}

View File

@ -5,7 +5,7 @@
mod normal; mod normal;
mod point; mod point;
mod sealed; mod sealed;
mod utils; pub mod utils;
mod vector; mod vector;
pub mod wide4; pub mod wide4;
mod xform; mod xform;

View File

@ -2,7 +2,7 @@ const TOP_BIT: u32 = 1 << 31;
/// Compute how different two floats are in ulps. /// Compute how different two floats are in ulps.
#[inline(always)] #[inline(always)]
pub(crate) fn ulp_diff(a: f32, b: f32) -> u32 { pub fn ulp_diff(a: f32, b: f32) -> u32 {
let a = a.to_bits(); let a = a.to_bits();
let b = b.to_bits(); let b = b.to_bits();
@ -20,7 +20,7 @@ pub(crate) fn ulp_diff(a: f32, b: f32) -> u32 {
/// Checks if two floats are approximately equal, within `max_ulps`. /// Checks if two floats are approximately equal, within `max_ulps`.
#[inline(always)] #[inline(always)]
pub(crate) fn ulps_eq(a: f32, b: f32, max_ulps: u32) -> bool { pub fn ulps_eq(a: f32, b: f32, max_ulps: u32) -> bool {
!a.is_nan() && !b.is_nan() && (ulp_diff(a, b) <= max_ulps) !a.is_nan() && !b.is_nan() && (ulp_diff(a, b) <= max_ulps)
} }

View File

@ -100,6 +100,33 @@ impl Float4 {
} }
} }
/// Invert a 3x3 matrix, and also return the computed determinant.
///
/// Returns `None` if not invertible.
#[inline]
pub fn invert_3x3_w_det(m: &[Self; 3]) -> Option<([Self; 3], f32)> {
let m0_bca = m[0].bcad();
let m1_bca = m[1].bcad();
let m2_bca = m[2].bcad();
let m0_cab = m[0].cabd();
let m1_cab = m[1].cabd();
let m2_cab = m[2].cabd();
let abc = difference_of_products(m1_bca, m2_cab, m1_cab, m2_bca);
let def = difference_of_products(m2_bca, m0_cab, m2_cab, m0_bca);
let ghi = difference_of_products(m0_bca, m1_cab, m0_cab, m1_bca);
let det = Self::dot_3(
Self::new(abc.a(), def.a(), ghi.a(), 0.0),
Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0),
);
if det == 0.0 {
None
} else {
Some((Self::transpose_3x3(&[abc / det, def / det, ghi / det]), det))
}
}
/// Faster but less precise version of `invert_3x3()`. /// Faster but less precise version of `invert_3x3()`.
#[inline] #[inline]
pub fn invert_3x3_fast(m: &[Self; 3]) -> Option<[Self; 3]> { pub fn invert_3x3_fast(m: &[Self; 3]) -> Option<[Self; 3]> {