Benchmarks and precision tests for RMath sub-crate.
This commit is contained in:
parent
167d70b8df
commit
1f7c412e25
4
Cargo.lock
generated
4
Cargo.lock
generated
|
@ -560,6 +560,10 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "rmath"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bencher",
|
||||
"rand 0.6.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-serialize"
|
||||
|
|
|
@ -8,3 +8,11 @@ license = "MIT, Apache 2.0"
|
|||
[lib]
|
||||
name = "rmath"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[dev-dependencies]
|
||||
bencher = "0.1.5"
|
||||
rand = "0.6"
|
||||
|
||||
[[bench]]
|
||||
name = "bench"
|
||||
harness = false
|
202
sub_crates/rmath/benches/bench.rs
Normal file
202
sub_crates/rmath/benches/bench.rs
Normal file
|
@ -0,0 +1,202 @@
|
|||
use bencher::{benchmark_group, benchmark_main, black_box, Bencher};
|
||||
use rand::{rngs::SmallRng, FromEntropy, Rng};
|
||||
use rmath::{CrossProduct, DotProduct, Normal, Point, Vector, Xform, XformFull};
|
||||
|
||||
//----
|
||||
|
||||
fn vector_cross_10000(bench: &mut Bencher) {
|
||||
let mut rng = SmallRng::from_entropy();
|
||||
bench.iter(|| {
|
||||
let v1 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
|
||||
let v2 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
|
||||
for _ in 0..10000 {
|
||||
black_box(black_box(v1).cross(black_box(v2)));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn vector_dot_10000(bench: &mut Bencher) {
|
||||
let mut rng = SmallRng::from_entropy();
|
||||
bench.iter(|| {
|
||||
let v1 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
|
||||
let v2 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
|
||||
for _ in 0..10000 {
|
||||
black_box(black_box(v1).dot(black_box(v2)));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn xform_vector_mul_10000(bench: &mut Bencher) {
|
||||
let mut rng = SmallRng::from_entropy();
|
||||
bench.iter(|| {
|
||||
let v = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
|
||||
let x = Xform::new(
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
);
|
||||
for _ in 0..10000 {
|
||||
black_box(black_box(v).xform(black_box(&x)));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn xform_point_mul_10000(bench: &mut Bencher) {
|
||||
let mut rng = SmallRng::from_entropy();
|
||||
bench.iter(|| {
|
||||
let p = Point::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
|
||||
let x = Xform::new(
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
);
|
||||
for _ in 0..10000 {
|
||||
black_box(black_box(p).xform(black_box(&x)));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn xform_point_mul_inv_10000(bench: &mut Bencher) {
|
||||
let mut rng = SmallRng::from_entropy();
|
||||
bench.iter(|| {
|
||||
let p = Point::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
|
||||
let x = Xform::new(
|
||||
1.0,
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
1.0,
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
1.0,
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
)
|
||||
.to_full()
|
||||
.unwrap();
|
||||
for _ in 0..10000 {
|
||||
black_box(black_box(p).xform_inv(black_box(&x)));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn xform_normal_mul_10000(bench: &mut Bencher) {
|
||||
let mut rng = SmallRng::from_entropy();
|
||||
bench.iter(|| {
|
||||
let n = Normal::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
|
||||
let x = Xform::new(
|
||||
1.0,
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
1.0,
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
1.0,
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
)
|
||||
.to_full()
|
||||
.unwrap();
|
||||
for _ in 0..10000 {
|
||||
black_box(black_box(n).xform(black_box(&x)));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn xform_xform_mul_10000(bench: &mut Bencher) {
|
||||
let mut rng = SmallRng::from_entropy();
|
||||
bench.iter(|| {
|
||||
let x1 = Xform::new(
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
);
|
||||
let x2 = Xform::new(
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
);
|
||||
for _ in 0..10000 {
|
||||
black_box(black_box(x1).compose(black_box(&x2)));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn xform_to_xformfull_10000(bench: &mut Bencher) {
|
||||
let mut rng = SmallRng::from_entropy();
|
||||
bench.iter(|| {
|
||||
let x = Xform::new(
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
rng.gen::<f32>(),
|
||||
);
|
||||
for _ in 0..10000 {
|
||||
black_box(black_box(x).to_full());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
//----
|
||||
|
||||
benchmark_group!(
|
||||
benches,
|
||||
vector_cross_10000,
|
||||
vector_dot_10000,
|
||||
xform_vector_mul_10000,
|
||||
xform_point_mul_10000,
|
||||
xform_point_mul_inv_10000,
|
||||
xform_normal_mul_10000,
|
||||
xform_xform_mul_10000,
|
||||
xform_to_xformfull_10000,
|
||||
);
|
||||
benchmark_main!(benches);
|
268
sub_crates/rmath/examples/precision.rs
Normal file
268
sub_crates/rmath/examples/precision.rs
Normal file
|
@ -0,0 +1,268 @@
|
|||
use rand::{rngs::SmallRng, FromEntropy, Rng};
|
||||
|
||||
use rmath::{utils::ulp_diff, wide4::Float4};
|
||||
|
||||
type D4 = [f64; 4];
|
||||
|
||||
fn main() {
|
||||
let mut rng = SmallRng::from_entropy();
|
||||
|
||||
// Convenience functions for generating random Float4's.
|
||||
let mut rf4 = || {
|
||||
let mut rf = || {
|
||||
let range = 268435456.0;
|
||||
let n = rng.gen::<f64>();
|
||||
((n * range * 2.0) - range) as f32
|
||||
};
|
||||
Float4::new(rf(), rf(), rf(), rf())
|
||||
};
|
||||
|
||||
// Dot product test.
|
||||
println!("Dot product:");
|
||||
{
|
||||
let mut max_ulp_diff = 0u32;
|
||||
for _ in 0..10000000 {
|
||||
let v1 = rf4();
|
||||
let v2 = rf4();
|
||||
|
||||
let dpa = Float4::dot_3(v1, v2);
|
||||
let dpb = dot_3(f4_to_d4(v1), f4_to_d4(v2));
|
||||
|
||||
let ud = ulp_diff(dpa, dpb as f32);
|
||||
max_ulp_diff = max_ulp_diff.max(ud);
|
||||
}
|
||||
|
||||
println!(" Max error (ulps):\n {:?}\n", max_ulp_diff);
|
||||
}
|
||||
|
||||
// Cross product test.
|
||||
println!("Cross product:");
|
||||
{
|
||||
let mut max_ulp_diff = [0u32; 4];
|
||||
for _ in 0..10000000 {
|
||||
let v1 = rf4();
|
||||
let v2 = rf4();
|
||||
|
||||
let v3a = Float4::cross_3(v1, v2);
|
||||
let v3b = cross_3(f4_to_d4(v1), f4_to_d4(v2));
|
||||
|
||||
let ud = ulp_diff_f4d4(v3a, v3b);
|
||||
for i in 0..4 {
|
||||
max_ulp_diff[i] = max_ulp_diff[i].max(ud[i]);
|
||||
}
|
||||
}
|
||||
|
||||
println!(" Max error (ulps):\n {:?}\n", max_ulp_diff);
|
||||
}
|
||||
|
||||
// Matrix inversion test.
|
||||
println!("Matrix inversion:");
|
||||
{
|
||||
let mut max_ulp_diff = [[0u32; 4]; 3];
|
||||
let mut det_ulp_hist = [0u32; 9];
|
||||
for _ in 0..2000000 {
|
||||
let m = [rf4(), rf4(), rf4()];
|
||||
let ima = Float4::invert_3x3_w_det(&m);
|
||||
let imb = invert_3x3([f4_to_d4(m[0]), f4_to_d4(m[1]), f4_to_d4(m[2])]);
|
||||
|
||||
if let (Some((ima, deta)), Some((imb, detb))) = (ima, imb) {
|
||||
let det_ulp_diff = ulp_diff(deta, detb as f32);
|
||||
let mut hist_upper = 0;
|
||||
for i in 0..det_ulp_hist.len() {
|
||||
if det_ulp_diff <= hist_upper {
|
||||
det_ulp_hist[i] += 1;
|
||||
break;
|
||||
}
|
||||
if hist_upper == 0 {
|
||||
hist_upper += 1;
|
||||
} else {
|
||||
hist_upper *= 10;
|
||||
}
|
||||
}
|
||||
|
||||
if det_ulp_diff == 0 {
|
||||
for i in 0..3 {
|
||||
let ud = ulp_diff_f4d4(ima[i], imb[i]);
|
||||
for j in 0..4 {
|
||||
max_ulp_diff[i][j] = max_ulp_diff[i][j].max(ud[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!(
|
||||
" Max error when determinant has 0-ulp error (ulps):\n {:?}",
|
||||
max_ulp_diff
|
||||
);
|
||||
|
||||
let total: u32 = det_ulp_hist.iter().sum();
|
||||
let mut ulp = 0;
|
||||
let mut sum = 0;
|
||||
println!(" Determinant error distribution:");
|
||||
for h in det_ulp_hist.iter() {
|
||||
sum += *h;
|
||||
println!(
|
||||
" {:.8}% <= {} ulps",
|
||||
sum as f64 / total as f64 * 100.0,
|
||||
ulp
|
||||
);
|
||||
if ulp == 0 {
|
||||
ulp += 1;
|
||||
} else {
|
||||
ulp *= 10;
|
||||
}
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------
|
||||
|
||||
fn f4_to_d4(v: Float4) -> D4 {
|
||||
[v.a() as f64, v.b() as f64, v.c() as f64, v.d() as f64]
|
||||
}
|
||||
|
||||
fn ulp_diff_f4d4(a: Float4, b: D4) -> [u32; 4] {
|
||||
[
|
||||
ulp_diff(a.a(), b[0] as f32),
|
||||
ulp_diff(a.b(), b[1] as f32),
|
||||
ulp_diff(a.c(), b[2] as f32),
|
||||
ulp_diff(a.d(), b[3] as f32),
|
||||
]
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------
|
||||
|
||||
fn dot_3(a: D4, b: D4) -> f64 {
|
||||
// Products.
|
||||
let (x, x_err) = two_prod(a[0], b[0]);
|
||||
let (y, y_err) = two_prod(a[1], b[1]);
|
||||
let (z, z_err) = two_prod(a[2], b[2]);
|
||||
|
||||
// Sums.
|
||||
let (s1, s1_err) = two_sum(x, y);
|
||||
let err1 = x_err + (y_err + s1_err);
|
||||
|
||||
let (s2, s2_err) = two_sum(s1, z);
|
||||
let err2 = z_err + (err1 + s2_err);
|
||||
|
||||
// Final result with rounding error compensation.
|
||||
s2 + err2
|
||||
}
|
||||
|
||||
fn cross_3(a: D4, b: D4) -> D4 {
|
||||
[
|
||||
difference_of_products(a[1], b[2], a[2], b[1]),
|
||||
difference_of_products(a[2], b[0], a[0], b[2]),
|
||||
difference_of_products(a[0], b[1], a[1], b[0]),
|
||||
difference_of_products(a[3], b[3], a[3], b[3]),
|
||||
]
|
||||
}
|
||||
|
||||
fn invert_3x3(m: [D4; 3]) -> Option<([D4; 3], f64)> {
|
||||
let m0_bca = [m[0][1], m[0][2], m[0][0], m[0][3]];
|
||||
let m1_bca = [m[1][1], m[1][2], m[1][0], m[1][3]];
|
||||
let m2_bca = [m[2][1], m[2][2], m[2][0], m[2][3]];
|
||||
let m0_cab = [m[0][2], m[0][0], m[0][1], m[0][3]];
|
||||
let m1_cab = [m[1][2], m[1][0], m[1][1], m[1][3]];
|
||||
let m2_cab = [m[2][2], m[2][0], m[2][1], m[2][3]];
|
||||
|
||||
let abc = [
|
||||
difference_of_products(m1_bca[0], m2_cab[0], m1_cab[0], m2_bca[0]),
|
||||
difference_of_products(m1_bca[1], m2_cab[1], m1_cab[1], m2_bca[1]),
|
||||
difference_of_products(m1_bca[2], m2_cab[2], m1_cab[2], m2_bca[2]),
|
||||
difference_of_products(m1_bca[3], m2_cab[3], m1_cab[3], m2_bca[3]),
|
||||
];
|
||||
let def = [
|
||||
difference_of_products(m2_bca[0], m0_cab[0], m2_cab[0], m0_bca[0]),
|
||||
difference_of_products(m2_bca[1], m0_cab[1], m2_cab[1], m0_bca[1]),
|
||||
difference_of_products(m2_bca[2], m0_cab[2], m2_cab[2], m0_bca[2]),
|
||||
difference_of_products(m2_bca[3], m0_cab[3], m2_cab[3], m0_bca[3]),
|
||||
];
|
||||
let ghi = [
|
||||
difference_of_products(m0_bca[0], m1_cab[0], m0_cab[0], m1_bca[0]),
|
||||
difference_of_products(m0_bca[1], m1_cab[1], m0_cab[1], m1_bca[1]),
|
||||
difference_of_products(m0_bca[2], m1_cab[2], m0_cab[2], m1_bca[2]),
|
||||
difference_of_products(m0_bca[3], m1_cab[3], m0_cab[3], m1_bca[3]),
|
||||
];
|
||||
|
||||
let det = dot_3(
|
||||
[abc[0], def[0], ghi[0], 0.0],
|
||||
[m[0][0], m[1][0], m[2][0], 0.0],
|
||||
);
|
||||
|
||||
if det == 0.0 {
|
||||
None
|
||||
} else {
|
||||
Some((
|
||||
[
|
||||
[abc[0] / det, def[0] / det, ghi[0] / det, 0.0],
|
||||
[abc[1] / det, def[1] / det, ghi[1] / det, 0.0],
|
||||
[abc[2] / det, def[2] / det, ghi[2] / det, 0.0],
|
||||
],
|
||||
// [
|
||||
// [abc[0], def[0], ghi[0], 0.0],
|
||||
// [abc[1], def[1], ghi[1], 0.0],
|
||||
// [abc[2], def[2], ghi[2], 0.0],
|
||||
// ],
|
||||
det,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
fn rel_diff(a: f64, b: f64) -> f64 {
|
||||
(a - b).abs() / a.abs().max(b.abs())
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------
|
||||
|
||||
/// `(a * b) - (c * d)` but done with high precision via floating point tricks.
|
||||
///
|
||||
/// See https://pharr.org/matt/blog/2019/11/03/difference-of-floats
|
||||
#[inline(always)]
|
||||
fn difference_of_products(a: f64, b: f64, c: f64, d: f64) -> f64 {
|
||||
let cd = c * d;
|
||||
let dop = a.mul_add(b, -cd);
|
||||
let err = (-c).mul_add(d, cd);
|
||||
dop + err
|
||||
}
|
||||
|
||||
/// `(a * b) + (c * d)` but done with high precision via floating point tricks.
|
||||
#[inline(always)]
|
||||
fn sum_of_products(a: f64, b: f64, c: f64, d: f64) -> f64 {
|
||||
let cd = c * d;
|
||||
let sop = a.mul_add(b, cd);
|
||||
let err = c.mul_add(d, -cd);
|
||||
sop + err
|
||||
}
|
||||
|
||||
/// `a * b` but also returns a rounding error for precise composition
|
||||
/// with other operations.
|
||||
#[inline(always)]
|
||||
fn two_prod(a: f64, b: f64) -> (f64, f64)
|
||||
// (product, rounding_err)
|
||||
{
|
||||
let ab = a * b;
|
||||
(ab, a.mul_add(b, -ab))
|
||||
}
|
||||
|
||||
/// `a + b` but also returns a rounding error for precise composition
|
||||
/// with other operations.
|
||||
#[inline(always)]
|
||||
fn two_sum(a: f64, b: f64) -> (f64, f64)
|
||||
// (sum, rounding_err)
|
||||
{
|
||||
let sum = a + b;
|
||||
let delta = sum - a;
|
||||
(sum, (a - (sum - delta)) + (b - delta))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn two_diff(a: f64, b: f64) -> (f64, f64)
|
||||
// (diff, rounding_err)
|
||||
{
|
||||
let diff = a - b;
|
||||
let delta = diff - a;
|
||||
(diff, (a - (diff - delta)) - (b + delta))
|
||||
}
|
|
@ -5,7 +5,7 @@
|
|||
mod normal;
|
||||
mod point;
|
||||
mod sealed;
|
||||
mod utils;
|
||||
pub mod utils;
|
||||
mod vector;
|
||||
pub mod wide4;
|
||||
mod xform;
|
||||
|
|
|
@ -2,7 +2,7 @@ const TOP_BIT: u32 = 1 << 31;
|
|||
|
||||
/// Compute how different two floats are in ulps.
|
||||
#[inline(always)]
|
||||
pub(crate) fn ulp_diff(a: f32, b: f32) -> u32 {
|
||||
pub fn ulp_diff(a: f32, b: f32) -> u32 {
|
||||
let a = a.to_bits();
|
||||
let b = b.to_bits();
|
||||
|
||||
|
@ -20,7 +20,7 @@ pub(crate) fn ulp_diff(a: f32, b: f32) -> u32 {
|
|||
|
||||
/// Checks if two floats are approximately equal, within `max_ulps`.
|
||||
#[inline(always)]
|
||||
pub(crate) fn ulps_eq(a: f32, b: f32, max_ulps: u32) -> bool {
|
||||
pub fn ulps_eq(a: f32, b: f32, max_ulps: u32) -> bool {
|
||||
!a.is_nan() && !b.is_nan() && (ulp_diff(a, b) <= max_ulps)
|
||||
}
|
||||
|
||||
|
|
|
@ -100,6 +100,33 @@ impl Float4 {
|
|||
}
|
||||
}
|
||||
|
||||
/// Invert a 3x3 matrix, and also return the computed determinant.
|
||||
///
|
||||
/// Returns `None` if not invertible.
|
||||
#[inline]
|
||||
pub fn invert_3x3_w_det(m: &[Self; 3]) -> Option<([Self; 3], f32)> {
|
||||
let m0_bca = m[0].bcad();
|
||||
let m1_bca = m[1].bcad();
|
||||
let m2_bca = m[2].bcad();
|
||||
let m0_cab = m[0].cabd();
|
||||
let m1_cab = m[1].cabd();
|
||||
let m2_cab = m[2].cabd();
|
||||
let abc = difference_of_products(m1_bca, m2_cab, m1_cab, m2_bca);
|
||||
let def = difference_of_products(m2_bca, m0_cab, m2_cab, m0_bca);
|
||||
let ghi = difference_of_products(m0_bca, m1_cab, m0_cab, m1_bca);
|
||||
|
||||
let det = Self::dot_3(
|
||||
Self::new(abc.a(), def.a(), ghi.a(), 0.0),
|
||||
Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0),
|
||||
);
|
||||
|
||||
if det == 0.0 {
|
||||
None
|
||||
} else {
|
||||
Some((Self::transpose_3x3(&[abc / det, def / det, ghi / det]), det))
|
||||
}
|
||||
}
|
||||
|
||||
/// Faster but less precise version of `invert_3x3()`.
|
||||
#[inline]
|
||||
pub fn invert_3x3_fast(m: &[Self; 3]) -> Option<[Self; 3]> {
|
||||
|
|
Loading…
Reference in New Issue
Block a user