diff --git a/sub_crates/oct32norm/benches/bench.rs b/sub_crates/oct32norm/benches/bench.rs index 18a6940..012d7fb 100644 --- a/sub_crates/oct32norm/benches/bench.rs +++ b/sub_crates/oct32norm/benches/bench.rs @@ -1,26 +1,38 @@ use bencher::{benchmark_group, benchmark_main, black_box, Bencher}; -use oct32norm::{decode, encode}; +use oct32norm::{decode, encode, encode_precise}; use rand::{rngs::SmallRng, FromEntropy, Rng}; //---- -fn encode_100_values(bench: &mut Bencher) { +fn encode_1000_values(bench: &mut Bencher) { let mut rng = SmallRng::from_entropy(); bench.iter(|| { let x = rng.gen::() - 0.5; let y = rng.gen::() - 0.5; let z = rng.gen::() - 0.5; - for _ in 0..100 { + for _ in 0..1000 { black_box(encode(black_box((x, y, z)))); } }); } -fn decode_100_values(bench: &mut Bencher) { +fn encode_precise_1000_values(bench: &mut Bencher) { + let mut rng = SmallRng::from_entropy(); + bench.iter(|| { + let x = rng.gen::() - 0.5; + let y = rng.gen::() - 0.5; + let z = rng.gen::() - 0.5; + for _ in 0..1000 { + black_box(encode_precise(black_box((x, y, z)))); + } + }); +} + +fn decode_1000_values(bench: &mut Bencher) { let mut rng = SmallRng::from_entropy(); bench.iter(|| { let v = rng.gen::(); - for _ in 0..100 { + for _ in 0..1000 { black_box(decode(black_box(v))); } }); @@ -28,5 +40,10 @@ fn decode_100_values(bench: &mut Bencher) { //---- -benchmark_group!(benches, encode_100_values, decode_100_values,); +benchmark_group!( + benches, + encode_1000_values, + encode_precise_1000_values, + decode_1000_values, +); benchmark_main!(benches); diff --git a/sub_crates/oct32norm/src/lib.rs b/sub_crates/oct32norm/src/lib.rs index 46c1086..6f54ebb 100644 --- a/sub_crates/oct32norm/src/lib.rs +++ b/sub_crates/oct32norm/src/lib.rs @@ -4,29 +4,60 @@ //! of Efficient Representations for Independent Unit Vectors" by //! Cigolle et al. +const STEP_SIZE: f32 = 1.0 / STEPS; +const STEPS: f32 = ((1 << (16 - 1)) - 1) as f32; + /// Encodes a vector of three floats to the oct32 format. /// /// The input vector does not need to be normalized--only the direction /// matters to the encoding process, not the length. #[inline] pub fn encode(vec: (f32, f32, f32)) -> u32 { - let l1_norm = vec.0.abs() + vec.1.abs() + vec.2.abs(); - let v0_norm = vec.0 / l1_norm; - let v1_norm = vec.1 / l1_norm; + let (u, v) = vec3_to_oct(vec); + ((to_snorm_16(u) as u32) << 16) | to_snorm_16(v) as u32 +} - let (u, v) = if vec.2 < 0.0 { - ( - u32::from(to_snorm_16((1.0 - v1_norm.abs()) * sign(vec.0))), - u32::from(to_snorm_16((1.0 - v0_norm.abs()) * sign(vec.1))), - ) - } else { - ( - u32::from(to_snorm_16(v0_norm)), - u32::from(to_snorm_16(v1_norm)), - ) +/// Encodes a vector of three floats to the oct32 format. +/// +/// This is the same as `encode()` except that it is slower and encodes +/// with slightly better precision. +pub fn encode_precise(vec: (f32, f32, f32)) -> u32 { + #[inline(always)] + fn dot_norm(a: (f32, f32, f32), b: (f32, f32, f32)) -> f64 { + let l = ((a.0 as f64 * a.0 as f64) + (a.1 as f64 * a.1 as f64) + (a.2 as f64 * a.2 as f64)) + .sqrt(); + ((a.0 as f64 * b.0 as f64) + (a.1 as f64 * b.1 as f64) + (a.2 as f64 * b.2 as f64)) / l + } + + // Calculate the initial floored version. + let s = { + let mut s = vec3_to_oct(vec); // Remap to the square. + s.0 = (s.0.max(-1.0).min(1.0) * STEPS).floor() * STEP_SIZE; + s.1 = (s.1.max(-1.0).min(1.0) * STEPS).floor() * STEP_SIZE; + s }; - (u << 16) | v + // Test all combinations of floor and ceil and keep the best. + // Note that at +/- 1, this will exit the square, but that + // will be a worse encoding and never win. + let mut best_rep = s; + let mut max_dot = 0.0; + for &(i, j) in &[ + (0.0, 0.0), + (0.0, STEP_SIZE), + (STEP_SIZE, 0.0), + (STEP_SIZE, STEP_SIZE), + ] { + let candidate = (s.0 + i, s.1 + j); + let oct = oct_to_vec3(candidate); + let dot = dot_norm(oct, vec); + if dot > max_dot { + best_rep = candidate; + max_dot = dot; + } + } + + ((to_snorm_16(best_rep.0) as u32) << 16) | to_snorm_16(best_rep.1) as u32 } /// Decodes from an oct32 to a vector of three floats. @@ -35,27 +66,45 @@ pub fn encode(vec: (f32, f32, f32)) -> u32 { /// needs a normalized vector should normalize the returned vector. #[inline] pub fn decode(n: u32) -> (f32, f32, f32) { - let mut vec0 = from_snorm_16((n >> 16) as u16); - let mut vec1 = from_snorm_16(n as u16); - let vec2 = 1.0 - (vec0.abs() + vec1.abs()); + oct_to_vec3((from_snorm_16((n >> 16) as u16), from_snorm_16(n as u16))) +} + +#[inline(always)] +fn vec3_to_oct(vec: (f32, f32, f32)) -> (f32, f32) { + let l1_norm = vec.0.abs() + vec.1.abs() + vec.2.abs(); + let u = vec.0 / l1_norm; + let v = vec.1 / l1_norm; + + if vec.2 > 0.0 { + (u, v) + } else { + ((1.0 - v.abs()) * sign(vec.0), (1.0 - u.abs()) * sign(vec.1)) + } +} + +#[inline(always)] +fn oct_to_vec3(oct: (f32, f32)) -> (f32, f32, f32) { + let vec2 = 1.0 - (oct.0.abs() + oct.1.abs()); if vec2 < 0.0 { - let old_x = vec0; - vec0 = (1.0 - vec1.abs()) * sign(old_x); - vec1 = (1.0 - old_x.abs()) * sign(vec1); + ( + (1.0 - oct.1.abs()) * sign(oct.0), + (1.0 - oct.0.abs()) * sign(oct.1), + vec2, + ) + } else { + (oct.0, oct.1, vec2) } - - (vec0, vec1, vec2) } #[inline(always)] fn to_snorm_16(n: f32) -> u16 { - (n * ((1u32 << (16 - 1)) - 1) as f32).round() as i16 as u16 + (n * STEPS).round() as i16 as u16 } #[inline(always)] fn from_snorm_16(n: u16) -> f32 { - f32::from(n as i16) * (1.0f32 / ((1u32 << (16 - 1)) - 1) as f32) + f32::from(n as i16) * STEP_SIZE } #[inline(always)] @@ -75,21 +124,27 @@ mod tests { fn axis_directions() { let px = (1.0, 0.0, 0.0); let px_oct = encode(px); + let px_octp = encode_precise(px); let nx = (-1.0, 0.0, 0.0); let nx_oct = encode(nx); + let nx_octp = encode_precise(nx); let py = (0.0, 1.0, 0.0); let py_oct = encode(py); + let py_octp = encode_precise(py); let ny = (0.0, -1.0, 0.0); let ny_oct = encode(ny); + let ny_octp = encode_precise(ny); let pz = (0.0, 0.0, 1.0); let pz_oct = encode(pz); + let pz_octp = encode_precise(pz); let nz = (0.0, 0.0, -1.0); let nz_oct = encode(nz); + let nz_octp = encode_precise(nz); assert_eq!(px, decode(px_oct)); assert_eq!(nx, decode(nx_oct)); @@ -97,5 +152,12 @@ mod tests { assert_eq!(ny, decode(ny_oct)); assert_eq!(pz, decode(pz_oct)); assert_eq!(nz, decode(nz_oct)); + + assert_eq!(px, decode(px_octp)); + assert_eq!(nx, decode(nx_octp)); + assert_eq!(py, decode(py_octp)); + assert_eq!(ny, decode(ny_octp)); + assert_eq!(pz, decode(pz_octp)); + assert_eq!(nz, decode(nz_octp)); } } diff --git a/sub_crates/oct32norm/tests/proptest_tests.rs b/sub_crates/oct32norm/tests/proptest_tests.rs index 7cf8d71..3234055 100644 --- a/sub_crates/oct32norm/tests/proptest_tests.rs +++ b/sub_crates/oct32norm/tests/proptest_tests.rs @@ -2,7 +2,7 @@ extern crate proptest; extern crate oct32norm; -use oct32norm::{decode, encode}; +use oct32norm::{decode, encode, encode_precise}; use proptest::test_runner::Config; /// Calculates the cosine of the angle between the two vectors, @@ -56,18 +56,23 @@ proptest! { #[test] fn pt_roundtrip_angle_precision(v in (-1.0f32..1.0, -1.0f32..1.0, -1.0f32..1.0)) { let oct = encode(v); - let v2 = decode(oct); + let octp = encode_precise(v); // Check if the angle between the original and the roundtrip // is less than 0.004 degrees - assert!(cos_gt(v, v2, 0.9999999976)); + assert!(cos_gt(v, decode(oct), 0.9999999976)); + + // Check if the angle between the original and the roundtrip + // is less than 0.003 degrees + assert!(cos_gt(v, decode(octp), 0.9999999986)); } #[test] fn pt_roundtrip_component_precision(v in (-1.0f32..1.0, -1.0f32..1.0, -1.0f32..1.0)) { let oct = encode(v); - let v2 = decode(oct); + let octp = encode_precise(v); - assert!(l1_delta_lt(v, v2, 0.00005)); + assert!(l1_delta_lt(v, decode(oct), 0.00005)); + assert!(l1_delta_lt(v, decode(octp), 0.00003)); } }