Add oct32 encode_precise()
function.
This is significantly slower than `encode()`, but a little more precise.
This commit is contained in:
parent
8c738b2f39
commit
0ba1acc42d
|
@ -1,26 +1,38 @@
|
||||||
use bencher::{benchmark_group, benchmark_main, black_box, Bencher};
|
use bencher::{benchmark_group, benchmark_main, black_box, Bencher};
|
||||||
use oct32norm::{decode, encode};
|
use oct32norm::{decode, encode, encode_precise};
|
||||||
use rand::{rngs::SmallRng, FromEntropy, Rng};
|
use rand::{rngs::SmallRng, FromEntropy, Rng};
|
||||||
|
|
||||||
//----
|
//----
|
||||||
|
|
||||||
fn encode_100_values(bench: &mut Bencher) {
|
fn encode_1000_values(bench: &mut Bencher) {
|
||||||
let mut rng = SmallRng::from_entropy();
|
let mut rng = SmallRng::from_entropy();
|
||||||
bench.iter(|| {
|
bench.iter(|| {
|
||||||
let x = rng.gen::<f32>() - 0.5;
|
let x = rng.gen::<f32>() - 0.5;
|
||||||
let y = rng.gen::<f32>() - 0.5;
|
let y = rng.gen::<f32>() - 0.5;
|
||||||
let z = rng.gen::<f32>() - 0.5;
|
let z = rng.gen::<f32>() - 0.5;
|
||||||
for _ in 0..100 {
|
for _ in 0..1000 {
|
||||||
black_box(encode(black_box((x, y, z))));
|
black_box(encode(black_box((x, y, z))));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn decode_100_values(bench: &mut Bencher) {
|
fn encode_precise_1000_values(bench: &mut Bencher) {
|
||||||
|
let mut rng = SmallRng::from_entropy();
|
||||||
|
bench.iter(|| {
|
||||||
|
let x = rng.gen::<f32>() - 0.5;
|
||||||
|
let y = rng.gen::<f32>() - 0.5;
|
||||||
|
let z = rng.gen::<f32>() - 0.5;
|
||||||
|
for _ in 0..1000 {
|
||||||
|
black_box(encode_precise(black_box((x, y, z))));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_1000_values(bench: &mut Bencher) {
|
||||||
let mut rng = SmallRng::from_entropy();
|
let mut rng = SmallRng::from_entropy();
|
||||||
bench.iter(|| {
|
bench.iter(|| {
|
||||||
let v = rng.gen::<u32>();
|
let v = rng.gen::<u32>();
|
||||||
for _ in 0..100 {
|
for _ in 0..1000 {
|
||||||
black_box(decode(black_box(v)));
|
black_box(decode(black_box(v)));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -28,5 +40,10 @@ fn decode_100_values(bench: &mut Bencher) {
|
||||||
|
|
||||||
//----
|
//----
|
||||||
|
|
||||||
benchmark_group!(benches, encode_100_values, decode_100_values,);
|
benchmark_group!(
|
||||||
|
benches,
|
||||||
|
encode_1000_values,
|
||||||
|
encode_precise_1000_values,
|
||||||
|
decode_1000_values,
|
||||||
|
);
|
||||||
benchmark_main!(benches);
|
benchmark_main!(benches);
|
||||||
|
|
|
@ -4,29 +4,60 @@
|
||||||
//! of Efficient Representations for Independent Unit Vectors" by
|
//! of Efficient Representations for Independent Unit Vectors" by
|
||||||
//! Cigolle et al.
|
//! Cigolle et al.
|
||||||
|
|
||||||
|
const STEP_SIZE: f32 = 1.0 / STEPS;
|
||||||
|
const STEPS: f32 = ((1 << (16 - 1)) - 1) as f32;
|
||||||
|
|
||||||
/// Encodes a vector of three floats to the oct32 format.
|
/// Encodes a vector of three floats to the oct32 format.
|
||||||
///
|
///
|
||||||
/// The input vector does not need to be normalized--only the direction
|
/// The input vector does not need to be normalized--only the direction
|
||||||
/// matters to the encoding process, not the length.
|
/// matters to the encoding process, not the length.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn encode(vec: (f32, f32, f32)) -> u32 {
|
pub fn encode(vec: (f32, f32, f32)) -> u32 {
|
||||||
let l1_norm = vec.0.abs() + vec.1.abs() + vec.2.abs();
|
let (u, v) = vec3_to_oct(vec);
|
||||||
let v0_norm = vec.0 / l1_norm;
|
((to_snorm_16(u) as u32) << 16) | to_snorm_16(v) as u32
|
||||||
let v1_norm = vec.1 / l1_norm;
|
}
|
||||||
|
|
||||||
let (u, v) = if vec.2 < 0.0 {
|
/// Encodes a vector of three floats to the oct32 format.
|
||||||
(
|
///
|
||||||
u32::from(to_snorm_16((1.0 - v1_norm.abs()) * sign(vec.0))),
|
/// This is the same as `encode()` except that it is slower and encodes
|
||||||
u32::from(to_snorm_16((1.0 - v0_norm.abs()) * sign(vec.1))),
|
/// with slightly better precision.
|
||||||
)
|
pub fn encode_precise(vec: (f32, f32, f32)) -> u32 {
|
||||||
} else {
|
#[inline(always)]
|
||||||
(
|
fn dot_norm(a: (f32, f32, f32), b: (f32, f32, f32)) -> f64 {
|
||||||
u32::from(to_snorm_16(v0_norm)),
|
let l = ((a.0 as f64 * a.0 as f64) + (a.1 as f64 * a.1 as f64) + (a.2 as f64 * a.2 as f64))
|
||||||
u32::from(to_snorm_16(v1_norm)),
|
.sqrt();
|
||||||
)
|
((a.0 as f64 * b.0 as f64) + (a.1 as f64 * b.1 as f64) + (a.2 as f64 * b.2 as f64)) / l
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the initial floored version.
|
||||||
|
let s = {
|
||||||
|
let mut s = vec3_to_oct(vec); // Remap to the square.
|
||||||
|
s.0 = (s.0.max(-1.0).min(1.0) * STEPS).floor() * STEP_SIZE;
|
||||||
|
s.1 = (s.1.max(-1.0).min(1.0) * STEPS).floor() * STEP_SIZE;
|
||||||
|
s
|
||||||
};
|
};
|
||||||
|
|
||||||
(u << 16) | v
|
// Test all combinations of floor and ceil and keep the best.
|
||||||
|
// Note that at +/- 1, this will exit the square, but that
|
||||||
|
// will be a worse encoding and never win.
|
||||||
|
let mut best_rep = s;
|
||||||
|
let mut max_dot = 0.0;
|
||||||
|
for &(i, j) in &[
|
||||||
|
(0.0, 0.0),
|
||||||
|
(0.0, STEP_SIZE),
|
||||||
|
(STEP_SIZE, 0.0),
|
||||||
|
(STEP_SIZE, STEP_SIZE),
|
||||||
|
] {
|
||||||
|
let candidate = (s.0 + i, s.1 + j);
|
||||||
|
let oct = oct_to_vec3(candidate);
|
||||||
|
let dot = dot_norm(oct, vec);
|
||||||
|
if dot > max_dot {
|
||||||
|
best_rep = candidate;
|
||||||
|
max_dot = dot;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
((to_snorm_16(best_rep.0) as u32) << 16) | to_snorm_16(best_rep.1) as u32
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decodes from an oct32 to a vector of three floats.
|
/// Decodes from an oct32 to a vector of three floats.
|
||||||
|
@ -35,27 +66,45 @@ pub fn encode(vec: (f32, f32, f32)) -> u32 {
|
||||||
/// needs a normalized vector should normalize the returned vector.
|
/// needs a normalized vector should normalize the returned vector.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn decode(n: u32) -> (f32, f32, f32) {
|
pub fn decode(n: u32) -> (f32, f32, f32) {
|
||||||
let mut vec0 = from_snorm_16((n >> 16) as u16);
|
oct_to_vec3((from_snorm_16((n >> 16) as u16), from_snorm_16(n as u16)))
|
||||||
let mut vec1 = from_snorm_16(n as u16);
|
}
|
||||||
let vec2 = 1.0 - (vec0.abs() + vec1.abs());
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn vec3_to_oct(vec: (f32, f32, f32)) -> (f32, f32) {
|
||||||
|
let l1_norm = vec.0.abs() + vec.1.abs() + vec.2.abs();
|
||||||
|
let u = vec.0 / l1_norm;
|
||||||
|
let v = vec.1 / l1_norm;
|
||||||
|
|
||||||
|
if vec.2 > 0.0 {
|
||||||
|
(u, v)
|
||||||
|
} else {
|
||||||
|
((1.0 - v.abs()) * sign(vec.0), (1.0 - u.abs()) * sign(vec.1))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn oct_to_vec3(oct: (f32, f32)) -> (f32, f32, f32) {
|
||||||
|
let vec2 = 1.0 - (oct.0.abs() + oct.1.abs());
|
||||||
|
|
||||||
if vec2 < 0.0 {
|
if vec2 < 0.0 {
|
||||||
let old_x = vec0;
|
(
|
||||||
vec0 = (1.0 - vec1.abs()) * sign(old_x);
|
(1.0 - oct.1.abs()) * sign(oct.0),
|
||||||
vec1 = (1.0 - old_x.abs()) * sign(vec1);
|
(1.0 - oct.0.abs()) * sign(oct.1),
|
||||||
|
vec2,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
(oct.0, oct.1, vec2)
|
||||||
}
|
}
|
||||||
|
|
||||||
(vec0, vec1, vec2)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn to_snorm_16(n: f32) -> u16 {
|
fn to_snorm_16(n: f32) -> u16 {
|
||||||
(n * ((1u32 << (16 - 1)) - 1) as f32).round() as i16 as u16
|
(n * STEPS).round() as i16 as u16
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn from_snorm_16(n: u16) -> f32 {
|
fn from_snorm_16(n: u16) -> f32 {
|
||||||
f32::from(n as i16) * (1.0f32 / ((1u32 << (16 - 1)) - 1) as f32)
|
f32::from(n as i16) * STEP_SIZE
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
|
@ -75,21 +124,27 @@ mod tests {
|
||||||
fn axis_directions() {
|
fn axis_directions() {
|
||||||
let px = (1.0, 0.0, 0.0);
|
let px = (1.0, 0.0, 0.0);
|
||||||
let px_oct = encode(px);
|
let px_oct = encode(px);
|
||||||
|
let px_octp = encode_precise(px);
|
||||||
|
|
||||||
let nx = (-1.0, 0.0, 0.0);
|
let nx = (-1.0, 0.0, 0.0);
|
||||||
let nx_oct = encode(nx);
|
let nx_oct = encode(nx);
|
||||||
|
let nx_octp = encode_precise(nx);
|
||||||
|
|
||||||
let py = (0.0, 1.0, 0.0);
|
let py = (0.0, 1.0, 0.0);
|
||||||
let py_oct = encode(py);
|
let py_oct = encode(py);
|
||||||
|
let py_octp = encode_precise(py);
|
||||||
|
|
||||||
let ny = (0.0, -1.0, 0.0);
|
let ny = (0.0, -1.0, 0.0);
|
||||||
let ny_oct = encode(ny);
|
let ny_oct = encode(ny);
|
||||||
|
let ny_octp = encode_precise(ny);
|
||||||
|
|
||||||
let pz = (0.0, 0.0, 1.0);
|
let pz = (0.0, 0.0, 1.0);
|
||||||
let pz_oct = encode(pz);
|
let pz_oct = encode(pz);
|
||||||
|
let pz_octp = encode_precise(pz);
|
||||||
|
|
||||||
let nz = (0.0, 0.0, -1.0);
|
let nz = (0.0, 0.0, -1.0);
|
||||||
let nz_oct = encode(nz);
|
let nz_oct = encode(nz);
|
||||||
|
let nz_octp = encode_precise(nz);
|
||||||
|
|
||||||
assert_eq!(px, decode(px_oct));
|
assert_eq!(px, decode(px_oct));
|
||||||
assert_eq!(nx, decode(nx_oct));
|
assert_eq!(nx, decode(nx_oct));
|
||||||
|
@ -97,5 +152,12 @@ mod tests {
|
||||||
assert_eq!(ny, decode(ny_oct));
|
assert_eq!(ny, decode(ny_oct));
|
||||||
assert_eq!(pz, decode(pz_oct));
|
assert_eq!(pz, decode(pz_oct));
|
||||||
assert_eq!(nz, decode(nz_oct));
|
assert_eq!(nz, decode(nz_oct));
|
||||||
|
|
||||||
|
assert_eq!(px, decode(px_octp));
|
||||||
|
assert_eq!(nx, decode(nx_octp));
|
||||||
|
assert_eq!(py, decode(py_octp));
|
||||||
|
assert_eq!(ny, decode(ny_octp));
|
||||||
|
assert_eq!(pz, decode(pz_octp));
|
||||||
|
assert_eq!(nz, decode(nz_octp));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
extern crate proptest;
|
extern crate proptest;
|
||||||
extern crate oct32norm;
|
extern crate oct32norm;
|
||||||
|
|
||||||
use oct32norm::{decode, encode};
|
use oct32norm::{decode, encode, encode_precise};
|
||||||
use proptest::test_runner::Config;
|
use proptest::test_runner::Config;
|
||||||
|
|
||||||
/// Calculates the cosine of the angle between the two vectors,
|
/// Calculates the cosine of the angle between the two vectors,
|
||||||
|
@ -56,18 +56,23 @@ proptest! {
|
||||||
#[test]
|
#[test]
|
||||||
fn pt_roundtrip_angle_precision(v in (-1.0f32..1.0, -1.0f32..1.0, -1.0f32..1.0)) {
|
fn pt_roundtrip_angle_precision(v in (-1.0f32..1.0, -1.0f32..1.0, -1.0f32..1.0)) {
|
||||||
let oct = encode(v);
|
let oct = encode(v);
|
||||||
let v2 = decode(oct);
|
let octp = encode_precise(v);
|
||||||
|
|
||||||
// Check if the angle between the original and the roundtrip
|
// Check if the angle between the original and the roundtrip
|
||||||
// is less than 0.004 degrees
|
// is less than 0.004 degrees
|
||||||
assert!(cos_gt(v, v2, 0.9999999976));
|
assert!(cos_gt(v, decode(oct), 0.9999999976));
|
||||||
|
|
||||||
|
// Check if the angle between the original and the roundtrip
|
||||||
|
// is less than 0.003 degrees
|
||||||
|
assert!(cos_gt(v, decode(octp), 0.9999999986));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn pt_roundtrip_component_precision(v in (-1.0f32..1.0, -1.0f32..1.0, -1.0f32..1.0)) {
|
fn pt_roundtrip_component_precision(v in (-1.0f32..1.0, -1.0f32..1.0, -1.0f32..1.0)) {
|
||||||
let oct = encode(v);
|
let oct = encode(v);
|
||||||
let v2 = decode(oct);
|
let octp = encode_precise(v);
|
||||||
|
|
||||||
assert!(l1_delta_lt(v, v2, 0.00005));
|
assert!(l1_delta_lt(v, decode(oct), 0.00005));
|
||||||
|
assert!(l1_delta_lt(v, decode(octp), 0.00003));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user