Use bit fiddling to avoid some expensive operations in trifloat encoding/decoding.

This commit is contained in:
Nathan Vegdahl 2018-11-23 22:31:28 -08:00
parent 3d1ade21c2
commit ff9a56977a

View File

@ -54,13 +54,13 @@ pub fn encode(floats: (f32, f32, f32)) -> u32 {
} else { } else {
(largest_value.log2() as i32 + 1).max(-10).min(21) (largest_value.log2() as i32 + 1).max(-10).min(21)
}; };
let mut inv_multiplier = 512.0 / (exponent as f32).exp2(); let mut inv_multiplier = fiddle_exp2(-exponent + 9);
// Edge-case: make sure rounding pushes the largest value up // Edge-case: make sure rounding pushes the largest value up
// appropriately if needed. // appropriately if needed.
if (largest_value * inv_multiplier) + 0.5 >= 512.0 { if (largest_value * inv_multiplier) + 0.5 >= 512.0 {
exponent = (exponent + 1).max(-10).min(21); exponent = (exponent + 1).max(-10).min(21);
inv_multiplier = 512.0 / (exponent as f32).exp2(); inv_multiplier = fiddle_exp2(-exponent + 9);
} }
(exponent, inv_multiplier) (exponent, inv_multiplier)
@ -84,7 +84,7 @@ pub fn decode(trifloat: u32) -> (f32, f32, f32) {
let z = (trifloat >> 5) & 0b111111111; let z = (trifloat >> 5) & 0b111111111;
let e = trifloat & 0b11111; let e = trifloat & 0b11111;
let multiplier = ((e as i32 - 10) as f32).exp2() * (1.0 / 512.0); let multiplier = fiddle_exp2(e as i32 - 10 - 9);
( (
x as f32 * multiplier, x as f32 * multiplier,
@ -93,6 +93,16 @@ pub fn decode(trifloat: u32) -> (f32, f32, f32) {
) )
} }
/// Calculates 2.0^exp using IEEE bit fiddling.
///
/// The result is assembled directly from its bit pattern: the biased
/// exponent (`exp + 127`) is shifted into the exponent field of an IEEE 754
/// single-precision float (bits 23 through 30), while the sign bit and the
/// mantissa bits are left at zero. That bit pattern is exactly `1.0 * 2^exp`.
///
/// Only works for integer exponents in the range [-126, 127]
/// due to IEEE 32-bit float limits. Outside that range the biased exponent
/// no longer denotes a normal number: -127 yields 0.0, 128 yields infinity,
/// and anything further out spills into the sign bit or wraps in the cast.
///
/// # Panics
///
/// Panics when `exp` is outside [-126, 127] if debug assertions are enabled.
/// With debug assertions disabled no check is performed and an out-of-range
/// `exp` silently produces a meaningless value.
#[inline(always)]
fn fiddle_exp2(exp: i32) -> f32 {
    // Checked only when debug assertions are on, so builds without them keep
    // the branch-free add + shift this helper exists for.
    debug_assert!(
        exp >= -126 && exp <= 127,
        "fiddle_exp2: exponent {} is outside the supported range [-126, 127]",
        exp
    );

    // Within the supported range `exp + 127` lies in [1, 254], so the cast to
    // `u32` cannot wrap and the shifted value stays inside the exponent field.
    f32::from_bits(((exp + 127) as u32) << 23)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;