diff --git a/sub_crates/trifloat/src/fluv32.rs b/sub_crates/trifloat/src/fluv32.rs index c0471fb..09fc1d1 100644 --- a/sub_crates/trifloat/src/fluv32.rs +++ b/sub_crates/trifloat/src/fluv32.rs @@ -1,4 +1,4 @@ -//! Encoding/decoding for the 32-bit FloatLuv color format. +//! Encoding/decoding for the 32-bit FLuv32 color format. //! //! This encoding is based on, but is slightly different than, the 32-bit //! LogLuv format from the paper "Overcoming Gamut and Dynamic Range @@ -6,27 +6,27 @@ //! //! * It uses the same uv chroma storage approach, but with *very* slightly //! tweaked scales to allow perfect representation of E. -//! * It uses uses a floating point rather than log encoding to store -//! luminance, mainly for the sake of faster decoding. -//! * It also omits the sign bit of LogLuv, foregoing negative luminance +//! * It uses a floating point rather than log encoding to store luminance, +//! mainly for the sake of faster decoding. +//! * Unlike LogLuv, this format's dynamic range is biased to put more of it +//! above 1.0 (see Luminance details below). +//! * It omits the sign bit of LogLuv, foregoing negative luminance //! capabilities. //! -//! Compared to LogLuv, this format's chroma precision is the same and its -//! luminance precision is better, but its luminance *range* is smaller. -//! The supported luminance range is still substantial, however (see -//! "Luminance details" below). +//! This format has the same chroma precision, very slightly improved luminance +//! precision, and the same 127-stops of dynamic range as LogLuv. //! //! Like the LogLuv format, this is an absolute rather than relative color //! encoding, and as such takes CIE XYZ triplets as input. It is *not* //! designed to take arbitrary floating point triplets, and will perform poorly //! if e.g. passed RGB values. //! -//! The bit layout is: +//! The bit layout is (from most to least significant bit): //! -//! 1. luminance exponent (6 bits, bias 27) -//! 2. luminance mantissa (10 stored bits, 11 bits precision) -//! 3. u (8 bits) -//! 4. v (8 bits) +//! * 7 bits: luminance exponent (bias 42) +//! * 9 bits: luminance mantissa (implied leading 1, for 10 bits precision) +//! * 8 bits: u' +//! * 8 bits: v' //! //! ## Luminance details //! @@ -35,21 +35,36 @@ //! > The sun is about `10^8 cd/m^2`, and the underside of a rock on a moonless //! > night is probably around `10^-6` or so [...] //! -//! The luminance range of this format is from about `10^11` on the brightest -//! end, to about `10^-8` on the darkest (excluding zero itself, which can also -//! be stored). +//! See also Wikipedia's +//! [list of luminance levels](https://en.wikipedia.org/wiki/Orders_of_magnitude_(luminance)). //! -//! That gives this format almost five orders of magnitude more dynamic range -//! than is likely to be needed for any practical situation. Moreover, that -//! extra range is split between both the high and low end, giving a -//! comfortable buffer on both ends for extreme situations. +//! The luminance range of the original LogLuv is about `10^-19` to `10^19`, +//! splitting the range evenly above and below 1.0. Given the massive dynamic +//! range, and the fact that all day-to-day luminance levels trivially fit +//! within that, that's a perfectly reasonable choice. //! -//! Like the LogLuv format, the input CIE Y value is taken directly as the -//! luminance value. +//! However, there are some stellar events like supernovae that are trillions +//! of times brighter than the sun, and would exceed `10^19`. Conversely, +//! there likely isn't much use for significantly smaller values than `10^-10` +//! or so. So although recording supernovae in physical units with a graphics +//! format seems unlikely, it doesn't hurt to bias the range towards brighter +//! luminance levels. +//! +//! With that in mind, FLuv32 uses an exponent bias of 42, putting twice as +//! many stops of dynamic range above 1.0 as below it, giving a luminance range +//! of roughly `10^-13` to `10^25`. It's the same dynamic range as +//! LogLuv (about 127 stops), but with more of that range placed above 1.0. +//! +//! Like typical floating point, the mantissa is treated as having an implicit +//! leading 1, giving it an extra bit of precision. The smallest exponent +//! indicates a value of zero, and a valid encoding should also set the +//! mantissa to zero in that case (denormal numbers are not supported). The +//! largest exponent is given no special treatment (no infinities, no NaN). #![allow(clippy::cast_lossless)] -const EXP_BIAS: i32 = 27; +const EXP_BIAS: i32 = 42; +const BIAS_OFFSET: u32 = 127 - EXP_BIAS as u32; /// The scale factor of the quantized U component. pub const U_SCALE: f32 = 817.0 / 2.0; @@ -58,13 +73,13 @@ pub const U_SCALE: f32 = 817.0 / 2.0; pub const V_SCALE: f32 = 1235.0 / 3.0; /// Largest representable Y component. -pub const Y_MAX: f32 = ((1u64 << (64 - EXP_BIAS)) - (1u64 << (64 - EXP_BIAS - 11))) as f32; +pub const Y_MAX: f32 = ((1u128 << (128 - EXP_BIAS)) - (1u128 << (128 - EXP_BIAS - 10))) as f32; /// Smallest representable non-zero Y component. -pub const Y_MIN: f32 = 1.0 / (1u64 << (EXP_BIAS - 1)) as f32; +pub const Y_MIN: f32 = 1.0 / (1u128 << (EXP_BIAS - 1)) as f32; /// Difference between 1.0 and the next largest representable Y value. -pub const Y_EPSILON: f32 = 1.0 / 1024.0; +pub const Y_EPSILON: f32 = 1.0 / 512.0; /// Encodes from CIE XYZ to 32-bit FloatLuv. #[inline] @@ -99,13 +114,12 @@ pub fn encode(xyz: (f32, f32, f32)) -> u32 { ((u as u32) << 8) | (v as u32) }; - let y_bits = xyz.1.to_bits(); - let exp = (y_bits >> 23) as i32 - 127 + EXP_BIAS; + let y_bits = xyz.1.to_bits() & 0x7fffffff; - if exp <= 0 { + if y_bits < ((BIAS_OFFSET + 1) << 23) { // Special case: black. encode_uv((1.0, 1.0, 1.0)) - } else if exp > 63 { + } else if y_bits >= ((BIAS_OFFSET + 128) << 23) { if xyz.1.is_infinite() { // Special case: infinity. In this case, we don't have any // reasonable basis for calculating chroma, so just return @@ -118,7 +132,7 @@ pub fn encode(xyz: (f32, f32, f32)) -> u32 { } } else { // Common case. - ((exp as u32) << 26) | ((y_bits & 0x07fe000) << 3) | encode_uv(xyz) + (((y_bits - (BIAS_OFFSET << 23)) << 2) & 0xffff0000) | encode_uv(xyz) } } @@ -154,9 +168,7 @@ pub fn decode(fluv32: u32) -> (f32, f32, f32) { /// to fit the range 0-255. #[inline] pub fn decode_yuv(fluv32: u32) -> (f32, u8, u8) { - const BIAS_OFFSET: u32 = (127 - EXP_BIAS as u32) << 23; - - let y = f32::from_bits(((fluv32 & 0xffff0000) >> 3) + BIAS_OFFSET); + let y = f32::from_bits(((fluv32 & 0xffff0000) >> 2) + (BIAS_OFFSET << 23)); let u = (fluv32 >> 8) as u8; let v = fluv32 as u8; @@ -193,11 +205,10 @@ mod tests { let tri = encode(fs); let fs2 = decode(tri); - assert_eq!(0x6c0056c3, tri); - - assert!((fs.0 - fs2.0).abs() < 0.0000001); assert_eq!(fs.1, fs2.1); + assert!((fs.0 - fs2.0).abs() < 0.0000001); assert!((fs.2 - fs2.2).abs() < 0.0000001); + assert_eq!(0x540056c3, tri); } #[test] @@ -221,7 +232,7 @@ mod tests { #[test] fn accuracy_01() { let mut n = 1.0; - for _ in 0..1024 { + for _ in 0..512 { let a = (n as f32, n as f32, n as f32); let b = round_trip(a); @@ -232,7 +243,7 @@ mod tests { assert!(rd0 < 0.01); assert!(rd2 < 0.01); - n += 1.0 / 1024.0; + n += 1.0 / 512.0; } } @@ -240,11 +251,11 @@ mod tests { #[should_panic] fn accuracy_02() { let mut n = 1.0; - for _ in 0..2048 { + for _ in 0..1024 { let a = (n as f32, n as f32, n as f32); let b = round_trip(a); assert_eq!(a.1, b.1); - n += 1.0 / 2048.0; + n += 1.0 / 1024.0; } } @@ -279,7 +290,7 @@ mod tests { #[test] fn saturate_y() { - let fs = (1.0e+20, 1.0e+20, 1.0e+20); + let fs = (1.0e+28, 1.0e+28, 1.0e+28); assert_eq!(Y_MAX, round_trip(fs).1); assert_eq!(Y_MAX, decode(0xFFFFFFFF).1); @@ -295,18 +306,14 @@ mod tests { } #[test] - fn smallest_value() { - let a = (Y_MIN, Y_MIN, Y_MIN); - let b = (Y_MIN * 0.99, Y_MIN * 0.99, Y_MIN * 0.99); - assert_eq!(Y_MIN, round_trip(a).1); - assert_eq!(0.0, round_trip(b).1); - } + fn smallest_value_and_underflow() { + let fs1 = (Y_MIN, Y_MIN, Y_MIN); + let fs2 = (Y_MIN * 0.99, Y_MIN * 0.99, Y_MIN * 0.99); - #[test] - fn underflow() { - let fs = (Y_MIN * 0.99, Y_MIN * 0.99, Y_MIN * 0.99); - assert_eq!(0x000056c3, encode(fs)); - assert_eq!((0.0, 0.0, 0.0), round_trip(fs)); + dbg!(Y_MIN); + assert_eq!(fs1.1, round_trip(fs1).1); + assert_eq!(0.0, round_trip(fs2).1); + assert_eq!(0x000056c3, encode(fs2)); } #[test]