FLuv32: increase dynamic range, and decrease precision.

This still exceeds the precision of LogLuv, but lets us match its
dynamic range.
This commit is contained in:
Nathan Vegdahl 2020-09-22 11:06:40 +09:00
parent 9cf5ebdf91
commit 6d6904a615

View File

@ -1,4 +1,4 @@
//! Encoding/decoding for the 32-bit FloatLuv color format.
//! Encoding/decoding for the 32-bit FLuv32 color format.
//!
//! This encoding is based on, but is slightly different than, the 32-bit
//! LogLuv format from the paper "Overcoming Gamut and Dynamic Range
@ -6,27 +6,27 @@
//!
//! * It uses the same uv chroma storage approach, but with *very* slightly
//! tweaked scales to allow perfect representation of E.
//! * It uses uses a floating point rather than log encoding to store
//! luminance, mainly for the sake of faster decoding.
//! * It also omits the sign bit of LogLuv, foregoing negative luminance
//! * It uses a floating point rather than log encoding to store luminance,
//! mainly for the sake of faster decoding.
//! * Unlike LogLuv, this format's dynamic range is biased to put more of it
//! above 1.0 (see Luminance details below).
//! * It omits the sign bit of LogLuv, foregoing negative luminance
//! capabilities.
//!
//! Compared to LogLuv, this format's chroma precision is the same and its
//! luminance precision is better, but its luminance *range* is smaller.
//! The supported luminance range is still substantial, however (see
//! "Luminance details" below).
//! This format has the same chroma precision, very slightly improved luminance
//! precision, and the same 127-stops of dynamic range as LogLuv.
//!
//! Like the LogLuv format, this is an absolute rather than relative color
//! encoding, and as such takes CIE XYZ triplets as input. It is *not*
//! designed to take arbitrary floating point triplets, and will perform poorly
//! if e.g. passed RGB values.
//!
//! The bit layout is:
//! The bit layout is (from most to least significant bit):
//!
//! 1. luminance exponent (6 bits, bias 27)
//! 2. luminance mantissa (10 stored bits, 11 bits precision)
//! 3. u (8 bits)
//! 4. v (8 bits)
//! * 7 bits: luminance exponent (bias 42)
//! * 9 bits: luminance mantissa (implied leading 1, for 10 bits precision)
//! * 8 bits: u'
//! * 8 bits: v'
//!
//! ## Luminance details
//!
@ -35,21 +35,36 @@
//! > The sun is about `10^8 cd/m^2`, and the underside of a rock on a moonless
//! > night is probably around `10^-6` or so [...]
//!
//! The luminance range of this format is from about `10^11` on the brightest
//! end, to about `10^-8` on the darkest (excluding zero itself, which can also
//! be stored).
//! See also Wikipedia's
//! [list of luminance levels](https://en.wikipedia.org/wiki/Orders_of_magnitude_(luminance)).
//!
//! That gives this format almost five orders of magnitude more dynamic range
//! than is likely to be needed for any practical situation. Moreover, that
//! extra range is split between both the high and low end, giving a
//! comfortable buffer on both ends for extreme situations.
//! The luminance range of the original LogLuv is about `10^-19` to `10^19`,
//! splitting the range evenly above and below 1.0. Given the massive dynamic
//! range, and the fact that all day-to-day luminance levels trivially fit
//! within that, that's a perfectly reasonable choice.
//!
//! Like the LogLuv format, the input CIE Y value is taken directly as the
//! luminance value.
//! However, there are some stellar events like supernovae that are trillions
//! of times brighter than the sun, and would exceed `10^19`. Conversely,
//! there likely isn't much use for significantly smaller values than `10^-10`
//! or so. So although recording supernovae in physical units with a graphics
//! format seems unlikely, it doesn't hurt to bias the range towards brighter
//! luminance levels.
//!
//! With that in mind, FLuv32 uses an exponent bias of 42, putting twice as
//! many stops of dynamic range above 1.0 as below it, giving a luminance range
//! of roughly `10^-13` to `10^25`. It's the same dynamic range as
//! LogLuv (about 127 stops), but with more of that range placed above 1.0.
//!
//! Like typical floating point, the mantissa is treated as having an implicit
//! leading 1, giving it an extra bit of precision. The smallest exponent
//! indicates a value of zero, and a valid encoding should also set the
//! mantissa to zero in that case (denormal numbers are not supported). The
//! largest exponent is given no special treatment (no infinities, no NaN).
#![allow(clippy::cast_lossless)]
const EXP_BIAS: i32 = 27;
const EXP_BIAS: i32 = 42;
const BIAS_OFFSET: u32 = 127 - EXP_BIAS as u32;
/// The scale factor of the quantized U component.
pub const U_SCALE: f32 = 817.0 / 2.0;
@ -58,13 +73,13 @@ pub const U_SCALE: f32 = 817.0 / 2.0;
pub const V_SCALE: f32 = 1235.0 / 3.0;
/// Largest representable Y component.
pub const Y_MAX: f32 = ((1u64 << (64 - EXP_BIAS)) - (1u64 << (64 - EXP_BIAS - 11))) as f32;
pub const Y_MAX: f32 = ((1u128 << (128 - EXP_BIAS)) - (1u128 << (128 - EXP_BIAS - 10))) as f32;
/// Smallest representable non-zero Y component.
pub const Y_MIN: f32 = 1.0 / (1u64 << (EXP_BIAS - 1)) as f32;
pub const Y_MIN: f32 = 1.0 / (1u128 << (EXP_BIAS - 1)) as f32;
/// Difference between 1.0 and the next largest representable Y value.
pub const Y_EPSILON: f32 = 1.0 / 1024.0;
pub const Y_EPSILON: f32 = 1.0 / 512.0;
/// Encodes from CIE XYZ to 32-bit FloatLuv.
#[inline]
@ -99,13 +114,12 @@ pub fn encode(xyz: (f32, f32, f32)) -> u32 {
((u as u32) << 8) | (v as u32)
};
let y_bits = xyz.1.to_bits();
let exp = (y_bits >> 23) as i32 - 127 + EXP_BIAS;
let y_bits = xyz.1.to_bits() & 0x7fffffff;
if exp <= 0 {
if y_bits < ((BIAS_OFFSET + 1) << 23) {
// Special case: black.
encode_uv((1.0, 1.0, 1.0))
} else if exp > 63 {
} else if y_bits >= ((BIAS_OFFSET + 128) << 23) {
if xyz.1.is_infinite() {
// Special case: infinity. In this case, we don't have any
// reasonable basis for calculating chroma, so just return
@ -118,7 +132,7 @@ pub fn encode(xyz: (f32, f32, f32)) -> u32 {
}
} else {
// Common case.
((exp as u32) << 26) | ((y_bits & 0x07fe000) << 3) | encode_uv(xyz)
(((y_bits - (BIAS_OFFSET << 23)) << 2) & 0xffff0000) | encode_uv(xyz)
}
}
@ -154,9 +168,7 @@ pub fn decode(fluv32: u32) -> (f32, f32, f32) {
/// to fit the range 0-255.
#[inline]
pub fn decode_yuv(fluv32: u32) -> (f32, u8, u8) {
const BIAS_OFFSET: u32 = (127 - EXP_BIAS as u32) << 23;
let y = f32::from_bits(((fluv32 & 0xffff0000) >> 3) + BIAS_OFFSET);
let y = f32::from_bits(((fluv32 & 0xffff0000) >> 2) + (BIAS_OFFSET << 23));
let u = (fluv32 >> 8) as u8;
let v = fluv32 as u8;
@ -193,11 +205,10 @@ mod tests {
let tri = encode(fs);
let fs2 = decode(tri);
assert_eq!(0x6c0056c3, tri);
assert!((fs.0 - fs2.0).abs() < 0.0000001);
assert_eq!(fs.1, fs2.1);
assert!((fs.0 - fs2.0).abs() < 0.0000001);
assert!((fs.2 - fs2.2).abs() < 0.0000001);
assert_eq!(0x540056c3, tri);
}
#[test]
@ -221,7 +232,7 @@ mod tests {
#[test]
fn accuracy_01() {
let mut n = 1.0;
for _ in 0..1024 {
for _ in 0..512 {
let a = (n as f32, n as f32, n as f32);
let b = round_trip(a);
@ -232,7 +243,7 @@ mod tests {
assert!(rd0 < 0.01);
assert!(rd2 < 0.01);
n += 1.0 / 1024.0;
n += 1.0 / 512.0;
}
}
@ -240,11 +251,11 @@ mod tests {
#[should_panic]
fn accuracy_02() {
let mut n = 1.0;
for _ in 0..2048 {
for _ in 0..1024 {
let a = (n as f32, n as f32, n as f32);
let b = round_trip(a);
assert_eq!(a.1, b.1);
n += 1.0 / 2048.0;
n += 1.0 / 1024.0;
}
}
@ -279,7 +290,7 @@ mod tests {
#[test]
fn saturate_y() {
let fs = (1.0e+20, 1.0e+20, 1.0e+20);
let fs = (1.0e+28, 1.0e+28, 1.0e+28);
assert_eq!(Y_MAX, round_trip(fs).1);
assert_eq!(Y_MAX, decode(0xFFFFFFFF).1);
@ -295,18 +306,14 @@ mod tests {
}
#[test]
fn smallest_value() {
let a = (Y_MIN, Y_MIN, Y_MIN);
let b = (Y_MIN * 0.99, Y_MIN * 0.99, Y_MIN * 0.99);
assert_eq!(Y_MIN, round_trip(a).1);
assert_eq!(0.0, round_trip(b).1);
}
fn smallest_value_and_underflow() {
let fs1 = (Y_MIN, Y_MIN, Y_MIN);
let fs2 = (Y_MIN * 0.99, Y_MIN * 0.99, Y_MIN * 0.99);
#[test]
fn underflow() {
let fs = (Y_MIN * 0.99, Y_MIN * 0.99, Y_MIN * 0.99);
assert_eq!(0x000056c3, encode(fs));
assert_eq!((0.0, 0.0, 0.0), round_trip(fs));
dbg!(Y_MIN);
assert_eq!(fs1.1, round_trip(fs1).1);
assert_eq!(0.0, round_trip(fs2).1);
assert_eq!(0x000056c3, encode(fs2));
}
#[test]