From d6ab9d06be4742f10ea50c0db730d2db8e91c08c Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl <cessen@cessen.com>
Date: Fri, 11 Sep 2020 21:57:43 +0900
Subject: [PATCH] More work on the packed HDR RGB 32-bit format.

Switched to a different chroma encoding, which is notably faster
and never produces negative values when decoded.
---
 sub_crates/trifloat/src/lib.rs   |   5 +
 sub_crates/trifloat/src/rgb32.rs | 280 +++++++++++++++++--------------
 2 files changed, 155 insertions(+), 130 deletions(-)

diff --git a/sub_crates/trifloat/src/lib.rs b/sub_crates/trifloat/src/lib.rs
index dafef1e..bfc7092 100644
--- a/sub_crates/trifloat/src/lib.rs
+++ b/sub_crates/trifloat/src/lib.rs
@@ -33,3 +33,8 @@ fn fiddle_log2(n: f32) -> i32 {
     use std::f32;
     ((f32::to_bits(n) >> 23) & 0b1111_1111) as i32 - 127
 }
+
+#[inline(always)]
+fn clamp_0_1(n: f32) -> f32 {
+    n.max(0.0).min(1.0)
+}
diff --git a/sub_crates/trifloat/src/rgb32.rs b/sub_crates/trifloat/src/rgb32.rs
index 7fe1248..f903c34 100644
--- a/sub_crates/trifloat/src/rgb32.rs
+++ b/sub_crates/trifloat/src/rgb32.rs
@@ -1,33 +1,55 @@
-//! Encoding/decoding for specialized HDR RGB 32-bit storage format.
+//! Encoding/decoding for a specialized HDR RGB 32-bit storage format.
 //!
 //! The motivation for this format is to separate out the luma of
 //! the color from its chromaticity, in the same spirit as most
 //! image and video compression approaches, and then allocate more
-//! data to the luma component since that's what the human eye is
-//! most sensitive to.
+//! bits to storing the luma component since that's what the human
+//! eye is most sensitive to.
 //!
-//! This encoding first transforms into YCoCg colorspace, and then
-//! fiddles the resulting Y, Co, and Cg components into a special
-//! 32-bit format.  The Y component is stored as an unsigned float,
-//! with 6 bits of exponent and 10 bits of mantissa.  The Co and Cg
-//! components are stored as 8-bit integers.
+//! This encoding first transforms the color into a Y (luma) component
+//! and two chroma components (green-magenta and red-blue), and then
+//! fiddles those components into a special 32-bit format.
+//! The Y component is stored as an unsigned float, with 6 bits of
+//! exponent and 10 bits of mantissa.  The two chroma components are
+//! each stored as 8-bit integers.
 //!
 //! The layout is:
 //!
 //! 1. Y-exponent: 6 bits
 //! 2. Y-mantissa: 10 bits
-//! 3. Co: 8 bits
-//! 4. Cg: 8 bits
+//! 3. Green-Magenta: 8 bits
+//! 4. Red-Blue: 8 bits
 //!
-//! The Y component follows the convention of a mantissa with an
-//! implicit leading one, giving it 11 bits of precision.  The
-//! exponent has a bias of 24.
+//! The Y-mantissa has an implicit leading one, giving 11 bits of
+//! precision.
+
+use crate::clamp_0_1;
+
+const EXP_BIAS: i32 = 23;
+
+/// The largest value this format can store.
+///
+/// More precisely, this is the largest value that can be *reliably*
+/// stored.
+///
+/// This can be exceeded by individual channels in limited cases due
+/// to the color transform used.  But values *at least* this large
+/// can be relied on.
+pub const MAX: f32 = ((1u64 << (63 - EXP_BIAS)) - (1 << (52 - EXP_BIAS))) as f32;
+
+/// The smallest non-zero value this format can store.
+///
+/// Note that since this is effectively a shared-exponent format,
+/// the numerical precision of all channels depends on the magnitude
+/// of the over-all RGB color.
+pub const MIN: f32 = 1.0 / (1 << (EXP_BIAS - 2)) as f32;
 
 /// Encodes three floating point RGB values into a packed 32-bit format.
 ///
 /// Warning: negative values and NaN's are _not_ supported.  There are
 /// debug-only assertions in place to catch such values in the input
-/// floats.
+/// floats.  Infinity in any channel will saturate the whole color to
+/// the brightest representable white.
 #[inline]
 pub fn encode(floats: (f32, f32, f32)) -> u32 {
     debug_assert!(
@@ -45,23 +67,15 @@ pub fn encode(floats: (f32, f32, f32)) -> u32 {
         floats.2
     );
 
-    // Convert to YCoCg colorspace.
-    let y = (floats.0 * 0.25) + (floats.1 * 0.5) + (floats.2 * 0.25);
-    let co = (floats.0 * 0.5) + (floats.2 * -0.5);
-    let cg = (floats.0 * -0.25) + (floats.1 * 0.5) + (floats.2 * -0.25);
-
-    if y <= 0.0 {
-        // Corner case: black.
-        return 0;
-    } else if y.is_infinite() {
-        // Corner case: infinite white.
-        return 0xffff7f7f;
-    }
-
-    // Encode Co and Cg as 8-bit integers.
-    let inv_y = 1.0 / y;
-    let co_8bit = ((co * inv_y * 63.5) + 127.5).min(255.0).max(0.0) as u8;
-    let cg_8bit = ((cg * inv_y * 127.0) + 127.5).min(255.0).max(0.0) as u8;
+    // Convert to Y/Green-Magenta/Red-Blue components.
+    let u = floats.0 + floats.2;
+    let y = (u * 0.5) + floats.1;
+    let green_magenta = clamp_0_1(floats.1 / y);
+    let red_blue = if u > 0.0 {
+        clamp_0_1(floats.0 / u)
+    } else {
+        0.5
+    };
 
     // Bit-fiddle to get the float components of Y.
     // This assumes we're working with a standard 32-bit IEEE float.
@@ -69,22 +83,31 @@ pub fn encode(floats: (f32, f32, f32)) -> u32 {
     let y_mantissa = (y_ieee_bits >> 13) & 0b11_1111_1111;
     let y_exp = ((y_ieee_bits >> 23) & 0b1111_1111) as i32 - 127;
 
+    // Encode Cg and Cr as 8-bit integers.
+    let gm_8bit = ((green_magenta * 254.0) + 0.5) as u8;
+    let rb_8bit = ((red_blue * 254.0) + 0.5) as u8;
+
     // Pack values into a u32 and return.
-    if y_exp <= -24 {
-        // Corner-case:
+    if y_exp <= (0 - EXP_BIAS) {
+        // Early-out corner-case:
         // Luma is so dark that it will be zero at our precision,
         // and hence black.
         0
-    } else if y_exp >= 40 {
-        dbg!();
+    } else if y_exp >= (63 - EXP_BIAS) {
         // Corner-case:
         // Luma is so bright that it exceeds our max value, so saturate
         // the luma.
-        0xffff0000 | ((co_8bit as u32) << 8) | cg_8bit as u32
+        if y.is_infinite() {
+            // If luma is infinity, our chroma values are bogus, so
+            // just go with white.
+            0xffff7f7f
+        } else {
+            0xffff0000 | ((gm_8bit as u32) << 8) | rb_8bit as u32
+        }
     } else {
         // Common case.
-        let exp = (y_exp + 24) as u32;
-        (exp << 26) | (y_mantissa << 16) | ((co_8bit as u32) << 8) | cg_8bit as u32
+        let exp = (y_exp + EXP_BIAS) as u32;
+        (exp << 26) | (y_mantissa << 16) | ((gm_8bit as u32) << 8) | rb_8bit as u32
     }
 }
 
@@ -92,29 +115,32 @@ pub fn encode(floats: (f32, f32, f32)) -> u32 {
 /// floating point RGB numbers.
 #[inline]
 pub fn decode(packed_rgb: u32) -> (f32, f32, f32) {
-    // Reconstruct Y, Co, and Cg from the packed bits.
+    // Pull out Y, Green-Magenta, and Red-Blue from the packed
+    // bits.
     let y = {
         let exp = (packed_rgb & 0xfc00_0000) >> 26;
         if exp == 0 {
             0.0
         } else {
-            f32::from_bits(((exp + 103) << 23) | ((packed_rgb & 0x03ff_0000) >> 3))
+            f32::from_bits(
+                ((exp + (127 - EXP_BIAS as u32)) << 23) | ((packed_rgb & 0x03ff_0000) >> 3),
+            )
         }
     };
-    let co = {
-        let co_8bit = (packed_rgb >> 8) & 0xff;
-        ((co_8bit as f32) - 127.0) * (1.0 / 63.5) * y
+    let green_magenta = {
+        let gm_8bit = (packed_rgb >> 8) & 0xff;
+        (gm_8bit as f32) * (1.0 / 254.0)
     };
-    let cg = {
-        let cg_8bit = packed_rgb & 0xff;
-        ((cg_8bit as f32) - 127.0) * (1.0 / 127.0) * y
+    let red_blue = {
+        let rb_8bit = packed_rgb & 0xff;
+        (rb_8bit as f32) * (1.0 / 254.0)
     };
 
     // Convert back to RGB.
-    let tmp = y - cg;
-    let r = (tmp + co).max(0.0);
-    let g = (y + cg).max(0.0);
-    let b = (tmp - co).max(0.0);
+    let g = y * green_magenta;
+    let u = (y - g) * 2.0;
+    let r = u * red_blue;
+    let b = u - r;
 
     (r, g, b)
 }
@@ -161,7 +187,7 @@ mod tests {
     }
 
     #[test]
-    fn full_saturation() {
+    fn color_saturation() {
         let fs1 = (1.0, 0.0, 0.0);
         let fs2 = (0.0, 1.0, 0.0);
         let fs3 = (0.0, 0.0, 1.0);
@@ -172,112 +198,106 @@ mod tests {
     }
 
     #[test]
-    fn saturate() {
+    fn num_saturate() {
         let fs = (10000000000000.0, 10000000000000.0, 10000000000000.0);
 
-        assert_eq!(
-            (1098974760000.0, 1098974760000.0, 1098974760000.0),
-            round_trip(fs)
-        );
+        assert_eq!((MAX, MAX, MAX), round_trip(fs));
     }
 
     #[test]
-    fn inf_saturate() {
+    fn num_inf_saturate() {
         use std::f32::INFINITY;
         let fs = (INFINITY, INFINITY, INFINITY);
 
-        assert_eq!(
-            (1098974760000.0, 1098974760000.0, 1098974760000.0),
-            round_trip(fs)
-        );
+        assert_eq!((MAX, MAX, MAX), round_trip(fs));
     }
 
     #[test]
-    fn partial_saturate() {
+    fn num_partial_saturate() {
         let fs1 = (10000000000000.0, 0.0, 0.0);
         let fs2 = (0.0, 10000000000000.0, 0.0);
         let fs3 = (0.0, 0.0, 10000000000000.0);
 
-        assert_eq!(round_trip(fs1), (4395899000000.0, 0.0, 0.0));
-        assert_eq!(round_trip(fs2), (0.0, 2197949500000.0, 0.0));
-        assert_eq!(round_trip(fs3), (0.0, 0.0, 4395899000000.0));
+        assert_eq!((MAX * 4.0, 0.0, 0.0), round_trip(fs1));
+        assert_eq!((0.0, MAX * 2.0, 0.0), round_trip(fs2));
+        assert_eq!((0.0, 0.0, MAX * 4.0), round_trip(fs3));
     }
 
-    // #[test]
-    // fn accuracy() {
-    //     let mut n = 1.0;
-    //     for _ in 0..256 {
-    //         let (x, _, _) = round_trip((n, 0.0, 0.0));
-    //         assert_eq!(n, x);
-    //         n += 1.0 / 256.0;
-    //     }
-    // }
+    #[test]
+    fn largest_value() {
+        let fs1 = (MAX, MAX, MAX);
+        let fs2 = (MAX, 0.0, 0.0);
+        let fs3 = (0.0, MAX, 0.0);
+        let fs4 = (0.0, 0.0, MAX);
 
-    // #[test]
-    // fn rounding() {
-    //     let fs = (7.0f32, 513.0f32, 1.0f32);
-    //     assert_eq!(round_trip(fs), (8.0, 514.0, 2.0));
-    // }
+        assert_eq!(fs1, round_trip(fs1));
+        assert_eq!(fs2, round_trip(fs2));
+        assert_eq!(fs3, round_trip(fs3));
+        assert_eq!(fs4, round_trip(fs4));
+    }
 
-    // #[test]
-    // fn rounding_edge_case() {
-    //     let fs = (1023.0f32, 0.0f32, 0.0f32);
+    #[test]
+    fn smallest_value() {
+        let fs1 = (MIN, MIN, MIN);
+        let fs2 = (MIN, 0.0, 0.0);
+        let fs3 = (0.0, MIN, 0.0);
+        let fs4 = (0.0, 0.0, MIN);
 
-    //     assert_eq!(round_trip(fs), (1024.0, 0.0, 0.0),);
-    // }
+        assert_eq!(fs1, round_trip(fs1));
+        assert_eq!(fs2, round_trip(fs2));
+        assert_eq!(fs3, round_trip(fs3));
+        assert_eq!(fs4, round_trip(fs4));
+    }
 
-    // #[test]
-    // fn smallest_value() {
-    //     let fs = (MIN, MIN * 0.5, MIN * 0.49);
-    //     assert_eq!(round_trip(fs), (MIN, MIN, 0.0));
-    //     assert_eq!(decode(0x00_80_40_00), (MIN, MIN, 0.0));
-    // }
+    #[test]
+    fn underflow() {
+        let fs1 = (MIN * 0.5, 0.0, 0.0);
+        let fs2 = (0.0, MIN * 0.25, 0.0);
+        let fs3 = (0.0, 0.0, MIN * 0.5);
 
-    // #[test]
-    // fn underflow() {
-    //     let fs = (MIN * 0.49, 0.0, 0.0);
-    //     assert_eq!(encode(fs), 0);
-    //     assert_eq!(round_trip(fs), (0.0, 0.0, 0.0));
-    // }
+        assert_eq!(round_trip(fs1), (0.0, 0.0, 0.0));
+        assert_eq!(round_trip(fs2), (0.0, 0.0, 0.0));
+        assert_eq!(round_trip(fs3), (0.0, 0.0, 0.0));
+    }
 
-    // #[test]
-    // #[should_panic]
-    // fn nans_01() {
-    //     encode((std::f32::NAN, 0.0, 0.0));
-    // }
+    #[test]
+    #[should_panic]
+    fn nans_01() {
+        encode((std::f32::NAN, 0.0, 0.0));
+    }
 
-    // #[test]
-    // #[should_panic]
-    // fn nans_02() {
-    //     encode((0.0, std::f32::NAN, 0.0));
-    // }
+    #[test]
+    #[should_panic]
+    fn nans_02() {
+        encode((0.0, std::f32::NAN, 0.0));
+    }
 
-    // #[test]
-    // #[should_panic]
-    // fn nans_03() {
-    //     encode((0.0, 0.0, std::f32::NAN));
-    // }
+    #[test]
+    #[should_panic]
+    fn nans_03() {
+        encode((0.0, 0.0, std::f32::NAN));
+    }
 
-    // #[test]
-    // #[should_panic]
-    // fn negative_01() {
-    //     encode((-1.0, 0.0, 0.0));
-    // }
+    #[test]
+    #[should_panic]
+    fn negative_01() {
+        encode((-1.0, 0.0, 0.0));
+    }
 
-    // #[test]
-    // #[should_panic]
-    // fn negative_02() {
-    //     encode((0.0, -1.0, 0.0));
-    // }
+    #[test]
+    #[should_panic]
+    fn negative_02() {
+        encode((0.0, -1.0, 0.0));
+    }
 
-    // #[test]
-    // #[should_panic]
-    // fn negative_03() {
-    //     encode((0.0, 0.0, -1.0));
-    // }
+    #[test]
+    #[should_panic]
+    fn negative_03() {
+        encode((0.0, 0.0, -1.0));
+    }
 
-    // #[test]
-    // fn negative_04() {
-    //     encode((-0.0, -0.0, -0.0));
-    // }
+    #[test]
+    fn negative_04() {
+        encode((-0.0, -0.0, -0.0));
+    }
 }