psychopath/sub_crates/spectral_upsampling/src/jakob.rs

/// This file implements a lighter alternative version of the Jakob
/// 2019 spectral upsampling method.  Instead of using the entire 3D
/// lookup table, we use two 2D slices of the table and interpolate
/// between the evaluated spectral values calculated from those tables.
///
/// This provides similar color matching to full Jakob, at the expense of
/// somewhat lower quality spectra, and the inability to precalculate
/// the coefficients for even more efficient evaluation later on.
use glam::Vec4;

/// Number of polynomial coefficients describing one spectrum.
///
/// `rgb2spec_eval_4` takes its coefficients as an array of this length and
/// uses them as the terms of a quadratic in wavelength.
const RGB2SPEC_N_COEFFS: usize = 3;

// Include tables generated by the build.rs script.  The generated file is
// read from the Cargo `OUT_DIR` build directory at compile time.
// NOTE(review): the `*_TABLE`, `*_TABLE_RES` and `*_TABLE_MID_VALUE` items used
// below are not defined in this file, so presumably they come from this
// include -- confirm against build.rs.
include!(concat!(env!("OUT_DIR"), "/jakob_table_inc.rs"));

/// Evaluates the upsampled spectrum of `rgb` at the four wavelengths in
/// `lambdas`, using the `REC709_TABLE` lookup data.
///
/// Thin specialization of `small_rgb_to_spectrum_p4` that supplies the
/// Rec.709 table together with its resolution and mid value.
#[inline]
pub fn rec709_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
    let (table, res, mid_value) = (REC709_TABLE, REC709_TABLE_RES, REC709_TABLE_MID_VALUE);
    small_rgb_to_spectrum_p4(table, res, mid_value, lambdas, rgb)
}

/// Evaluates the upsampled spectrum of `rgb` at the four wavelengths in
/// `lambdas`, using the `REC2020_TABLE` lookup data.
///
/// Thin specialization of `small_rgb_to_spectrum_p4` that supplies the
/// Rec.2020 table together with its resolution and mid value.
#[inline]
pub fn rec2020_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
    let (table, res, mid_value) = (REC2020_TABLE, REC2020_TABLE_RES, REC2020_TABLE_MID_VALUE);
    small_rgb_to_spectrum_p4(table, res, mid_value, lambdas, rgb)
}

/// Evaluates the upsampled spectrum of `rgb` at the four wavelengths in
/// `lambdas`, using the `ACES_TABLE` lookup data.
///
/// Thin specialization of `small_rgb_to_spectrum_p4` that supplies the
/// ACES table together with its resolution and mid value.
#[inline]
pub fn aces_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
    let (table, res, mid_value) = (ACES_TABLE, ACES_TABLE_RES, ACES_TABLE_MID_VALUE);
    small_rgb_to_spectrum_p4(table, res, mid_value, lambdas, rgb)
}

//===============================================================
// Core functions, specialized above for specific color spaces.

/// Evaluates an upsampled spectrum for `rgb` at the four wavelengths in
/// `lambdas`, using a reduced (two-slice) coefficient table.
///
/// The largest RGB component (`max_val`) selects one of three
/// `table_res * table_res` sections of `table`; the other two components,
/// divided by `max_val`, give the 2D position inside that section.  The four
/// surrounding entries are bilinearly interpolated.  Each entry carries two
/// coefficient triples:
///
/// * triple 0 is evaluated and scaled by `max_val / table_mid_value` when
///   `max_val <= table_mid_value`,
/// * triple 1 is evaluated and scaled by `max_val` when `max_val >= 1.0`,
/// * in between, the two evaluated spectra are linearly blended.
///
/// # Parameters
///
/// * `table` - coefficient table; the lookups below can reach index
///   `3 * table_res * table_res - 1`, so it must be at least that long.
/// * `table_res` - number of entries along each axis of a section; must be
///   at least 2.
/// * `table_mid_value` - value of the largest component at which triple 0
///   applies unscaled.
/// * `lambdas` - the four wavelengths handed to `rgb2spec_eval_4`.  Their
///   units must match whatever the table coefficients expect, which is not
///   visible from this file.
/// * `rgb` - the color to upsample.  Negative components get no special
///   handling.
///
/// Returns all zeros when the largest component is exactly zero.
///
/// # Panics
///
/// Panics if `table` is too short for the computed lookup indices.
#[inline(always)]
#[allow(clippy::many_single_char_names)]
fn small_rgb_to_spectrum_p4(
    table: &[[(f32, f32, f32); 2]],
    table_res: usize,
    table_mid_value: f32,
    lambdas: Vec4,
    rgb: (f32, f32, f32),
) -> Vec4 {
    // Determine the largest RGB component, and keep the other two (in
    // cyclic order) as the 2D lookup position.  Ties fall through to the
    // later branches.
    let (i, max_val, x, y) = if rgb.0 > rgb.1 && rgb.0 > rgb.2 {
        (0, rgb.0, rgb.1, rgb.2)
    } else if rgb.1 > rgb.2 {
        (1, rgb.1, rgb.2, rgb.0)
    } else {
        (2, rgb.2, rgb.0, rgb.1)
    };
    if max_val == 0.0 {
        // If max_val is zero, just return zero.  This avoids NaN's from
        // divide by zero.  This is also correct, since it's black.
        return Vec4::splat(0.0);
    }

    // Map the two smaller components into table space.  The scale is derived
    // from the table resolution (rather than a hard-coded 63) so that it
    // always agrees with the clamping and strides used below.
    let scale = (table_res - 1) as f32;
    let x = x * scale / max_val;
    let y = y * scale / max_val;

    // Calculate lookup coordinates.  Clamping to `table_res - 2` keeps the
    // `+ 1` neighbors inside the section.
    let xi = (x as usize).min(table_res - 2);
    let yi = (y as usize).min(table_res - 2);
    let offset = (table_res * table_res * i) + (yi * table_res) + xi;
    let dx = 1;
    let dy = table_res;

    // Look up a table entry and convert both of its coefficient triples to
    // SIMD format for faster interpolation (the fourth lane is unused).
    let load = |index: usize| -> [Vec4; 2] {
        let entry = table[index];
        [
            Vec4::new(entry[0].0, entry[0].1, entry[0].2, 0.0),
            Vec4::new(entry[1].0, entry[1].1, entry[1].2, 0.0),
        ]
    };
    let a0 = load(offset);
    let a1 = load(offset + dx);
    let a2 = load(offset + dy);
    let a3 = load(offset + dy + dx);

    // Do bilinear interpolation: first along x, then along y.
    let x1 = x - xi as f32;
    let x0 = 1.0 - x1;
    let y1 = y - yi as f32;
    let y0 = 1.0 - y1;
    let lerp = |p: Vec4, q: Vec4, w0: f32, w1: f32| (p * w0) + (q * w1);
    let b0 = [lerp(a0[0], a1[0], x0, x1), lerp(a0[1], a1[1], x0, x1)];
    let b1 = [lerp(a2[0], a3[0], x0, x1), lerp(a2[1], a3[1], x0, x1)];
    let c = [lerp(b0[0], b1[0], y0, y1), lerp(b0[1], b1[1], y0, y1)];

    // Evaluate the spectral function and return the result.
    let eval = |co: Vec4| rgb2spec_eval_4([co.x(), co.y(), co.z()], lambdas);
    if max_val <= table_mid_value {
        eval(c[0]) * (1.0 / table_mid_value) * max_val
    } else if max_val < 1.0 {
        // Blend between the two slices based on where max_val sits between
        // the mid value and 1.0.
        let n = (max_val - table_mid_value) / (1.0 - table_mid_value);
        let s0 = eval(c[0]);
        let s1 = eval(c[1]);
        (s0 * (1.0 - n)) + (s1 * n)
    } else {
        eval(c[1]) * max_val
    }
}

//============================================================
// Coefficient -> eval functions

/// Multiply-add on `Vec4` values: returns `a * b + c`.
///
/// Expressed as a plain multiply followed by an add.
#[inline(always)]
fn rgb2spec_fma_4(a: Vec4, b: Vec4, c: Vec4) -> Vec4 {
    let product = a * b;
    product + c
}

/// Evaluates the spectral model described by `coeff` at each of the four
/// wavelengths in `lambda`.
///
/// With `p = (coeff[0] * lambda + coeff[1]) * lambda + coeff[2]`, the result
/// is `0.5 * p / sqrt(p * p + 1) + 0.5`, computed per lane.
fn rgb2spec_eval_4(coeff: [f32; RGB2SPEC_N_COEFFS], lambda: Vec4) -> Vec4 {
    let half = Vec4::splat(0.5);

    // Quadratic in `lambda`, evaluated with Horner's scheme.
    let inner = rgb2spec_fma_4(Vec4::splat(coeff[0]), lambda, Vec4::splat(coeff[1]));
    let poly = rgb2spec_fma_4(inner, lambda, Vec4::splat(coeff[2]));

    // 1 / sqrt(poly^2 + 1), with the square root taken lane by lane.
    // TODO: replace this with a SIMD sqrt op.
    let squared_plus_one = rgb2spec_fma_4(poly, poly, Vec4::splat(1.0));
    let (s0, s1, s2, s3): (f32, f32, f32, f32) = squared_plus_one.into();
    let inv_root = Vec4::new(s0.sqrt(), s1.sqrt(), s2.sqrt(), s3.sqrt()).reciprocal();

    rgb2spec_fma_4(half * poly, inv_root, half)
}