Some basic SIMD optimizations for XYZ->Spectrum conversion.

2018-07-01 15:50:34 -07:00 · 2018-07-01 15:50:34 -07:00 · 3f55df7225
commit 3f55df7225
parent ef7084e694
4 changed files with 202 additions and 21 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -202,6 +202,9 @@ version = "0.1.0"
 [[package]]
 name = "spectra_xyz"
 version = "0.1.0"
 dependencies = [
 "float4 0.1.0",
 ]
 [[package]]
 name = "strsim"
--- a/src/color.rs
+++ b/src/color.rs
@ -1,6 +1,6 @@
 use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign};
-use spectra_xyz::{spectrum_xyz_to_p, EQUAL_ENERGY_REFLECTANCE};
+use spectra_xyz::{spectrum_xyz_to_p_4, EQUAL_ENERGY_REFLECTANCE};
 use float4::Float4;
 use lerp::Lerp;
@ -19,22 +19,10 @@ pub fn map_0_1_to_wavelength(n: f32) -> f32 {
 }
 pub trait Color {
-    fn sample_spectrum(&self, wavelength: f32) -> f32;
+    fn to_spectral_sample(&self, hero_wavelength: f32) -> SpectralSample;
    fn to_spectral_sample(&self, hero_wavelength: f32) -> SpectralSample {
        SpectralSample {
            e: Float4::new(
                self.sample_spectrum(nth_wavelength(hero_wavelength, 0)),
                self.sample_spectrum(nth_wavelength(hero_wavelength, 1)),
                self.sample_spectrum(nth_wavelength(hero_wavelength, 2)),
                self.sample_spectrum(nth_wavelength(hero_wavelength, 3)),
            ),
            hero_wavelength: hero_wavelength,
        }
    }
 }
 #[inline(always)]
 fn nth_wavelength(hero_wavelength: f32, n: usize) -> f32 {
    let wl = hero_wavelength + (WL_RANGE_Q * n as f32);
    if wl > WL_MAX {
@ -44,6 +32,17 @@ fn nth_wavelength(hero_wavelength: f32, n: usize) -> f32 {
    }
 }
 /// Returns all wavelengths of a hero wavelength set as a Float4
 #[inline(always)]
 fn wavelengths(hero_wavelength: f32) -> Float4 {
    Float4::new(
        nth_wavelength(hero_wavelength, 0),
        nth_wavelength(hero_wavelength, 1),
        nth_wavelength(hero_wavelength, 2),
        nth_wavelength(hero_wavelength, 3),
    )
 }
 //----------------------------------------------------------------
 #[derive(Copy, Clone, Debug)]
@ -201,8 +200,11 @@ impl XYZ {
 }
 impl Color for XYZ {
-    fn sample_spectrum(&self, wavelength: f32) -> f32 {
+    fn to_spectral_sample(&self, hero_wavelength: f32) -> SpectralSample {
-        xyz_to_spectrum((self.x, self.y, self.z), wavelength)
+        SpectralSample {
            e: xyz_to_spectrum_4((self.x, self.y, self.z), wavelengths(hero_wavelength)),
            hero_wavelength: hero_wavelength,
        }
    }
 }
@ -271,11 +273,12 @@ impl DivAssign<f32> for XYZ {
 //----------------------------------------------------------------
-/// Samples an CIE 1931 XYZ color at a particular wavelength, according to
+/// Samples an CIE 1931 XYZ color at a particular set of wavelengths, according to
 /// the method in the paper "Physically Meaningful Rendering using Tristimulus
 /// Colours" by Meng et al.
-fn xyz_to_spectrum(xyz: (f32, f32, f32), wavelength: f32) -> f32 {
+#[inline(always)]
-    spectrum_xyz_to_p(wavelength, xyz) * (1.0 / EQUAL_ENERGY_REFLECTANCE)
+fn xyz_to_spectrum_4(xyz: (f32, f32, f32), wavelengths: Float4) -> Float4 {
    spectrum_xyz_to_p_4(wavelengths, xyz) * Float4::splat(1.0 / EQUAL_ENERGY_REFLECTANCE)
 }
 /// Close analytic approximations of the CIE 1931 XYZ color curves.
--- a/sub_crates/spectra_xyz/Cargo.toml
+++ b/sub_crates/spectra_xyz/Cargo.toml
@ -7,3 +7,7 @@ license = "MIT"
 [lib]
 name = "spectra_xyz"
 path = "src/lib.rs"
 # Local crate dependencies
 [dependencies.float4]
 path = "../float4"
--- a/sub_crates/spectra_xyz/src/lib.rs
+++ b/sub_crates/spectra_xyz/src/lib.rs
@ -1,3 +1,6 @@
 extern crate float4;
 use float4::Float4;
 use std::f32;
 mod spectra_tables;
@ -80,7 +83,6 @@ pub fn spectrum_xyz_to_p(lambda: f32, xyz: (f32, f32, f32)) -> f32 {
        SPECTRUM_NUM_SAMPLES - 1
    };
    assert!(sb0 < SPECTRUM_NUM_SAMPLES);
    assert!(sb1 < SPECTRUM_NUM_SAMPLES);
    let sbf: f32 = sb as f32 - sb0 as f32;
    for i in 0..(num as usize) {
        debug_assert!(idx[i] >= 0);
@ -161,6 +163,175 @@ pub fn spectrum_xyz_to_p(lambda: f32, xyz: (f32, f32, f32)) -> f32 {
    return interpolated_p * inv_norm;
 }
 /// Evaluate the spectrum for xyz at the given wavelengths.
 ///
 /// Works on 4 wavelengths at once via SIMD.
 #[inline]
 pub fn spectrum_xyz_to_p_4(lambdas: Float4, xyz: (f32, f32, f32)) -> Float4 {
    assert!(lambdas.h_min() >= SPECTRUM_SAMPLE_MIN);
    assert!(lambdas.h_max() <= SPECTRUM_SAMPLE_MAX);
    let inv_norm = xyz.0 + xyz.1 + xyz.2;
    let norm = {
        let norm = 1.0 / inv_norm;
        if norm < f32::MAX {
            norm
        } else {
            return Float4::splat(0.0);
        }
    };
    let xyy = (xyz.0 * norm, xyz.1 * norm, xyz.1);
    // Rotate to align with grid
    let uv = spectrum_xy_to_uv((xyy.0, xyy.1));
    if uv.0 < 0.0
        || uv.0 >= SPECTRUM_GRID_WIDTH as f32
        || uv.1 < 0.0
        || uv.1 >= SPECTRUM_GRID_HEIGHT as f32
    {
        return Float4::splat(0.0);
    }
    let uvi = (uv.0 as i32, uv.1 as i32);
    debug_assert!(uvi.0 < SPECTRUM_GRID_WIDTH);
    debug_assert!(uvi.1 < SPECTRUM_GRID_HEIGHT);
    let cell_idx: i32 = uvi.0 + SPECTRUM_GRID_WIDTH * uvi.1;
    debug_assert!(cell_idx >= 0);
    let cell = &SPECTRUM_GRID[cell_idx as usize];
    let inside: bool = cell.inside;
    let idx = &cell.idx;
    let num: i32 = cell.num_points;
    // If the cell has no points, nothing we can do, so return 0.0
    if num == 0 {
        return Float4::splat(0.0);
    }
    // Normalize lambda to spectrum table index range.
    let sb: Float4 = (lambdas - Float4::splat(SPECTRUM_SAMPLE_MIN))
        / (SPECTRUM_SAMPLE_MAX - SPECTRUM_SAMPLE_MIN)
        * (SPECTRUM_NUM_SAMPLES as f32 - 1.0);
    debug_assert!(sb.h_min() >= 0.0);
    debug_assert!(sb.h_max() <= SPECTRUM_NUM_SAMPLES as f32);
    // Get the spectral values for the vertices of the grid cell.
    // TODO: use integer SIMD intrinsics to make this part faster.
    let mut p = [Float4::splat(0.0); 6];
    let sb0: [i32; 4] = [
        sb.get_0() as i32,
        sb.get_1() as i32,
        sb.get_2() as i32,
        sb.get_3() as i32,
    ];
    assert!(sb0[0].max(sb0[1]).max(sb0[2].max(sb0[3])) < SPECTRUM_NUM_SAMPLES);
    let plus_one_clamped = |n| {
        if (n + 1.0) < SPECTRUM_NUM_SAMPLES as f32 {
            n as i32 + 1
        } else {
            SPECTRUM_NUM_SAMPLES - 1
        }
    };
    let sb1: [i32; 4] = [
        plus_one_clamped(sb.get_0()),
        plus_one_clamped(sb.get_1()),
        plus_one_clamped(sb.get_2()),
        plus_one_clamped(sb.get_3()),
    ];
    let sbf = sb - Float4::new(sb0[0] as f32, sb0[1] as f32, sb0[2] as f32, sb0[3] as f32);
    for i in 0..(num as usize) {
        debug_assert!(idx[i] >= 0);
        let spectrum = &SPECTRUM_DATA_POINTS[idx[i] as usize].spectrum;
        let p0 = Float4::new(
            spectrum[sb0[0] as usize],
            spectrum[sb0[1] as usize],
            spectrum[sb0[2] as usize],
            spectrum[sb0[3] as usize],
        );
        let p1 = Float4::new(
            spectrum[sb1[0] as usize],
            spectrum[sb1[1] as usize],
            spectrum[sb1[2] as usize],
            spectrum[sb1[3] as usize],
        );
        p[i] = p0 * (Float4::splat(1.0) - sbf) + p1 * sbf;
    }
    // Linearly interpolated the spectral power of the cell vertices.
    let mut interpolated_p = Float4::splat(0.0);
    if inside {
        // Fast path for normal inner quads:
        let uv2 = (uv.0 - uvi.0 as f32, uv.1 - uvi.1 as f32);
        assert!(uv2.0 >= 0.0 && uv2.0 <= 1.0);
        assert!(uv2.1 >= 0.0 && uv2.1 <= 1.0);
        // The layout of the vertices in the quad is:
        //  2  3
        //  0  1
        interpolated_p = p[0] * (1.0 - uv2.0) * (1.0 - uv2.1)
            + p[2] * (1.0 - uv2.0) * uv2.1
            + p[3] * uv2.0 * uv2.1
            + p[1] * uv2.0 * (1.0 - uv2.1);
    } else {
        // Need to go through triangulation :(
        // We get the indices in such an order that they form a triangle fan around idx[0].
        // compute barycentric coordinates of our xy* point for all triangles in the fan:
        let ex: f32 = uv.0 - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.0;
        let ey: f32 = uv.1 - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.1;
        let mut e0x: f32 =
            SPECTRUM_DATA_POINTS[idx[1] as usize].uv.0 - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.0;
        let mut e0y: f32 =
            SPECTRUM_DATA_POINTS[idx[1] as usize].uv.1 - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.1;
        let mut uu: f32 = e0x * ey - ex * e0y;
        for i in 0..(num as usize - 1) {
            let (e1x, e1y): (f32, f32) = if i as i32 == (num - 2) {
                // Close the circle
                (
                    SPECTRUM_DATA_POINTS[idx[1] as usize].uv.0
                        - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.0,
                    SPECTRUM_DATA_POINTS[idx[1] as usize].uv.1
                        - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.1,
                )
            } else {
                (
                    SPECTRUM_DATA_POINTS[idx[i + 2] as usize].uv.0
                        - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.0,
                    SPECTRUM_DATA_POINTS[idx[i + 2] as usize].uv.1
                        - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.1,
                )
            };
            let vv: f32 = ex * e1y - e1x * ey;
            let area: f32 = e0x * e1y - e1x * e0y;
            // Normalise
            let u: f32 = uu / area;
            let v: f32 = vv / area;
            let w: f32 = 1.0 - u - v;
            // Outside spectral locus (quantized version at least) or outside grid
            if u < 0.0 || v < 0.0 || w < 0.0 {
                uu = -vv;
                e0x = e1x;
                e0y = e1y;
                continue;
            }
            // This seems to be the triangle we've been looking for.
            interpolated_p =
                p[0] * w + p[i + 1] * v + p[if i as i32 == (num - 2) { 1 } else { i + 2 }] * u;
            break;
        }
    }
    // Now we have a spectrum which corresponds to the xy chromaticities of
    // the input. need to scale according to the input brightness X+Y+Z now:
    return interpolated_p * inv_norm;
 }
 // apply a 3x2 matrix to a 2D color.
 #[inline(always)]
 fn spectrum_apply_3x2(matrix: &[f32; 6], src: (f32, f32)) -> (f32, f32) {