Some basic SIMD optimizations for XYZ->Spectrum conversion.
This commit is contained in:
parent
ef7084e694
commit
3f55df7225
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -202,6 +202,9 @@ version = "0.1.0"
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "spectra_xyz"
|
name = "spectra_xyz"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"float4 0.1.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
|
|
43
src/color.rs
43
src/color.rs
|
@ -1,6 +1,6 @@
|
||||||
use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign};
|
use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign};
|
||||||
|
|
||||||
use spectra_xyz::{spectrum_xyz_to_p, EQUAL_ENERGY_REFLECTANCE};
|
use spectra_xyz::{spectrum_xyz_to_p_4, EQUAL_ENERGY_REFLECTANCE};
|
||||||
|
|
||||||
use float4::Float4;
|
use float4::Float4;
|
||||||
use lerp::Lerp;
|
use lerp::Lerp;
|
||||||
|
@ -19,22 +19,10 @@ pub fn map_0_1_to_wavelength(n: f32) -> f32 {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait Color {
|
pub trait Color {
|
||||||
fn sample_spectrum(&self, wavelength: f32) -> f32;
|
fn to_spectral_sample(&self, hero_wavelength: f32) -> SpectralSample;
|
||||||
|
|
||||||
fn to_spectral_sample(&self, hero_wavelength: f32) -> SpectralSample {
|
|
||||||
SpectralSample {
|
|
||||||
e: Float4::new(
|
|
||||||
self.sample_spectrum(nth_wavelength(hero_wavelength, 0)),
|
|
||||||
self.sample_spectrum(nth_wavelength(hero_wavelength, 1)),
|
|
||||||
self.sample_spectrum(nth_wavelength(hero_wavelength, 2)),
|
|
||||||
self.sample_spectrum(nth_wavelength(hero_wavelength, 3)),
|
|
||||||
),
|
|
||||||
|
|
||||||
hero_wavelength: hero_wavelength,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
fn nth_wavelength(hero_wavelength: f32, n: usize) -> f32 {
|
fn nth_wavelength(hero_wavelength: f32, n: usize) -> f32 {
|
||||||
let wl = hero_wavelength + (WL_RANGE_Q * n as f32);
|
let wl = hero_wavelength + (WL_RANGE_Q * n as f32);
|
||||||
if wl > WL_MAX {
|
if wl > WL_MAX {
|
||||||
|
@ -44,6 +32,17 @@ fn nth_wavelength(hero_wavelength: f32, n: usize) -> f32 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns all wavelengths of a hero wavelength set as a Float4
|
||||||
|
#[inline(always)]
|
||||||
|
fn wavelengths(hero_wavelength: f32) -> Float4 {
|
||||||
|
Float4::new(
|
||||||
|
nth_wavelength(hero_wavelength, 0),
|
||||||
|
nth_wavelength(hero_wavelength, 1),
|
||||||
|
nth_wavelength(hero_wavelength, 2),
|
||||||
|
nth_wavelength(hero_wavelength, 3),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
@ -201,8 +200,11 @@ impl XYZ {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Color for XYZ {
|
impl Color for XYZ {
|
||||||
fn sample_spectrum(&self, wavelength: f32) -> f32 {
|
fn to_spectral_sample(&self, hero_wavelength: f32) -> SpectralSample {
|
||||||
xyz_to_spectrum((self.x, self.y, self.z), wavelength)
|
SpectralSample {
|
||||||
|
e: xyz_to_spectrum_4((self.x, self.y, self.z), wavelengths(hero_wavelength)),
|
||||||
|
hero_wavelength: hero_wavelength,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -271,11 +273,12 @@ impl DivAssign<f32> for XYZ {
|
||||||
|
|
||||||
//----------------------------------------------------------------
|
//----------------------------------------------------------------
|
||||||
|
|
||||||
/// Samples an CIE 1931 XYZ color at a particular wavelength, according to
|
/// Samples an CIE 1931 XYZ color at a particular set of wavelengths, according to
|
||||||
/// the method in the paper "Physically Meaningful Rendering using Tristimulus
|
/// the method in the paper "Physically Meaningful Rendering using Tristimulus
|
||||||
/// Colours" by Meng et al.
|
/// Colours" by Meng et al.
|
||||||
fn xyz_to_spectrum(xyz: (f32, f32, f32), wavelength: f32) -> f32 {
|
#[inline(always)]
|
||||||
spectrum_xyz_to_p(wavelength, xyz) * (1.0 / EQUAL_ENERGY_REFLECTANCE)
|
fn xyz_to_spectrum_4(xyz: (f32, f32, f32), wavelengths: Float4) -> Float4 {
|
||||||
|
spectrum_xyz_to_p_4(wavelengths, xyz) * Float4::splat(1.0 / EQUAL_ENERGY_REFLECTANCE)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Close analytic approximations of the CIE 1931 XYZ color curves.
|
/// Close analytic approximations of the CIE 1931 XYZ color curves.
|
||||||
|
|
|
@ -7,3 +7,7 @@ license = "MIT"
|
||||||
[lib]
|
[lib]
|
||||||
name = "spectra_xyz"
|
name = "spectra_xyz"
|
||||||
path = "src/lib.rs"
|
path = "src/lib.rs"
|
||||||
|
|
||||||
|
# Local crate dependencies
|
||||||
|
[dependencies.float4]
|
||||||
|
path = "../float4"
|
|
@ -1,3 +1,6 @@
|
||||||
|
extern crate float4;
|
||||||
|
|
||||||
|
use float4::Float4;
|
||||||
use std::f32;
|
use std::f32;
|
||||||
|
|
||||||
mod spectra_tables;
|
mod spectra_tables;
|
||||||
|
@ -80,7 +83,6 @@ pub fn spectrum_xyz_to_p(lambda: f32, xyz: (f32, f32, f32)) -> f32 {
|
||||||
SPECTRUM_NUM_SAMPLES - 1
|
SPECTRUM_NUM_SAMPLES - 1
|
||||||
};
|
};
|
||||||
assert!(sb0 < SPECTRUM_NUM_SAMPLES);
|
assert!(sb0 < SPECTRUM_NUM_SAMPLES);
|
||||||
assert!(sb1 < SPECTRUM_NUM_SAMPLES);
|
|
||||||
let sbf: f32 = sb as f32 - sb0 as f32;
|
let sbf: f32 = sb as f32 - sb0 as f32;
|
||||||
for i in 0..(num as usize) {
|
for i in 0..(num as usize) {
|
||||||
debug_assert!(idx[i] >= 0);
|
debug_assert!(idx[i] >= 0);
|
||||||
|
@ -161,6 +163,175 @@ pub fn spectrum_xyz_to_p(lambda: f32, xyz: (f32, f32, f32)) -> f32 {
|
||||||
return interpolated_p * inv_norm;
|
return interpolated_p * inv_norm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Evaluate the spectrum for xyz at the given wavelengths.
|
||||||
|
///
|
||||||
|
/// Works on 4 wavelengths at once via SIMD.
|
||||||
|
#[inline]
|
||||||
|
pub fn spectrum_xyz_to_p_4(lambdas: Float4, xyz: (f32, f32, f32)) -> Float4 {
|
||||||
|
assert!(lambdas.h_min() >= SPECTRUM_SAMPLE_MIN);
|
||||||
|
assert!(lambdas.h_max() <= SPECTRUM_SAMPLE_MAX);
|
||||||
|
|
||||||
|
let inv_norm = xyz.0 + xyz.1 + xyz.2;
|
||||||
|
let norm = {
|
||||||
|
let norm = 1.0 / inv_norm;
|
||||||
|
if norm < f32::MAX {
|
||||||
|
norm
|
||||||
|
} else {
|
||||||
|
return Float4::splat(0.0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let xyy = (xyz.0 * norm, xyz.1 * norm, xyz.1);
|
||||||
|
|
||||||
|
// Rotate to align with grid
|
||||||
|
let uv = spectrum_xy_to_uv((xyy.0, xyy.1));
|
||||||
|
if uv.0 < 0.0
|
||||||
|
|| uv.0 >= SPECTRUM_GRID_WIDTH as f32
|
||||||
|
|| uv.1 < 0.0
|
||||||
|
|| uv.1 >= SPECTRUM_GRID_HEIGHT as f32
|
||||||
|
{
|
||||||
|
return Float4::splat(0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
let uvi = (uv.0 as i32, uv.1 as i32);
|
||||||
|
debug_assert!(uvi.0 < SPECTRUM_GRID_WIDTH);
|
||||||
|
debug_assert!(uvi.1 < SPECTRUM_GRID_HEIGHT);
|
||||||
|
|
||||||
|
let cell_idx: i32 = uvi.0 + SPECTRUM_GRID_WIDTH * uvi.1;
|
||||||
|
debug_assert!(cell_idx >= 0);
|
||||||
|
|
||||||
|
let cell = &SPECTRUM_GRID[cell_idx as usize];
|
||||||
|
let inside: bool = cell.inside;
|
||||||
|
let idx = &cell.idx;
|
||||||
|
let num: i32 = cell.num_points;
|
||||||
|
|
||||||
|
// If the cell has no points, nothing we can do, so return 0.0
|
||||||
|
if num == 0 {
|
||||||
|
return Float4::splat(0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize lambda to spectrum table index range.
|
||||||
|
let sb: Float4 = (lambdas - Float4::splat(SPECTRUM_SAMPLE_MIN))
|
||||||
|
/ (SPECTRUM_SAMPLE_MAX - SPECTRUM_SAMPLE_MIN)
|
||||||
|
* (SPECTRUM_NUM_SAMPLES as f32 - 1.0);
|
||||||
|
debug_assert!(sb.h_min() >= 0.0);
|
||||||
|
debug_assert!(sb.h_max() <= SPECTRUM_NUM_SAMPLES as f32);
|
||||||
|
|
||||||
|
// Get the spectral values for the vertices of the grid cell.
|
||||||
|
// TODO: use integer SIMD intrinsics to make this part faster.
|
||||||
|
let mut p = [Float4::splat(0.0); 6];
|
||||||
|
let sb0: [i32; 4] = [
|
||||||
|
sb.get_0() as i32,
|
||||||
|
sb.get_1() as i32,
|
||||||
|
sb.get_2() as i32,
|
||||||
|
sb.get_3() as i32,
|
||||||
|
];
|
||||||
|
assert!(sb0[0].max(sb0[1]).max(sb0[2].max(sb0[3])) < SPECTRUM_NUM_SAMPLES);
|
||||||
|
let plus_one_clamped = |n| {
|
||||||
|
if (n + 1.0) < SPECTRUM_NUM_SAMPLES as f32 {
|
||||||
|
n as i32 + 1
|
||||||
|
} else {
|
||||||
|
SPECTRUM_NUM_SAMPLES - 1
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let sb1: [i32; 4] = [
|
||||||
|
plus_one_clamped(sb.get_0()),
|
||||||
|
plus_one_clamped(sb.get_1()),
|
||||||
|
plus_one_clamped(sb.get_2()),
|
||||||
|
plus_one_clamped(sb.get_3()),
|
||||||
|
];
|
||||||
|
let sbf = sb - Float4::new(sb0[0] as f32, sb0[1] as f32, sb0[2] as f32, sb0[3] as f32);
|
||||||
|
for i in 0..(num as usize) {
|
||||||
|
debug_assert!(idx[i] >= 0);
|
||||||
|
let spectrum = &SPECTRUM_DATA_POINTS[idx[i] as usize].spectrum;
|
||||||
|
let p0 = Float4::new(
|
||||||
|
spectrum[sb0[0] as usize],
|
||||||
|
spectrum[sb0[1] as usize],
|
||||||
|
spectrum[sb0[2] as usize],
|
||||||
|
spectrum[sb0[3] as usize],
|
||||||
|
);
|
||||||
|
let p1 = Float4::new(
|
||||||
|
spectrum[sb1[0] as usize],
|
||||||
|
spectrum[sb1[1] as usize],
|
||||||
|
spectrum[sb1[2] as usize],
|
||||||
|
spectrum[sb1[3] as usize],
|
||||||
|
);
|
||||||
|
p[i] = p0 * (Float4::splat(1.0) - sbf) + p1 * sbf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Linearly interpolated the spectral power of the cell vertices.
|
||||||
|
let mut interpolated_p = Float4::splat(0.0);
|
||||||
|
if inside {
|
||||||
|
// Fast path for normal inner quads:
|
||||||
|
let uv2 = (uv.0 - uvi.0 as f32, uv.1 - uvi.1 as f32);
|
||||||
|
|
||||||
|
assert!(uv2.0 >= 0.0 && uv2.0 <= 1.0);
|
||||||
|
assert!(uv2.1 >= 0.0 && uv2.1 <= 1.0);
|
||||||
|
|
||||||
|
// The layout of the vertices in the quad is:
|
||||||
|
// 2 3
|
||||||
|
// 0 1
|
||||||
|
interpolated_p = p[0] * (1.0 - uv2.0) * (1.0 - uv2.1)
|
||||||
|
+ p[2] * (1.0 - uv2.0) * uv2.1
|
||||||
|
+ p[3] * uv2.0 * uv2.1
|
||||||
|
+ p[1] * uv2.0 * (1.0 - uv2.1);
|
||||||
|
} else {
|
||||||
|
// Need to go through triangulation :(
|
||||||
|
// We get the indices in such an order that they form a triangle fan around idx[0].
|
||||||
|
// compute barycentric coordinates of our xy* point for all triangles in the fan:
|
||||||
|
let ex: f32 = uv.0 - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.0;
|
||||||
|
let ey: f32 = uv.1 - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.1;
|
||||||
|
let mut e0x: f32 =
|
||||||
|
SPECTRUM_DATA_POINTS[idx[1] as usize].uv.0 - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.0;
|
||||||
|
let mut e0y: f32 =
|
||||||
|
SPECTRUM_DATA_POINTS[idx[1] as usize].uv.1 - SPECTRUM_DATA_POINTS[idx[0] as usize].uv.1;
|
||||||
|
let mut uu: f32 = e0x * ey - ex * e0y;
|
||||||
|
|
||||||
|
for i in 0..(num as usize - 1) {
|
||||||
|
let (e1x, e1y): (f32, f32) = if i as i32 == (num - 2) {
|
||||||
|
// Close the circle
|
||||||
|
(
|
||||||
|
SPECTRUM_DATA_POINTS[idx[1] as usize].uv.0
|
||||||
|
- SPECTRUM_DATA_POINTS[idx[0] as usize].uv.0,
|
||||||
|
SPECTRUM_DATA_POINTS[idx[1] as usize].uv.1
|
||||||
|
- SPECTRUM_DATA_POINTS[idx[0] as usize].uv.1,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
(
|
||||||
|
SPECTRUM_DATA_POINTS[idx[i + 2] as usize].uv.0
|
||||||
|
- SPECTRUM_DATA_POINTS[idx[0] as usize].uv.0,
|
||||||
|
SPECTRUM_DATA_POINTS[idx[i + 2] as usize].uv.1
|
||||||
|
- SPECTRUM_DATA_POINTS[idx[0] as usize].uv.1,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
let vv: f32 = ex * e1y - e1x * ey;
|
||||||
|
let area: f32 = e0x * e1y - e1x * e0y;
|
||||||
|
|
||||||
|
// Normalise
|
||||||
|
let u: f32 = uu / area;
|
||||||
|
let v: f32 = vv / area;
|
||||||
|
let w: f32 = 1.0 - u - v;
|
||||||
|
// Outside spectral locus (quantized version at least) or outside grid
|
||||||
|
if u < 0.0 || v < 0.0 || w < 0.0 {
|
||||||
|
uu = -vv;
|
||||||
|
e0x = e1x;
|
||||||
|
e0y = e1y;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This seems to be the triangle we've been looking for.
|
||||||
|
interpolated_p =
|
||||||
|
p[0] * w + p[i + 1] * v + p[if i as i32 == (num - 2) { 1 } else { i + 2 }] * u;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now we have a spectrum which corresponds to the xy chromaticities of
|
||||||
|
// the input. need to scale according to the input brightness X+Y+Z now:
|
||||||
|
return interpolated_p * inv_norm;
|
||||||
|
}
|
||||||
|
|
||||||
// apply a 3x2 matrix to a 2D color.
|
// apply a 3x2 matrix to a 2D color.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn spectrum_apply_3x2(matrix: &[f32; 6], src: (f32, f32)) -> (f32, f32) {
|
fn spectrum_apply_3x2(matrix: &[f32; 6], src: (f32, f32)) -> (f32, f32) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user