Use fast approximate exp functions for XYZ color curves.
This brings a significant performance improvement, and doesn't appear to have any visual impact.
This commit is contained in:
parent
177bb95eff
commit
7401c6fb99
|
@ -2,6 +2,7 @@ mod spectra_xyz;
|
||||||
|
|
||||||
use std::ops::{Add, AddAssign, Mul, MulAssign, Div, DivAssign};
|
use std::ops::{Add, AddAssign, Mul, MulAssign, Div, DivAssign};
|
||||||
|
|
||||||
|
use math::faster_exp;
|
||||||
use float4::Float4;
|
use float4::Float4;
|
||||||
use lerp::Lerp;
|
use lerp::Lerp;
|
||||||
use self::spectra_xyz::{spectrum_xyz_to_p, EQUAL_ENERGY_REFLECTANCE};
|
use self::spectra_xyz::{spectrum_xyz_to_p, EQUAL_ENERGY_REFLECTANCE};
|
||||||
|
@ -323,8 +324,8 @@ fn x_1931(wavelength: f32) -> f32 {
|
||||||
} else {
|
} else {
|
||||||
0.0382
|
0.0382
|
||||||
});
|
});
|
||||||
(0.362 * (-0.5 * t1 * t1).exp()) + (1.056 * (-0.5 * t2 * t2).exp()) -
|
(0.362 * faster_exp(-0.5 * t1 * t1)) + (1.056 * faster_exp(-0.5 * t2 * t2)) -
|
||||||
(0.065 * (-0.5 * t3 * t3).exp())
|
(0.065 * faster_exp(-0.5 * t3 * t3))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
|
@ -341,7 +342,7 @@ fn y_1931(wavelength: f32) -> f32 {
|
||||||
} else {
|
} else {
|
||||||
0.0322
|
0.0322
|
||||||
});
|
});
|
||||||
(0.821 * (-0.5 * t1 * t1).exp()) + (0.286 * (-0.5 * t2 * t2).exp())
|
(0.821 * faster_exp(-0.5 * t1 * t1)) + (0.286 * faster_exp(-0.5 * t2 * t2))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
|
@ -358,7 +359,7 @@ fn z_1931(wavelength: f32) -> f32 {
|
||||||
} else {
|
} else {
|
||||||
0.0725
|
0.0725
|
||||||
});
|
});
|
||||||
(1.217 * (-0.5 * t1 * t1).exp()) + (0.681 * (-0.5 * t2 * t2).exp())
|
(1.217 * faster_exp(-0.5 * t1 * t1)) + (0.681 * faster_exp(-0.5 * t2 * t2))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
|
@ -38,6 +38,68 @@ pub fn fast_ln(x: f32) -> f32 {
|
||||||
return y - 87.989971088;
|
return y - 87.989971088;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fast approximation of `2^p`.
///
/// Rather than evaluating the power directly, this assembles the bit pattern
/// of the resulting `f32`: the input is shifted into the exponent field by
/// scaling with `2^23`, and a small rational correction in the fractional
/// part `z` adjusts the mantissa bits.
///
/// Inputs below `-126.0` are clamped to `-126.0`. No upper clamp is applied,
/// so inputs large enough to overflow the exponent field do not yield a
/// meaningful result; callers are expected to stay within range.
pub fn fast_pow2(p: f32) -> f32 {
    // The integer cast below truncates toward zero, so for negative inputs
    // `clipp - w` is <= 0; adding 1.0 keeps `z` non-negative.
    let offset: f32 = if p < 0.0 { 1.0 } else { 0.0 };

    // Clamp from below so the scaled value stays non-negative before the
    // conversion to `u32`.
    let clipp: f32 = if p < -126.0 { -126.0 } else { p };

    let w: i32 = clipp as i32;
    let z: f32 = clipp - w as f32 + offset;

    let bits: u32 = ((1u32 << 23) as f32
        * (clipp + 121.2740575 + 27.7280233 / (4.84252568 - z) - 1.49012907 * z))
        as u32;

    // Reinterpret the integer as an IEEE-754 single without `unsafe`.
    f32::from_bits(bits)
}
|
||||||
|
|
||||||
|
pub fn fast_exp(p: f32) -> f32 {
|
||||||
|
fast_pow2(1.442695040 * p)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cruder, cheaper approximation of `2^p` than `fast_pow2`.
///
/// The input plus a fixed bias is scaled by `2^23` and the resulting integer
/// is reinterpreted as an `f32` bit pattern, so the integer part of the input
/// lands in the exponent field and the fractional part is used as the
/// mantissa. There is no correction term.
///
/// Inputs below `-126.0` are clamped to `-126.0`. No upper clamp is applied,
/// so inputs large enough to overflow the exponent field do not yield a
/// meaningful result; callers are expected to stay within range.
pub fn faster_pow2(p: f32) -> f32 {
    // Clamp from below so the scaled value stays non-negative before the
    // conversion to `u32`.
    let clipp: f32 = if p < -126.0 { -126.0 } else { p };

    let bits: u32 = ((1u32 << 23) as f32 * (clipp + 126.94269504)) as u32;

    // Reinterpret the integer as an IEEE-754 single without `unsafe`.
    f32::from_bits(bits)
}
|
||||||
|
|
||||||
|
pub fn faster_exp(p: f32) -> f32 {
|
||||||
|
faster_pow2(1.442695040 * p)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the smaller of `a` and `b`.
///
/// Written as a bare comparison because the stdlib min function was found to
/// be slower than a simple if statement. When the comparison `a < b` is false
/// (including when either argument is NaN), `b` is returned.
pub fn fast_minf32(a: f32, b: f32) -> f32 {
    if a < b {
        return a;
    }
    b
}
|
||||||
|
|
||||||
|
/// Returns the larger of `a` and `b`.
///
/// Written as a bare comparison because the stdlib max function was found to
/// be slower than a simple if statement. When the comparison `a > b` is false
/// (including when either argument is NaN), `b` is returned.
pub fn fast_maxf32(a: f32, b: f32) -> f32 {
    if a > b {
        return a;
    }
    b
}
|
||||||
|
|
||||||
|
|
||||||
/// Rounds an integer up to the next power of two.
|
/// Rounds an integer up to the next power of two.
|
||||||
pub fn upper_power_of_two(mut v: u32) -> u32 {
|
pub fn upper_power_of_two(mut v: u32) -> u32 {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user