From 7401c6fb99a791a1460b282ce962b33bd703ff0a Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Sat, 16 Jul 2016 17:53:50 -0700 Subject: [PATCH] Use fast approximate exp functions for XYZ color curves. This brings a significant performance improvement, and doesn't appear to have any visual impact. --- src/color/mod.rs | 9 +++---- src/math/mod.rs | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/color/mod.rs b/src/color/mod.rs index 28a0166..26536df 100644 --- a/src/color/mod.rs +++ b/src/color/mod.rs @@ -2,6 +2,7 @@ mod spectra_xyz; use std::ops::{Add, AddAssign, Mul, MulAssign, Div, DivAssign}; +use math::faster_exp; use float4::Float4; use lerp::Lerp; use self::spectra_xyz::{spectrum_xyz_to_p, EQUAL_ENERGY_REFLECTANCE}; @@ -323,8 +324,8 @@ fn x_1931(wavelength: f32) -> f32 { } else { 0.0382 }); - (0.362 * (-0.5 * t1 * t1).exp()) + (1.056 * (-0.5 * t2 * t2).exp()) - - (0.065 * (-0.5 * t3 * t3).exp()) + (0.362 * faster_exp(-0.5 * t1 * t1)) + (1.056 * faster_exp(-0.5 * t2 * t2)) - + (0.065 * faster_exp(-0.5 * t3 * t3)) } #[allow(dead_code)] @@ -341,7 +342,7 @@ fn y_1931(wavelength: f32) -> f32 { } else { 0.0322 }); - (0.821 * (-0.5 * t1 * t1).exp()) + (0.286 * (-0.5 * t2 * t2).exp()) + (0.821 * faster_exp(-0.5 * t1 * t1)) + (0.286 * faster_exp(-0.5 * t2 * t2)) } #[allow(dead_code)] @@ -358,7 +359,7 @@ fn z_1931(wavelength: f32) -> f32 { } else { 0.0725 }); - (1.217 * (-0.5 * t1 * t1).exp()) + (0.681 * (-0.5 * t2 * t2).exp()) + (1.217 * faster_exp(-0.5 * t1 * t1)) + (0.681 * faster_exp(-0.5 * t2 * t2)) } #[cfg(test)] diff --git a/src/math/mod.rs b/src/math/mod.rs index 646b619..6dca3c4 100644 --- a/src/math/mod.rs +++ b/src/math/mod.rs @@ -38,6 +38,68 @@ pub fn fast_ln(x: f32) -> f32 { return y - 87.989971088; } +pub fn fast_pow2(p: f32) -> f32 { + use std::mem::transmute_copy; + + let offset: f32 = if p < 0.0 { + 1.0 + } else { + 0.0 + }; + let clipp: f32 = if p < -126.0 { + -126.0 + } else { + 
p + }; + let w: i32 = clipp as i32; + let z: f32 = clipp - w as f32 + offset; + + let i: u32 = ((1 << 23) as f32 * + (clipp + 121.2740575 + 27.7280233 / (4.84252568 - z) - + 1.49012907 * z)) as u32; + + unsafe { transmute_copy::<u32, f32>(&i) } +} + +pub fn fast_exp(p: f32) -> f32 { + fast_pow2(1.442695040 * p) +} + +pub fn faster_pow2(p: f32) -> f32 { + use std::mem::transmute_copy; + + let clipp: f32 = if p < -126.0 { + -126.0 + } else { + p + }; + let i: u32 = ((1 << 23) as f32 * (clipp + 126.94269504)) as u32; + + unsafe { transmute_copy::<u32, f32>(&i) } +} + +pub fn faster_exp(p: f32) -> f32 { + faster_pow2(1.442695040 * p) +} + +// The stdlib min function is slower than a simple if statement for some reason. +pub fn fast_minf32(a: f32, b: f32) -> f32 { + if a < b { + a + } else { + b + } +} + +// The stdlib max function is slower than a simple if statement for some reason. +pub fn fast_maxf32(a: f32, b: f32) -> f32 { + if a > b { + a + } else { + b + } +} + /// Rounds an integer up to the next power of two. pub fn upper_power_of_two(mut v: u32) -> u32 {