From 7401c6fb99a791a1460b282ce962b33bd703ff0a Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl <cessen@cessen.com>
Date: Sat, 16 Jul 2016 17:53:50 -0700
Subject: [PATCH] Use fast approximate exp functions for XYZ color curves.

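Replace the std `exp()` calls in the x_1931/y_1931/z_1931 curve
approximations with the new `faster_exp` from the math module. This
brings a significant performance improvement, and doesn't appear to
have any visual impact.

Also adds `fast_pow2`, `fast_exp`, `faster_pow2`, `fast_minf32` and
`fast_maxf32` to the math module.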
---
 src/color/mod.rs |  9 +++----
 src/math/mod.rs  | 62 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/src/color/mod.rs b/src/color/mod.rs
index 28a0166..26536df 100644
--- a/src/color/mod.rs
+++ b/src/color/mod.rs
@@ -2,6 +2,7 @@ mod spectra_xyz;
 
 use std::ops::{Add, AddAssign, Mul, MulAssign, Div, DivAssign};
 
+use math::faster_exp;
 use float4::Float4;
 use lerp::Lerp;
 use self::spectra_xyz::{spectrum_xyz_to_p, EQUAL_ENERGY_REFLECTANCE};
@@ -323,8 +324,8 @@ fn x_1931(wavelength: f32) -> f32 {
     } else {
         0.0382
     });
-    (0.362 * (-0.5 * t1 * t1).exp()) + (1.056 * (-0.5 * t2 * t2).exp()) -
-    (0.065 * (-0.5 * t3 * t3).exp())
+    (0.362 * faster_exp(-0.5 * t1 * t1)) + (1.056 * faster_exp(-0.5 * t2 * t2)) -
+    (0.065 * faster_exp(-0.5 * t3 * t3))
 }
 
 #[allow(dead_code)]
@@ -341,7 +342,7 @@ fn y_1931(wavelength: f32) -> f32 {
     } else {
         0.0322
     });
-    (0.821 * (-0.5 * t1 * t1).exp()) + (0.286 * (-0.5 * t2 * t2).exp())
+    (0.821 * faster_exp(-0.5 * t1 * t1)) + (0.286 * faster_exp(-0.5 * t2 * t2))
 }
 
 #[allow(dead_code)]
@@ -358,7 +359,7 @@ fn z_1931(wavelength: f32) -> f32 {
     } else {
         0.0725
     });
-    (1.217 * (-0.5 * t1 * t1).exp()) + (0.681 * (-0.5 * t2 * t2).exp())
+    (1.217 * faster_exp(-0.5 * t1 * t1)) + (0.681 * faster_exp(-0.5 * t2 * t2))
 }
 
 #[cfg(test)]
diff --git a/src/math/mod.rs b/src/math/mod.rs
index 646b619..6dca3c4 100644
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@@ -38,6 +38,68 @@ pub fn fast_ln(x: f32) -> f32 {
     return y - 87.989971088;
 }
 
+pub fn fast_pow2(p: f32) -> f32 { // Approximates 2^p by building the f32 bit pattern directly.
+    use std::mem::transmute_copy;
+    // `offset` is 1.0 for negative `p`, which keeps `z` below non-negative.
+    let offset: f32 = if p < 0.0 {
+        1.0
+    } else {
+        0.0
+    };
+    let clipp: f32 = if p < -126.0 { // Lower clamp only; NOTE(review): large `p` is not clamped.
+        -126.0
+    } else {
+        p
+    };
+    let w: i32 = clipp as i32; // `as i32` truncates toward zero.
+    let z: f32 = clipp - w as f32 + offset; // Fractional part of `clipp` (plus `offset`).
+    // Scale by 2^23 so the integer part of the sum lands above the low 23 bits of `i`.
+    let i: u32 = ((1 << 23) as f32 *
+                  (clipp + 121.2740575 + 27.7280233 / (4.84252568 - z) -
+                   1.49012907 * z)) as u32; // NOTE(review): constants' origin not shown; confirm.
+    // SAFETY: `u32` and `f32` have the same size and every 32-bit pattern is a valid `f32`.
+    unsafe { transmute_copy::<u32, f32>(&i) }
+}
+
+pub fn fast_exp(p: f32) -> f32 { // Approximates e^p via `fast_pow2`.
+    fast_pow2(1.442695040 * p) // e^p == 2^(p * log2(e)); 1.442695040 ~= log2(e).
+}
+
+pub fn faster_pow2(p: f32) -> f32 { // Cruder 2^p approximation: a single add and multiply.
+    use std::mem::transmute_copy;
+    // Lower clamp only; NOTE(review): large `p` is not clamped before the `as u32` cast below.
+    let clipp: f32 = if p < -126.0 {
+        -126.0
+    } else {
+        p
+    };
+    let i: u32 = ((1 << 23) as f32 * (clipp + 126.94269504)) as u32; // Scale by 2^23, to bits.
+    // SAFETY: `u32` and `f32` have the same size and every 32-bit pattern is a valid `f32`.
+    unsafe { transmute_copy::<u32, f32>(&i) }
+}
+
+pub fn faster_exp(p: f32) -> f32 { // Approximates e^p via `faster_pow2`.
+    faster_pow2(1.442695040 * p) // e^p == 2^(p * log2(e)); 1.442695040 ~= log2(e).
+}
+
+/// Returns `a` when `a < b`, else `b`; the author found this faster than the stdlib min function.
+pub fn fast_minf32(a: f32, b: f32) -> f32 {
+    if a < b { // False if either operand is NaN, in which case `b` is returned.
+        a
+    } else {
+        b
+    }
+}
+
+/// Returns `a` when `a > b`, else `b`; the author found this faster than the stdlib max function.
+pub fn fast_maxf32(a: f32, b: f32) -> f32 {
+    if a > b { // False if either operand is NaN, in which case `b` is returned.
+        a
+    } else {
+        b
+    }
+}
+
 
 /// Rounds an integer up to the next power of two.
 pub fn upper_power_of_two(mut v: u32) -> u32 {