From 8deb3058505869ae9d40cc6cb2c9f32f46a145bd Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl <cessen@cessen.com>
Date: Fri, 27 Mar 2020 21:19:24 +0900
Subject: [PATCH] Use a much better approximation of the probit function.

For 32-bit floating point, it can essentially be considered exact.
---
 src/math.rs     | 82 +++++++++++++++++++++++++++++++++++++++++++------
 src/renderer.rs |  8 +++--
 2 files changed, 77 insertions(+), 13 deletions(-)

diff --git a/src/math.rs b/src/math.rs
index 25275fe..10255d3 100644
--- a/src/math.rs
+++ b/src/math.rs
@@ -110,20 +110,82 @@ pub fn zup_to_vec(from: Vector, toz: Vector) -> Vector {
     (tox * from.x()) + (toy * from.y()) + (toz * from.z())
 }
 
-/// The logit function, scaled to approximate the probit function.
+/// A highly accurate approximation of the probit function.
 ///
-/// We use this as a close approximation to the gaussian inverse CDF,
-/// since the gaussian inverse CDF (probit) has no analytic formula.
-pub fn logit(p: f32, width: f32) -> f32 {
-    let n = 0.001 + (p * 0.998);
+/// From Peter John Acklam, sourced from here:
+/// https://web.archive.org/web/20151030215612/http://home.online.no/~pjacklam/notes/invnorm/
+///
+/// Regarding the approximation error, he says, "The absolute value of the
+/// relative error is less than 1.15 × 10−9 in the entire region."
+///
+/// Given that this implementation outputs 32-bit floating point values,
+/// and 32-bit floating point has significantly less precision than that,
+/// this approximation can essentially be considered exact.
+pub fn probit(n: f32, width: f32) -> f32 {
+    let n = n as f64;
 
-    (n / (1.0 - n)).ln() * width * (0.6266 / 4.0)
-}
+    // Coefficients of the rational approximations.
+    const A1: f64 = -3.969683028665376e+01;
+    const A2: f64 = 2.209460984245205e+02;
+    const A3: f64 = -2.759285104469687e+02;
+    const A4: f64 = 1.383577518672690e+02;
+    const A5: f64 = -3.066479806614716e+01;
+    const A6: f64 = 2.506628277459239e+00;
 
-pub fn fast_logit(p: f32, width: f32) -> f32 {
-    let n = 0.001 + (p * 0.998);
+    const B1: f64 = -5.447609879822406e+01;
+    const B2: f64 = 1.615858368580409e+02;
+    const B3: f64 = -1.556989798598866e+02;
+    const B4: f64 = 6.680131188771972e+01;
+    const B5: f64 = -1.328068155288572e+01;
 
-    fast_ln(n / (1.0 - n)) * width * (0.6266 / 4.0)
+    const C1: f64 = -7.784894002430293e-03;
+    const C2: f64 = -3.223964580411365e-01;
+    const C3: f64 = -2.400758277161838e+00;
+    const C4: f64 = -2.549732539343734e+00;
+    const C5: f64 = 4.374664141464968e+00;
+    const C6: f64 = 2.938163982698783e+00;
+
+    const D1: f64 = 7.784695709041462e-03;
+    const D2: f64 = 3.224671290700398e-01;
+    const D3: f64 = 2.445134137142996e+00;
+    const D4: f64 = 3.754408661907416e+00;
+
+    // Transition points between the rational functions.
+    const N_LOW: f64 = 0.02425;
+    const N_HIGH: f64 = 1.0 - N_LOW;
+
+    let x = match n {
+        // Lower region.
+        n if 0.0 < n && n < N_LOW => {
+            let q = (-2.0 * n.ln()).sqrt();
+            (((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6)
+                / ((((D1 * q + D2) * q + D3) * q + D4) * q + 1.0)
+        }
+
+        // Central region.
+        n if n <= N_HIGH => {
+            let q = n - 0.5;
+            let r = q * q;
+            (((((A1 * r + A2) * r + A3) * r + A4) * r + A5) * r + A6) * q
+                / (((((B1 * r + B2) * r + B3) * r + B4) * r + B5) * r + 1.0)
+        }
+
+        // Upper region.
+        n if n < 1.0 => {
+            let q = (-2.0 * (1.0 - n).ln()).sqrt();
+            -(((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6)
+                / ((((D1 * q + D2) * q + D3) * q + D4) * q + 1.0)
+        }
+
+        // Exactly 1 or 0.  Should be extremely rare.
+        n if n == 0.0 => -std::f64::INFINITY,
+        n if n == 1.0 => std::f64::INFINITY,
+
+        // Outside of valid input range.
+        _ => std::f64::NAN,
+    };
+
+    x as f32 * width
 }
 
 //----------------------------------------------------------------
diff --git a/src/renderer.rs b/src/renderer.rs
index 3034df4..8b273e1 100644
--- a/src/renderer.rs
+++ b/src/renderer.rs
@@ -18,7 +18,7 @@ use crate::{
     hash::hash_u32,
     hilbert,
     image::Image,
-    math::{fast_logit, upper_power_of_two},
+    math::{probit, upper_power_of_two},
     mis::power_heuristic,
     ray::{Ray, RayBatch},
     scene::{Scene, SceneLightSample},
@@ -244,9 +244,11 @@ impl<'a> Renderer<'a> {
                         // Calculate image plane x and y coordinates
                         let (img_x, img_y) = {
                             let filter_x =
-                                fast_logit(get_sample(4, si as u32, (x, y), self.seed), 1.5) + 0.5;
+                                probit(get_sample(4, si as u32, (x, y), self.seed), 2.0 / 6.0)
+                                    + 0.5;
                             let filter_y =
-                                fast_logit(get_sample(5, si as u32, (x, y), self.seed), 1.5) + 0.5;
+                                probit(get_sample(5, si as u32, (x, y), self.seed), 2.0 / 6.0)
+                                    + 0.5;
                             let samp_x = (filter_x + x as f32) * cmpx;
                             let samp_y = (filter_y + y as f32) * cmpy;
                             ((samp_x - 0.5) * x_extent, (0.5 - samp_y) * y_extent)