From 8deb3058505869ae9d40cc6cb2c9f32f46a145bd Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Fri, 27 Mar 2020 21:19:24 +0900 Subject: [PATCH] Use a much better approximation of the probit function. For 32-bit floating point, it can essentially be considered exact. --- src/math.rs | 82 +++++++++++++++++++++++++++++++++++++++++++------ src/renderer.rs | 8 +++-- 2 files changed, 77 insertions(+), 13 deletions(-) diff --git a/src/math.rs b/src/math.rs index 25275fe..10255d3 100644 --- a/src/math.rs +++ b/src/math.rs @@ -110,20 +110,82 @@ pub fn zup_to_vec(from: Vector, toz: Vector) -> Vector { (tox * from.x()) + (toy * from.y()) + (toz * from.z()) } -/// The logit function, scaled to approximate the probit function. +/// A highly accurate approximation of the probit function. /// -/// We use this as a close approximation to the gaussian inverse CDF, -/// since the gaussian inverse CDF (probit) has no analytic formula. -pub fn logit(p: f32, width: f32) -> f32 { - let n = 0.001 + (p * 0.998); +/// From Peter John Acklam, sourced from here: +/// https://web.archive.org/web/20151030215612/http://home.online.no/~pjacklam/notes/invnorm/ +/// +/// Regarding the approximation error, he says, "The absolute value of the +/// relative error is less than 1.15 × 10−9 in the entire region." +/// +/// Given that this implementation outputs 32-bit floating point values, +/// and 32-bit floating point has significantly less precision than that, +/// this approximation can essentially be considered exact. +pub fn probit(n: f32, width: f32) -> f32 { + let n = n as f64; - (n / (1.0 - n)).ln() * width * (0.6266 / 4.0) -} + // Coefficients of the rational approximations. + const A1: f64 = -3.969683028665376e+01; + const A2: f64 = 2.209460984245205e+02; + const A3: f64 = -2.759285104469687e+02; + const A4: f64 = 1.383577518672690e+02; + const A5: f64 = -3.066479806614716e+01; + const A6: f64 = 2.506628277459239e+00; -pub fn fast_logit(p: f32, width: f32) -> f32 { - let n = 0.001 + (p * 0.998); + const B1: f64 = -5.447609879822406e+01; + const B2: f64 = 1.615858368580409e+02; + const B3: f64 = -1.556989798598866e+02; + const B4: f64 = 6.680131188771972e+01; + const B5: f64 = -1.328068155288572e+01; - fast_ln(n / (1.0 - n)) * width * (0.6266 / 4.0) + const C1: f64 = -7.784894002430293e-03; + const C2: f64 = -3.223964580411365e-01; + const C3: f64 = -2.400758277161838e+00; + const C4: f64 = -2.549732539343734e+00; + const C5: f64 = 4.374664141464968e+00; + const C6: f64 = 2.938163982698783e+00; + + const D1: f64 = 7.784695709041462e-03; + const D2: f64 = 3.224671290700398e-01; + const D3: f64 = 2.445134137142996e+00; + const D4: f64 = 3.754408661907416e+00; + + // Transition points between the rational functions. + const N_LOW: f64 = 0.02425; + const N_HIGH: f64 = 1.0 - N_LOW; + + let x = match n { + // Lower region. + n if 0.0 < n && n < N_LOW => { + let q = (-2.0 * n.ln()).sqrt(); + (((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6) + / ((((D1 * q + D2) * q + D3) * q + D4) * q + 1.0) + } + + // Central region. + n if n <= N_HIGH => { + let q = n - 0.5; + let r = q * q; + (((((A1 * r + A2) * r + A3) * r + A4) * r + A5) * r + A6) * q + / (((((B1 * r + B2) * r + B3) * r + B4) * r + B5) * r + 1.0) + } + + // Upper region. + n if n < 1.0 => { + let q = (-2.0 * (1.0 - n).ln()).sqrt(); + -(((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6) + / ((((D1 * q + D2) * q + D3) * q + D4) * q + 1.0) + } + + // Exactly 1 or 0. Should be extremely rare. + n if n == 0.0 => -std::f64::INFINITY, + n if n == 1.0 => std::f64::INFINITY, + + // Outside of valid input range. + _ => std::f64::NAN, + }; + + x as f32 * width } //---------------------------------------------------------------- diff --git a/src/renderer.rs b/src/renderer.rs index 3034df4..8b273e1 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -18,7 +18,7 @@ use crate::{ hash::hash_u32, hilbert, image::Image, - math::{fast_logit, upper_power_of_two}, + math::{probit, upper_power_of_two}, mis::power_heuristic, ray::{Ray, RayBatch}, scene::{Scene, SceneLightSample}, @@ -244,9 +244,11 @@ impl<'a> Renderer<'a> { // Calculate image plane x and y coordinates let (img_x, img_y) = { let filter_x = - fast_logit(get_sample(4, si as u32, (x, y), self.seed), 1.5) + 0.5; + probit(get_sample(4, si as u32, (x, y), self.seed), 2.0 / 6.0) + + 0.5; let filter_y = - fast_logit(get_sample(5, si as u32, (x, y), self.seed), 1.5) + 0.5; + probit(get_sample(5, si as u32, (x, y), self.seed), 2.0 / 6.0) + + 0.5; let samp_x = (filter_x + x as f32) * cmpx; let samp_y = (filter_y + y as f32) * cmpy; ((samp_x - 0.5) * x_extent, (0.5 - samp_y) * y_extent)