Use a much better approximation of the probit function.

For 32-bit floating point, it can essentially be considered exact.
This commit is contained in:
Nathan Vegdahl 2020-03-27 21:19:24 +09:00
parent e46fc5a4d6
commit 8deb305850
2 changed files with 77 additions and 13 deletions

View File

@ -110,20 +110,82 @@ pub fn zup_to_vec(from: Vector, toz: Vector) -> Vector {
(tox * from.x()) + (toy * from.y()) + (toz * from.z())
}
/// The logit function, scaled to approximate the probit function.
/// A highly accurate approximation of the probit function.
///
/// We use this as a close approximation to the gaussian inverse CDF,
/// since the gaussian inverse CDF (probit) has no analytic formula.
pub fn logit(p: f32, width: f32) -> f32 {
let n = 0.001 + (p * 0.998);
/// From Peter John Acklam, sourced from here:
/// https://web.archive.org/web/20151030215612/http://home.online.no/~pjacklam/notes/invnorm/
///
/// Regarding the approximation error, he says, "The absolute value of the
/// relative error is less than 1.15 × 109 in the entire region."
///
/// Given that this implementation outputs 32-bit floating point values,
/// and 32-bit floating point has significantly less precision than that,
/// this approximation can essentially be considered exact.
pub fn probit(n: f32, width: f32) -> f32 {
let n = n as f64;
(n / (1.0 - n)).ln() * width * (0.6266 / 4.0)
}
// Coefficients of the rational approximations.
const A1: f64 = -3.969683028665376e+01;
const A2: f64 = 2.209460984245205e+02;
const A3: f64 = -2.759285104469687e+02;
const A4: f64 = 1.383577518672690e+02;
const A5: f64 = -3.066479806614716e+01;
const A6: f64 = 2.506628277459239e+00;
pub fn fast_logit(p: f32, width: f32) -> f32 {
let n = 0.001 + (p * 0.998);
const B1: f64 = -5.447609879822406e+01;
const B2: f64 = 1.615858368580409e+02;
const B3: f64 = -1.556989798598866e+02;
const B4: f64 = 6.680131188771972e+01;
const B5: f64 = -1.328068155288572e+01;
fast_ln(n / (1.0 - n)) * width * (0.6266 / 4.0)
const C1: f64 = -7.784894002430293e-03;
const C2: f64 = -3.223964580411365e-01;
const C3: f64 = -2.400758277161838e+00;
const C4: f64 = -2.549732539343734e+00;
const C5: f64 = 4.374664141464968e+00;
const C6: f64 = 2.938163982698783e+00;
const D1: f64 = 7.784695709041462e-03;
const D2: f64 = 3.224671290700398e-01;
const D3: f64 = 2.445134137142996e+00;
const D4: f64 = 3.754408661907416e+00;
// Transition points between the rational functions.
const N_LOW: f64 = 0.02425;
const N_HIGH: f64 = 1.0 - N_LOW;
let x = match n {
// Lower region.
n if 0.0 < n && n < N_LOW => {
let q = (-2.0 * n.ln()).sqrt();
(((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6)
/ ((((D1 * q + D2) * q + D3) * q + D4) * q + 1.0)
}
// Central region.
n if n <= N_HIGH => {
let q = n - 0.5;
let r = q * q;
(((((A1 * r + A2) * r + A3) * r + A4) * r + A5) * r + A6) * q
/ (((((B1 * r + B2) * r + B3) * r + B4) * r + B5) * r + 1.0)
}
// Upper region.
n if n < 1.0 => {
let q = (-2.0 * (1.0 - n).ln()).sqrt();
-(((((C1 * q + C2) * q + C3) * q + C4) * q + C5) * q + C6)
/ ((((D1 * q + D2) * q + D3) * q + D4) * q + 1.0)
}
// Exactly 1 or 0. Should be extremely rare.
n if n == 0.0 => -std::f64::INFINITY,
n if n == 1.0 => std::f64::INFINITY,
// Outside of valid input range.
_ => std::f64::NAN,
};
x as f32 * width
}
//----------------------------------------------------------------

View File

@ -18,7 +18,7 @@ use crate::{
hash::hash_u32,
hilbert,
image::Image,
math::{fast_logit, upper_power_of_two},
math::{probit, upper_power_of_two},
mis::power_heuristic,
ray::{Ray, RayBatch},
scene::{Scene, SceneLightSample},
@ -244,9 +244,11 @@ impl<'a> Renderer<'a> {
// Calculate image plane x and y coordinates
let (img_x, img_y) = {
let filter_x =
fast_logit(get_sample(4, si as u32, (x, y), self.seed), 1.5) + 0.5;
probit(get_sample(4, si as u32, (x, y), self.seed), 2.0 / 6.0)
+ 0.5;
let filter_y =
fast_logit(get_sample(5, si as u32, (x, y), self.seed), 1.5) + 0.5;
probit(get_sample(5, si as u32, (x, y), self.seed), 2.0 / 6.0)
+ 0.5;
let samp_x = (filter_x + x as f32) * cmpx;
let samp_y = (filter_y + y as f32) * cmpy;
((samp_x - 0.5) * x_extent, (0.5 - samp_y) * y_extent)