From c5965ec8746ceaa57e237ac8e778446e66be0cc1 Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl <cessen@cessen.com>
Date: Sun, 15 May 2022 23:00:49 -0700
Subject: [PATCH] Replace some custom math functions with stdlib functions.

Some of the stdlib functions didn't exist in stable Rust before, and
others used to be slower than the custom ones they now replace.
---
 src/bbox.rs                    |  4 +-
 src/math.rs                    | 71 ++++------------------------------
 src/renderer.rs                |  4 +-
 src/shading/surface_closure.rs | 14 +++----
 4 files changed, 18 insertions(+), 75 deletions(-)

diff --git a/src/bbox.rs b/src/bbox.rs
index f4a2ab6..4815011 100644
--- a/src/bbox.rs
+++ b/src/bbox.rs
@@ -7,7 +7,7 @@ use std::{
 
 use crate::{
     lerp::{lerp, lerp_slice, Lerp},
-    math::{fast_minf32, Point, Transform, Vector},
+    math::{Point, Transform, Vector},
 };
 
 const BBOX_MAXT_ADJUST: f32 = 1.000_000_24;
@@ -47,7 +47,7 @@ impl BBox {
         // Find the far and near intersection
         let far_t = t1.max(t2).extend(std::f32::INFINITY);
         let near_t = t1.min(t2).extend(0.0);
-        let far_hit_t = fast_minf32(far_t.min_element() * BBOX_MAXT_ADJUST, max_t);
+        let far_hit_t = (far_t.min_element() * BBOX_MAXT_ADJUST).min(max_t);
         let near_hit_t = near_t.max_element();
 
         // Did we hit?
diff --git a/src/math.rs b/src/math.rs
index fec2f06..8dd0e31 100644
--- a/src/math.rs
+++ b/src/math.rs
@@ -4,73 +4,16 @@ use std::f32;
 
 pub use math3d::{cross, dot, CrossProduct, DotProduct, Normal, Point, Transform, Vector};
 
-/// Clamps a value between a min and max.
-pub fn clamp<T: PartialOrd>(v: T, lower: T, upper: T) -> T {
-    if v < lower {
-        lower
-    } else if v > upper {
-        upper
-    } else {
-        v
-    }
-}
-
-// The stdlib min function is slower than a simple if statement for some reason.
-pub fn fast_minf32(a: f32, b: f32) -> f32 {
-    if a < b {
-        a
-    } else {
-        b
-    }
-}
-
-// The stdlib max function is slower than a simple if statement for some reason.
-pub fn fast_maxf32(a: f32, b: f32) -> f32 {
-    if a > b {
-        a
-    } else {
-        b
-    }
-}
-
-/// Rounds an integer up to the next power of two.
-pub fn upper_power_of_two(mut v: u32) -> u32 {
-    v -= 1;
-    v |= v >> 1;
-    v |= v >> 2;
-    v |= v >> 4;
-    v |= v >> 8;
-    v |= v >> 16;
-    v + 1
-}
-
 /// Gets the log base 2 of the given integer
-pub fn log2_64(mut value: u64) -> u64 {
-    // This works by doing a binary search for the largest non-zero binary
-    // digit in the number.  Its bit position is then the log2 of the integer.
+pub fn log2_64(n: u64) -> u64 {
+    // This works by finding the highest set bit in the number.  Its bit
+    // position is the floor of log2(n).  Zero has no set bit and yields 0.
 
-    let mut log = 0;
-
-    const POWERS: [(u64, u64); 6] = [
-        (32, (1 << 32) - 1),
-        (16, (1 << 16) - 1),
-        (8, (1 << 8) - 1),
-        (4, (1 << 4) - 1),
-        (2, (1 << 2) - 1),
-        (1, (1 << 1) - 1),
-    ];
-
-    for &(i, j) in &POWERS {
-        let tmp = value >> i;
-        if tmp != 0 {
-            log += i;
-            value = tmp;
-        } else {
-            value &= j;
-        }
+    if n == 0 {
+        0
+    } else {
+        (63 - n.leading_zeros()) as u64
     }
-
-    log
 }
 
 /// Creates a coordinate system from a single vector.
diff --git a/src/renderer.rs b/src/renderer.rs
index 9cbd1e6..8739e02 100644
--- a/src/renderer.rs
+++ b/src/renderer.rs
@@ -18,7 +18,7 @@ use crate::{
     hash::hash_u32,
     hilbert,
     image::Image,
-    math::{probit, upper_power_of_two},
+    math::probit,
     mis::power_heuristic,
     ray::{Ray, RayBatch},
     scene::{Scene, SceneLightSample},
@@ -151,7 +151,7 @@ impl<'a> Renderer<'a> {
                 let bucket_count_x = ((width / bucket_w) + 1) as u32;
                 let bucket_count_y = ((height / bucket_h) + 1) as u32;
                 let larger = cmp::max(bucket_count_x, bucket_count_y);
-                let pow2 = upper_power_of_two(larger);
+                let pow2 = larger.next_power_of_two();
                 pow2 * pow2
             };
             for hilbert_d in 0..bucket_n {
diff --git a/src/shading/surface_closure.rs b/src/shading/surface_closure.rs
index b1f3741..a87ffdf 100644
--- a/src/shading/surface_closure.rs
+++ b/src/shading/surface_closure.rs
@@ -7,7 +7,7 @@ use glam::Vec4;
 use crate::{
     color::{Color, SpectralSample},
     lerp::{lerp, Lerp},
-    math::{clamp, dot, zup_to_vec, Normal, Vector},
+    math::{dot, zup_to_vec, Normal, Vector},
     sampling::cosine_sample_hemisphere,
 };
 
@@ -481,11 +481,11 @@ mod ggx_closure {
         }
 
         // Calculate needed dot products
-        let na = clamp(dot(nn, aa), -1.0, 1.0);
-        let nb = clamp(dot(nn, bb), -1.0, 1.0);
-        let ha = clamp(dot(hh, aa), -1.0, 1.0);
-        let hb = clamp(dot(hh, bb), -1.0, 1.0);
-        let nh = clamp(dot(nn, hh), -1.0, 1.0);
+        let na = dot(nn, aa).clamp(-1.0, 1.0);
+        let nb = dot(nn, bb).clamp(-1.0, 1.0);
+        let ha = dot(hh, aa).clamp(-1.0, 1.0);
+        let hb = dot(hh, bb).clamp(-1.0, 1.0);
+        let nh = dot(nn, hh).clamp(-1.0, 1.0);
 
         // Calculate F - Fresnel
         let col_f = {
@@ -584,7 +584,7 @@ mod ggx_closure {
         // Approximate method
         let theta = cos_theta_max.acos();
         let hh = (aa + bb).normalized();
-        let nh = clamp(dot(nn, hh), -1.0, 1.0);
+        let nh = dot(nn, hh).clamp(-1.0, 1.0);
         let fac = ggx_d(nh, (1.0f32).min(roughness.sqrt() + (2.0 * theta / PI_32)));
 
         fac * (1.0f32).min(1.0 - cos_theta_max) * INV_PI