Simpler way to implement screen-space blue-noise sampling.

We now do the index scrambling at the top of the sampling loop, which is also faster since we only have to run it once per pixel instead of once per sample.
2022-07-20 18:54:38 -07:00 · 2022-07-20 18:54:38 -07:00 · 83b48f722d
commit 83b48f722d
parent 86814dbf8f
6 changed files with 129 additions and 67 deletions
--- a/src/hilbert.rs
+++ b/src/hilbert.rs
@ -20,7 +20,7 @@ fn hil_rot(n: u32, rx: u32, ry: u32, x: &mut u32, y: &mut u32) {
 /// y: The y coordinate.  Must be a positive integer no greater than 2^16-1.
 ///
 /// Returns the hilbert curve index corresponding to the (x,y) coordinates given.
-pub fn xy2d(x: u32, y: u32) -> u32 {
+pub fn xy2i(x: u32, y: u32) -> u32 {
    assert!(x < N);
    assert!(y < N);

@ -44,7 +44,7 @@ pub fn xy2d(x: u32, y: u32) -> u32 {
 /// d: The hilbert curve index.
 ///
 /// Returns the (x, y) coords at the given index.
-pub fn d2xy(d: u32) -> (u32, u32) {
+pub fn i2xy(d: u32) -> (u32, u32) {
    let (mut x, mut y) = (0, 0);
    let mut s = 1;
    let mut t = d;
@ -69,8 +69,8 @@ mod tests {
    #[test]
    fn reversible() {
        let d = 54;
-        let (x, y) = d2xy(d);
-        let d2 = xy2d(x, y);
+        let (x, y) = i2xy(d);
+        let d2 = xy2i(x, y);

        assert_eq!(d, d2);
    }
--- a/src/lerp.rs
+++ b/src/lerp.rs
@ -213,20 +213,18 @@ mod tests {

    #[test]
    fn lerp_matrix() {
-        let a = Transform::new_from_values(
-            0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
-        );
-        let b = Transform::new_from_values(
+        let a = Xform::new(0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
+        let b = Xform::new(
            -1.0, 1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
        );

-        let c1 = Transform::new_from_values(
+        let c1 = Xform::new(
            -0.25, 1.75, 2.25, 3.25, 4.25, 5.25, 6.25, 7.25, 8.25, 9.25, 10.25, 11.25,
        );
-        let c2 = Transform::new_from_values(
+        let c2 = Xform::new(
            -0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5,
        );
-        let c3 = Transform::new_from_values(
+        let c3 = Xform::new(
            -0.75, 1.25, 2.75, 3.75, 4.75, 5.75, 6.75, 7.75, 8.75, 9.75, 10.75, 11.75,
        );

--- a/src/main.rs
+++ b/src/main.rs
@ -29,11 +29,13 @@ mod lerp;
 mod light;
 mod math;
 mod mis;
+mod morton;
 mod parse;
 mod ray;
 mod renderer;
 mod sampling;
 mod scene;
+mod scramble;
 mod shading;
 mod surface;
 mod timer;
--- a/src/morton.rs
+++ b/src/morton.rs
@ -0,0 +1,81 @@
+#![allow(dead_code)]
+
+const N: u32 = 1 << 16;
+
+#[inline(always)]
+fn part_1_by_1(mut x: u32) -> u32 {
+    x &= 0x0000ffff;
+    x = (x ^ (x << 8)) & 0x00ff00ff;
+    x = (x ^ (x << 4)) & 0x0f0f0f0f;
+    x = (x ^ (x << 2)) & 0x33333333;
+    x = (x ^ (x << 1)) & 0x55555555;
+    x
+}
+
+#[inline(always)]
+fn part_1_by_2(mut x: u32) -> u32 {
+    x &= 0x000003ff;
+    x = (x ^ (x << 16)) & 0xff0000ff;
+    x = (x ^ (x << 8)) & 0x0300f00f;
+    x = (x ^ (x << 4)) & 0x030c30c3;
+    x = (x ^ (x << 2)) & 0x09249249;
+    x
+}
+
+#[inline(always)]
+fn compact_1_by_1(mut x: u32) -> u32 {
+    x &= 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
+    x = (x ^ (x >> 1)) & 0x33333333;
+    x = (x ^ (x >> 2)) & 0x0f0f0f0f;
+    x = (x ^ (x >> 4)) & 0x00ff00ff;
+    x = (x ^ (x >> 8)) & 0x0000ffff;
+    x
+}
+
+#[inline(always)]
+fn compact_1_by_2(mut x: u32) -> u32 {
+    x &= 0x09249249;
+    x = (x ^ (x >> 2)) & 0x030c30c3;
+    x = (x ^ (x >> 4)) & 0x0300f00f;
+    x = (x ^ (x >> 8)) & 0xff0000ff;
+    x = (x ^ (x >> 16)) & 0x000003ff;
+    x
+}
+
+/// Convert (x,y) to morton curve index.
+///
+/// x: The x coordinate.  Must be a positive integer no greater than 2^16-1.
+/// y: The y coordinate.  Must be a positive integer no greater than 2^16-1.
+///
+/// Returns the morton curve index corresponding to the (x,y) coordinates given.
+pub fn xy2i(x: u32, y: u32) -> u32 {
+    debug_assert!(x < N);
+    debug_assert!(y < N);
+
+    part_1_by_1(x) | (part_1_by_1(y) << 1)
+}
+
+/// Convert morton curve index to (x,y).
+///
+/// i: The morton curve index.
+///
+/// Returns the (x, y) coords at the given index.
+pub fn i2xy(i: u32) -> (u32, u32) {
+    (compact_1_by_1(i), compact_1_by_1(i >> 1))
+}
+
+// TODO: 3D morton curves.
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn reversible() {
+        let d = 54;
+        let (x, y) = i2xy(d);
+        let d2 = xy2i(x, y);
+
+        assert_eq!(d, d2);
+    }
+}
--- a/src/renderer.rs
+++ b/src/renderer.rs
@ -13,7 +13,6 @@ use crate::{
    accel::ACCEL_NODE_RAY_TESTS,
    color::{map_0_1_to_wavelength, SpectralSample, XYZ},
    fp_utils::robust_ray_origin,
-    hash::hash_u32,
    hilbert,
    image::Image,
    math::{probit, upper_power_of_two, Float4},
@ -153,7 +152,7 @@ impl<'a> Renderer<'a> {
                pow2 * pow2
            };
            for hilbert_d in 0..bucket_n {
-                let (bx, by) = hilbert::d2xy(hilbert_d);
+                let (bx, by) = hilbert::i2xy(hilbert_d);

                let x = bx as usize * bucket_w;
                let y = by as usize * bucket_h;
@ -238,13 +237,24 @@ impl<'a> Renderer<'a> {
            // Generate light paths and initial rays
            for y in bucket.y..(bucket.y + bucket.h) {
                for x in bucket.x..(bucket.x + bucket.w) {
+                    // `si_offset` is for screen-space blue-noise sampling in the
+                    // spirit of the paper "Screen-Space Blue-Noise Diffusion of Monte
+                    // Carlo Sampling Error via Hierarchical Ordering of Pixels" by
+                    // Ahmed et al.
+                    //
+                    // This works with any sobol or sobol-like sampler.  But also happens
+                    // to work well with golden-ratio sampling.  Since we only use sobol
+                    // and golden ratio sampling, we do this up-front here rather than in
+                    // the samplers themselves.
+                    let si_offset = crate::scramble::owen(hilbert::xy2i(x, y), self.seed)
+                        .wrapping_mul(self.spp as u32);
                    for si in 0..self.spp {
+                        let si = (si as u32).wrapping_add(si_offset);
+
                        // Raw sample numbers.
-                        let d0 = golden_ratio_sample(si as u32, (x, y), self.seed, self.spp as u32);
-                        let (d1, d2, d3, d4) =
-                            get_sample_4d(si as u32, 0, (x, y), self.seed, self.spp as u32);
-                        let (d5, _, _, _) =
-                            get_sample_4d(si as u32, 1, (x, y), self.seed, self.spp as u32);
+                        let d0 = golden_ratio_sample(si);
+                        let (d1, d2, d3, d4) = get_sample_4d(si, 0, self.seed);
+                        let (d5, _, _, _) = get_sample_4d(si, 1, self.seed);

                        // Calculate image plane x and y coordinates
                        let (img_x, img_y) = {
@ -265,7 +275,6 @@ impl<'a> Renderer<'a> {
                            d1,
                            map_0_1_to_wavelength(d0),
                            si as u32,
-                            self.spp as u32,
                        );
                        paths.push(path);
                        rays.push(ray, false);
@ -384,8 +393,6 @@ pub struct LightPath {
    light_attenuation: Float4,
    pending_color_addition: Float4,
    color: Float4,
-
-    max_samples: u32,
 }

 #[allow(clippy::new_ret_no_self)]
@ -399,7 +406,6 @@ impl LightPath {
        time: f32,
        wavelength: f32,
        sample_number: u32,
-        max_samples: u32,
    ) -> (LightPath, Ray) {
        (
            LightPath {
@ -420,8 +426,6 @@ impl LightPath {
                light_attenuation: Float4::splat(1.0),
                pending_color_addition: Float4::splat(0.0),
                color: Float4::splat(0.0),
-
-                max_samples: max_samples,
            },
            scene.camera.generate_ray(
                image_plane_co.0,
@ -442,13 +446,7 @@ impl LightPath {
    fn next_lds_samp(&mut self) -> (f32, f32, f32, f32) {
        let dimension = self.dim_offset;
        self.dim_offset += 1;
-        get_sample_4d(
-            self.sample_number,
-            dimension,
-            self.pixel_co,
-            self.sampling_seed,
-            self.max_samples,
-        )
+        get_sample_4d(self.sample_number, dimension, self.sampling_seed)
    }

    fn next(
@ -700,27 +698,12 @@ impl LightPath {
 /// and switching to random samples at higher dimensions where
 /// LDS samples aren't available.
 #[inline(always)]
-fn get_sample_4d(
-    i: u32,
-    dimension_set: u32,
-    pixel_co: (u32, u32),
-    seed: u32,
-    max_samples: u32,
-) -> (f32, f32, f32, f32) {
+fn get_sample_4d(i: u32, dimension_set: u32, seed: u32) -> (f32, f32, f32, f32) {
    match dimension_set {
        ds if ds < sobol_burley::NUM_DIMENSION_SETS_4D as u32 => {
            // Sobol sampling.
            //
-            // The `offset` is for screen-space blue noise distribution in the
-            // spirit of the paper "Screen-Space Blue-Noise Diffusion of Monte
-            // Carlo Sampling Error via Hierarchical Ordering of Pixels" by
-            // Ahmed et al.
-            // `sample_4d()` already does sample shuffling internally in a way
-            // consistent with the paper, so we only have to arrange the pixels
-            // according to a space filling curve to achieve the results from
-            // the paper.
-            let offset = hilbert::xy2d(pixel_co.0, pixel_co.1) * max_samples;
-            let n4 = sobol_burley::sample_4d(offset + i, ds, seed);
+            let n4 = sobol_burley::sample_4d(i, ds, seed);
            (n4[0], n4[1], n4[2], n4[3])
        }
        ds => {
@ -742,25 +725,9 @@ fn get_sample_4d(
 // due to the nature of hero wavelength sampling this ends up
 // being crazily more efficient than pretty much any other sampler,
 // and reduces variance by a huge amount.
-fn golden_ratio_sample(i: u32, pixel_co: (u32, u32), seed: u32, max_samples: u32) -> f32 {
-    use sobol_burley::parts::{owen_scramble_rev, u32_to_f32_norm};
-
-    // Turns out the screen-space blue-noise diffusion trick mentioned
-    // in `get_sample_4d()` works reasonably well on golden ratio
-    // sampling as well!  But we actually have to do the shuffling
-    // ourselves here.
-    let offset = owen_scramble_rev(
-        hilbert::xy2d(pixel_co.0, pixel_co.1).reverse_bits(),
-        hash_u32(seed, 6543266),
-    )
-    .reverse_bits()
-        * max_samples;
-    let i = i.wrapping_add(offset);
-
-    // The actual golden ratio sampling computation.
-    let n = i.wrapping_mul(2654435769);
-
-    u32_to_f32_norm(n)
+fn golden_ratio_sample(i: u32) -> f32 {
+    use sobol_burley::parts::u32_to_f32_norm;
+    u32_to_f32_norm(i.wrapping_mul(2654435769))
 }

 #[derive(Debug)]
--- a/src/scramble.rs
+++ b/src/scramble.rs
@ -0,0 +1,14 @@
+use crate::hash::hash_u32;
+
+/// Performs a base-2 Owen scramble on an integer.
+pub fn owen(n: u32, seed: u32) -> u32 {
+    let mut result = n;
+
+    for b in 0..31 {
+        let mask = (!0) << (b + 1);
+        result ^= hash_u32(n & mask, seed) & (1 << b);
+    }
+    result ^= hash_u32(0, seed) & (1 << 31); // Handle highest bit.
+
+    result
+}