From 45241784fb01a87f4823e860813ad6e649e32dfd Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl <cessen@cessen.com>
Date: Thu, 30 Apr 2020 22:41:16 +0900
Subject: [PATCH] Code tidying on the Sobol sampler.

Also swapped the sample index and dimension paramater in the function
signature.  This feels more intuitive.
---
 src/renderer.rs             |  12 ++--
 sub_crates/halton/build.rs  |   2 +-
 sub_crates/sobol/build.rs   |  12 +---
 sub_crates/sobol/src/lib.rs | 128 +++++++++++++++---------------------
 4 files changed, 63 insertions(+), 91 deletions(-)

diff --git a/src/renderer.rs b/src/renderer.rs
index be045ef..3f6fbb4 100644
--- a/src/renderer.rs
+++ b/src/renderer.rs
@@ -242,8 +242,8 @@ impl<'a> Renderer<'a> {
                 for x in bucket.x..(bucket.x + bucket.w) {
                     for si in 0..self.spp {
                         // Raw sample numbers.
-                        let (d0, d1, d2, d3) = get_sample_4d(0, si as u32, (x, y), self.seed);
-                        let (d4, _, _, _) = get_sample_4d(1, si as u32, (x, y), self.seed);
+                        let (d0, d1, d2, d3) = get_sample_4d(si as u32, 0, (x, y), self.seed);
+                        let (d4, _, _, _) = get_sample_4d(si as u32, 1, (x, y), self.seed);
 
                         // Calculate image plane x and y coordinates
                         let (img_x, img_y) = {
@@ -439,8 +439,8 @@ impl LightPath {
         let dimension = self.dim_offset;
         self.dim_offset += 1;
         get_sample_4d(
-            dimension,
             self.sample_number,
+            dimension,
             self.pixel_co,
             self.sampling_seed,
         )
@@ -696,8 +696,8 @@ impl LightPath {
 /// LDS samples aren't available.
 #[inline(always)]
 fn get_sample_4d(
-    dimension_set: u32,
     i: u32,
+    dimension_set: u32,
     pixel_co: (u32, u32),
     seed: u32,
 ) -> (f32, f32, f32, f32) {
@@ -706,9 +706,9 @@ fn get_sample_4d(
     let seed = pixel_co.0 ^ (pixel_co.1 << 16) ^ seed.wrapping_mul(0x736caf6f);
 
     match dimension_set {
-        ds if ds < sobol::MAX_DIMENSION as u32 => {
+        ds if ds < sobol::MAX_DIMENSION_SET as u32 => {
             // Sobol sampling.
-            let n4 = sobol::sample_4d(ds, i, seed);
+            let n4 = sobol::sample_4d(i, ds, seed);
             (n4[0], n4[1], n4[2], n4[3])
         }
         ds => {
diff --git a/sub_crates/halton/build.rs b/sub_crates/halton/build.rs
index 51b8793..cebf92e 100644
--- a/sub_crates/halton/build.rs
+++ b/sub_crates/halton/build.rs
@@ -98,7 +98,7 @@ pub const MAX_DIMENSION: u32 = {};
     f.write_all(
         format!(
             r#"
-pub fn sample(dimension: u32, index: u32) -> f32 {{
+pub fn sample(index: u32, dimension: u32) -> f32 {{
     let mut index = index;
 
     match dimension {{"#
diff --git a/sub_crates/sobol/build.rs b/sub_crates/sobol/build.rs
index afa0499..6d04271 100644
--- a/sub_crates/sobol/build.rs
+++ b/sub_crates/sobol/build.rs
@@ -18,7 +18,7 @@ fn main() {
     let vectors = generate_direction_vectors(NUM_DIMENSIONS);
 
     // Write dimensions limit.
-    f.write_all(format!("pub const MAX_DIMENSION: u32 = {};\n", NUM_DIMENSIONS).as_bytes())
+    f.write_all(format!("const MAX_DIMENSION: u32 = {};\n", NUM_DIMENSIONS).as_bytes())
         .unwrap();
 
     // Write the vectors.
@@ -26,14 +26,8 @@ fn main() {
     // uses them.  First, we interleave the numbers of each set of four
     // dimensions, for SIMD evaluation.  Second, each number is written
     // with reversed bits, to avoid needing to reverse them before scrambling.
-    f.write_all(
-        format!(
-            "pub const REV_VECTORS: &[[[u{0}; 4]; {0}]] = &[\n",
-            SOBOL_BITS
-        )
-        .as_bytes(),
-    )
-    .unwrap();
+    f.write_all(format!("const REV_VECTORS: &[[[u{0}; 4]; {0}]] = &[\n", SOBOL_BITS).as_bytes())
+        .unwrap();
     for d4 in vectors.chunks_exact(4) {
         f.write_all("  [\n".as_bytes()).unwrap();
         for ((a, b), (c, d)) in d4[0]
diff --git a/sub_crates/sobol/src/lib.rs b/sub_crates/sobol/src/lib.rs
index 59b0958..06a6e53 100644
--- a/sub_crates/sobol/src/lib.rs
+++ b/sub_crates/sobol/src/lib.rs
@@ -1,79 +1,74 @@
 //! An implementation of the Sobol sequence with Owen scrambling.
+//!
+//! This implementation also allows seeding to create multiple statistically
+//! independent Sobol sequences, using a technique due to Brent Burley.  This
+//! is useful for any situation where you want to decorrelate sampling.
+//!
+//! This implementation is SIMD accelerated with SSE 4.1 on x86-64 platforms.
 
 mod wide;
 use wide::Int4;
 
-// The following `include` provides `MAX_DIMENSION` and `REV_VECTORS`.
+// This `include` provides `MAX_DIMENSION` and `REV_VECTORS`.
 // See the build.rs file for how this included file is generated.
 include!(concat!(env!("OUT_DIR"), "/vectors.inc"));
 
-/// Compute one component of one sample from the Sobol sequence, where
-/// `dimension` specifies the component and `index` specifies the sample
-/// within the sequence.
+pub const MAX_DIMENSION_SET: u32 = MAX_DIMENSION / 4;
+
+/// Compute four dimensions of a sample from the Owen-scrambled Sobol sequence.
 ///
-/// Passing a different `seed` parameter results in a statistically
-/// independent Sobol sequence, uncorrelated to others with different seeds.
+/// `sample_index` specifies which sample in the Sobol sequence to compute
+/// the dimensions for.  This implementation produces up to 2^16 samples total,
+/// and will loop back to the start of the sequence after that.
 ///
-/// Note: generates a maximum of 2^16 samples per dimension.  If the `index`
-/// parameter exceeds 2^16-1, the sample set will start repeating.
+/// `dimension_set` specifies which four dimensions to compute:
+/// * 0 = dimensions 0-3
+/// * 1 = dimensions 4-7
+/// * 2 = dimensions 8-11
+/// * And so on.
+///
+/// Passing a different `seed` will produce a completely statistically
+/// independent Sobol sequence, with no stratification with or correlation to
+/// sequences with other seeds.
 #[inline]
-pub fn sample(dimension: u32, index: u32, seed: u32) -> f32 {
-    sample_4d(dimension >> 2, index, seed)[(dimension & 0b11) as usize]
-}
-
-/// Same as `sample()` but calculates a set of 4 dimensions all at once
-/// using SIMD.
-#[inline]
-pub fn sample_4d(dimension_set: u32, index: u32, seed: u32) -> [f32; 4] {
-    // This index shuffling approach is due to Brent Burley, and is
-    // what allows us to create statistically independent Sobol sequences.
-    let shuffled_rev_index = lk_scramble(index.reverse_bits(), seed);
-
-    let sobol = lk_int4_scramble(
-        sobol_int4_rev(dimension_set, shuffled_rev_index),
-        dimension_set ^ seed,
-    )
-    .reverse_bits();
-
-    sobol.to_norm_floats()
-}
-
-//----------------------------------------------------------------------
-
-/// The core Sobol samplng code.  Used by the other functions.
-///
-/// This actually produces the Sobol sequence with reversed bits, and takes
-/// the index with reversed bits.  This is because the related scrambling
-/// code works on reversed bits, so this avoids repeated reversing/unreversing,
-/// keeping everything in reversed bits until the final step.
-///
-/// Note: if the `index` parameter exceeds 2^16-1, the sample set will start
-/// repeating.
-#[inline(always)]
-fn sobol_int4_rev(dimension_set: u32, index: u32) -> Int4 {
-    assert!(dimension_set < (MAX_DIMENSION / 4));
+pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] {
+    assert!(dimension_set < MAX_DIMENSION_SET);
     let vecs = &REV_VECTORS[dimension_set as usize];
-    let mut index = (index >> 16) as u16;
 
-    let mut result = Int4::zero();
+    // Shuffle the index using the given seed to produce a unique statistically
+    // independent Sobol sequence.  This index shuffling approach is due to
+    // Brent Burley.
+    let shuffled_rev_index = lk_scramble(sample_index.reverse_bits(), seed);
+
+    // Compute the Sobol sample with reversed bits.
+    let mut sobol_rev = Int4::zero();
+    let mut index = shuffled_rev_index & 0xffff0000; // Only use the top 16 bits.
     let mut i = 0;
     while index != 0 {
         let j = index.leading_zeros();
-        result ^= vecs[(i + j) as usize].into();
+        sobol_rev ^= vecs[(i + j) as usize].into();
         i += j + 1;
         index <<= j;
         index <<= 1;
     }
 
-    result
+    // Do Owen scrambling on the reversed-bits Sobol sample.
+    let sobol_owen_rev = lk_scramble_int4(sobol_rev, dimension_set ^ seed);
+
+    // Un-reverse the bits and convert to floating point in [0, 1).
+    sobol_owen_rev.reverse_bits().to_norm_floats()
 }
 
+//----------------------------------------------------------------------
+
 /// Scrambles `n` using the Laine Karras hash.  This is equivalent to Owen
 /// scrambling, but on reversed bits.
 #[inline]
 fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
-    // This uses the technique presented in the paper "Stratified Sampling for
-    // Stochastic Transparency" by Laine and Karras to scramble the bits.
+    // This uses essentially the same technique as presented in the paper
+    // "Stratified Sampling for Stochastic Transparency" by Laine and Karras,
+    // but with a faster, higher quality hash function.
+    //
     // The basic idea is that we're running a special kind of hash function
     // that only allows avalanche to happen upwards (i.e. a bit is only
     // affected by the bits lower than it).  This is achieved by only doing
@@ -95,11 +90,10 @@ fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
     n
 }
 
-/// Same as `lk_scramble()`, except does it on 4 integers at a time
-/// with SIMD.
+/// Same as `lk_scramble()`, except does it on 4 integers at a time.
 #[inline(always)]
-fn lk_int4_scramble(mut n: Int4, scramble: u32) -> Int4 {
-    n += hash_4d([scramble; 4].into(), 2);
+fn lk_scramble_int4(mut n: Int4, scramble: u32) -> Int4 {
+    n += hash_int4([scramble; 4].into(), 2);
 
     n ^= [0xdc967795; 4].into();
     n *= [0x97b754b7; 4].into();
@@ -109,29 +103,11 @@ fn lk_int4_scramble(mut n: Int4, scramble: u32) -> Int4 {
     n
 }
 
-/// Same as `lk_scramble()` except uses a slower more full version of
-/// hashing.
-///
-/// This is mainly intended to help validate the faster scrambling function,
-/// and likely shouldn't be used for real things.  It is significantly
-/// slower.
-#[allow(dead_code)]
-#[inline]
-fn lk_scramble_slow(mut n: u32, scramble: u32) -> u32 {
-    n = n.wrapping_add(hash(scramble, 3));
-    for i in 0..31 {
-        let low_mask = (1u32 << i).wrapping_sub(1);
-        let low_bits_hash = hash((n & low_mask) ^ hash(i, 3), 3);
-        n ^= low_bits_hash & !low_mask;
-    }
-    n
-}
-
 /// A simple 32-bit hash function.  Its quality can be tuned with
 /// the number of rounds used.
 #[inline(always)]
 fn hash(n: u32, rounds: u32) -> u32 {
-    let mut hash = n ^ 0x912f69ba;
+    let mut hash = n ^ 0x79c68e4a;
     for _ in 0..rounds {
         hash = hash.wrapping_mul(0x736caf6f);
         hash ^= hash.wrapping_shr(16);
@@ -139,10 +115,12 @@ fn hash(n: u32, rounds: u32) -> u32 {
     hash
 }
 
-/// A simple 32-bit hash function.  Its quality can be tuned with
-/// the number of rounds used.
+/// Same as `hash()` except performs hashing on four numbers at once.
+///
+/// Each of the four numbers gets a different hash, so even if all input
+/// numbers are the same, the outputs will still be different for each of them.
 #[inline(always)]
-fn hash_4d(n: Int4, rounds: u32) -> Int4 {
+fn hash_int4(n: Int4, rounds: u32) -> Int4 {
     let mut hash = n;
     hash ^= [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into();
     for _ in 0..rounds {