From 45241784fb01a87f4823e860813ad6e649e32dfd Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Thu, 30 Apr 2020 22:41:16 +0900 Subject: [PATCH] Code tidying on the Sobol sampler. Also swapped the sample index and dimension paramater in the function signature. This feels more intuitive. --- src/renderer.rs | 12 ++-- sub_crates/halton/build.rs | 2 +- sub_crates/sobol/build.rs | 12 +--- sub_crates/sobol/src/lib.rs | 128 +++++++++++++++--------------------- 4 files changed, 63 insertions(+), 91 deletions(-) diff --git a/src/renderer.rs b/src/renderer.rs index be045ef..3f6fbb4 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -242,8 +242,8 @@ impl<'a> Renderer<'a> { for x in bucket.x..(bucket.x + bucket.w) { for si in 0..self.spp { // Raw sample numbers. - let (d0, d1, d2, d3) = get_sample_4d(0, si as u32, (x, y), self.seed); - let (d4, _, _, _) = get_sample_4d(1, si as u32, (x, y), self.seed); + let (d0, d1, d2, d3) = get_sample_4d(si as u32, 0, (x, y), self.seed); + let (d4, _, _, _) = get_sample_4d(si as u32, 1, (x, y), self.seed); // Calculate image plane x and y coordinates let (img_x, img_y) = { @@ -439,8 +439,8 @@ impl LightPath { let dimension = self.dim_offset; self.dim_offset += 1; get_sample_4d( - dimension, self.sample_number, + dimension, self.pixel_co, self.sampling_seed, ) @@ -696,8 +696,8 @@ impl LightPath { /// LDS samples aren't available. #[inline(always)] fn get_sample_4d( - dimension_set: u32, i: u32, + dimension_set: u32, pixel_co: (u32, u32), seed: u32, ) -> (f32, f32, f32, f32) { @@ -706,9 +706,9 @@ fn get_sample_4d( let seed = pixel_co.0 ^ (pixel_co.1 << 16) ^ seed.wrapping_mul(0x736caf6f); match dimension_set { - ds if ds < sobol::MAX_DIMENSION as u32 => { + ds if ds < sobol::MAX_DIMENSION_SET as u32 => { // Sobol sampling. - let n4 = sobol::sample_4d(ds, i, seed); + let n4 = sobol::sample_4d(i, ds, seed); (n4[0], n4[1], n4[2], n4[3]) } ds => { diff --git a/sub_crates/halton/build.rs b/sub_crates/halton/build.rs index 51b8793..cebf92e 100644 --- a/sub_crates/halton/build.rs +++ b/sub_crates/halton/build.rs @@ -98,7 +98,7 @@ pub const MAX_DIMENSION: u32 = {}; f.write_all( format!( r#" -pub fn sample(dimension: u32, index: u32) -> f32 {{ +pub fn sample(index: u32, dimension: u32) -> f32 {{ let mut index = index; match dimension {{"# diff --git a/sub_crates/sobol/build.rs b/sub_crates/sobol/build.rs index afa0499..6d04271 100644 --- a/sub_crates/sobol/build.rs +++ b/sub_crates/sobol/build.rs @@ -18,7 +18,7 @@ fn main() { let vectors = generate_direction_vectors(NUM_DIMENSIONS); // Write dimensions limit. - f.write_all(format!("pub const MAX_DIMENSION: u32 = {};\n", NUM_DIMENSIONS).as_bytes()) + f.write_all(format!("const MAX_DIMENSION: u32 = {};\n", NUM_DIMENSIONS).as_bytes()) .unwrap(); // Write the vectors. @@ -26,14 +26,8 @@ fn main() { // uses them. First, we interleave the numbers of each set of four // dimensions, for SIMD evaluation. Second, each number is written // with reversed bits, to avoid needing to reverse them before scrambling. - f.write_all( - format!( - "pub const REV_VECTORS: &[[[u{0}; 4]; {0}]] = &[\n", - SOBOL_BITS - ) - .as_bytes(), - ) - .unwrap(); + f.write_all(format!("const REV_VECTORS: &[[[u{0}; 4]; {0}]] = &[\n", SOBOL_BITS).as_bytes()) + .unwrap(); for d4 in vectors.chunks_exact(4) { f.write_all(" [\n".as_bytes()).unwrap(); for ((a, b), (c, d)) in d4[0] diff --git a/sub_crates/sobol/src/lib.rs b/sub_crates/sobol/src/lib.rs index 59b0958..06a6e53 100644 --- a/sub_crates/sobol/src/lib.rs +++ b/sub_crates/sobol/src/lib.rs @@ -1,79 +1,74 @@ //! An implementation of the Sobol sequence with Owen scrambling. +//! +//! This implementation also allows seeding to create multiple statistically +//! independent Sobol sequences, using a technique due to Brent Burley. This +//! is useful for any situation where you want to decorrelate sampling. +//! +//! This implementation is SIMD accelerated with SSE 4.1 on x86-64 platforms. mod wide; use wide::Int4; -// The following `include` provides `MAX_DIMENSION` and `REV_VECTORS`. +// This `include` provides `MAX_DIMENSION` and `REV_VECTORS`. // See the build.rs file for how this included file is generated. include!(concat!(env!("OUT_DIR"), "/vectors.inc")); -/// Compute one component of one sample from the Sobol sequence, where -/// `dimension` specifies the component and `index` specifies the sample -/// within the sequence. +pub const MAX_DIMENSION_SET: u32 = MAX_DIMENSION / 4; + +/// Compute four dimensions of a sample from the Owen-scrambled Sobol sequence. /// -/// Passing a different `seed` parameter results in a statistically -/// independent Sobol sequence, uncorrelated to others with different seeds. +/// `sample_index` specifies which sample in the Sobol sequence to compute +/// the dimensions for. This implementation produces up to 2^16 samples total, +/// and will loop back to the start of the sequence after that. /// -/// Note: generates a maximum of 2^16 samples per dimension. If the `index` -/// parameter exceeds 2^16-1, the sample set will start repeating. +/// `dimension_set` specifies which four dimensions to compute: +/// * 0 = dimensions 0-3 +/// * 1 = dimensions 4-7 +/// * 2 = dimensions 8-11 +/// * And so on. +/// +/// Passing a different `seed` will produce a completely statistically +/// independent Sobol sequence, with no stratification with or correlation to +/// sequences with other seeds. #[inline] -pub fn sample(dimension: u32, index: u32, seed: u32) -> f32 { - sample_4d(dimension >> 2, index, seed)[(dimension & 0b11) as usize] -} - -/// Same as `sample()` but calculates a set of 4 dimensions all at once -/// using SIMD. -#[inline] -pub fn sample_4d(dimension_set: u32, index: u32, seed: u32) -> [f32; 4] { - // This index shuffling approach is due to Brent Burley, and is - // what allows us to create statistically independent Sobol sequences. - let shuffled_rev_index = lk_scramble(index.reverse_bits(), seed); - - let sobol = lk_int4_scramble( - sobol_int4_rev(dimension_set, shuffled_rev_index), - dimension_set ^ seed, - ) - .reverse_bits(); - - sobol.to_norm_floats() -} - -//---------------------------------------------------------------------- - -/// The core Sobol samplng code. Used by the other functions. -/// -/// This actually produces the Sobol sequence with reversed bits, and takes -/// the index with reversed bits. This is because the related scrambling -/// code works on reversed bits, so this avoids repeated reversing/unreversing, -/// keeping everything in reversed bits until the final step. -/// -/// Note: if the `index` parameter exceeds 2^16-1, the sample set will start -/// repeating. -#[inline(always)] -fn sobol_int4_rev(dimension_set: u32, index: u32) -> Int4 { - assert!(dimension_set < (MAX_DIMENSION / 4)); +pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] { + assert!(dimension_set < MAX_DIMENSION_SET); let vecs = &REV_VECTORS[dimension_set as usize]; - let mut index = (index >> 16) as u16; - let mut result = Int4::zero(); + // Shuffle the index using the given seed to produce a unique statistically + // independent Sobol sequence. This index shuffling approach is due to + // Brent Burley. + let shuffled_rev_index = lk_scramble(sample_index.reverse_bits(), seed); + + // Compute the Sobol sample with reversed bits. + let mut sobol_rev = Int4::zero(); + let mut index = shuffled_rev_index & 0xffff0000; // Only use the top 16 bits. let mut i = 0; while index != 0 { let j = index.leading_zeros(); - result ^= vecs[(i + j) as usize].into(); + sobol_rev ^= vecs[(i + j) as usize].into(); i += j + 1; index <<= j; index <<= 1; } - result + // Do Owen scrambling on the reversed-bits Sobol sample. + let sobol_owen_rev = lk_scramble_int4(sobol_rev, dimension_set ^ seed); + + // Un-reverse the bits and convert to floating point in [0, 1). + sobol_owen_rev.reverse_bits().to_norm_floats() } +//---------------------------------------------------------------------- + /// Scrambles `n` using the Laine Karras hash. This is equivalent to Owen /// scrambling, but on reversed bits. #[inline] fn lk_scramble(mut n: u32, scramble: u32) -> u32 { - // This uses the technique presented in the paper "Stratified Sampling for - // Stochastic Transparency" by Laine and Karras to scramble the bits. + // This uses essentially the same technique as presented in the paper + // "Stratified Sampling for Stochastic Transparency" by Laine and Karras, + // but with a faster, higher quality hash function. + // // The basic idea is that we're running a special kind of hash function // that only allows avalanche to happen upwards (i.e. a bit is only // affected by the bits lower than it). This is achieved by only doing @@ -95,11 +90,10 @@ fn lk_scramble(mut n: u32, scramble: u32) -> u32 { n } -/// Same as `lk_scramble()`, except does it on 4 integers at a time -/// with SIMD. +/// Same as `lk_scramble()`, except does it on 4 integers at a time. #[inline(always)] -fn lk_int4_scramble(mut n: Int4, scramble: u32) -> Int4 { - n += hash_4d([scramble; 4].into(), 2); +fn lk_scramble_int4(mut n: Int4, scramble: u32) -> Int4 { + n += hash_int4([scramble; 4].into(), 2); n ^= [0xdc967795; 4].into(); n *= [0x97b754b7; 4].into(); @@ -109,29 +103,11 @@ fn lk_int4_scramble(mut n: Int4, scramble: u32) -> Int4 { n } -/// Same as `lk_scramble()` except uses a slower more full version of -/// hashing. -/// -/// This is mainly intended to help validate the faster scrambling function, -/// and likely shouldn't be used for real things. It is significantly -/// slower. -#[allow(dead_code)] -#[inline] -fn lk_scramble_slow(mut n: u32, scramble: u32) -> u32 { - n = n.wrapping_add(hash(scramble, 3)); - for i in 0..31 { - let low_mask = (1u32 << i).wrapping_sub(1); - let low_bits_hash = hash((n & low_mask) ^ hash(i, 3), 3); - n ^= low_bits_hash & !low_mask; - } - n -} - /// A simple 32-bit hash function. Its quality can be tuned with /// the number of rounds used. #[inline(always)] fn hash(n: u32, rounds: u32) -> u32 { - let mut hash = n ^ 0x912f69ba; + let mut hash = n ^ 0x79c68e4a; for _ in 0..rounds { hash = hash.wrapping_mul(0x736caf6f); hash ^= hash.wrapping_shr(16); @@ -139,10 +115,12 @@ fn hash(n: u32, rounds: u32) -> u32 { hash } -/// A simple 32-bit hash function. Its quality can be tuned with -/// the number of rounds used. +/// Same as `hash()` except performs hashing on four numbers at once. +/// +/// Each of the four numbers gets a different hash, so even if all input +/// numbers are the same, the outputs will still be different for each of them. #[inline(always)] -fn hash_4d(n: Int4, rounds: u32) -> Int4 { +fn hash_int4(n: Int4, rounds: u32) -> Int4 { let mut hash = n; hash ^= [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into(); for _ in 0..rounds {