From aecff883abf344173ee15558e63579e688c4cea0 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Wed, 22 Apr 2020 16:21:50 +0900 Subject: [PATCH] Misc optimizations on the Sobol sampler. The biggest one is avoiding a bunch of bit reversals by keeping numbers in bit-reversed form for as long as we can. Also reduced the hashing rounds: just 2 rounds seems to be enough for a reasonable amount of statistical independence on both the scrambling and shuffling. I tested both independently, keeping the other with no scrambling/shuffling respectively. This makes sense because in normal contexts 3 is enough, but in this case both act as input to yet another hash which is effectively doing more rounds. --- sub_crates/sobol/build.rs | 6 ++- sub_crates/sobol/src/lib.rs | 89 ++++++++++++++++++------------------- 2 files changed, 48 insertions(+), 47 deletions(-) diff --git a/sub_crates/sobol/build.rs b/sub_crates/sobol/build.rs index 8eabc12..db53598 100644 --- a/sub_crates/sobol/build.rs +++ b/sub_crates/sobol/build.rs @@ -22,12 +22,14 @@ fn main() { .unwrap(); // Write the vectors. - f.write_all(format!("pub const VECTORS: &[[u{0}; {0}]] = &[\n", SOBOL_BITS).as_bytes()) + // We actually write them with reversed bits due to how the library uses + // them, which is atypical. + f.write_all(format!("pub const REV_VECTORS: &[[u{0}; {0}]] = &[\n", SOBOL_BITS).as_bytes()) .unwrap(); for v in vectors.iter() { f.write_all(" [\n".as_bytes()).unwrap(); for n in v.iter() { - f.write_all(format!(" 0x{:08x},\n", *n).as_bytes()) + f.write_all(format!(" 0x{:08x},\n", n.reverse_bits()).as_bytes()) .unwrap(); } f.write_all(" ],\n".as_bytes()).unwrap(); diff --git a/sub_crates/sobol/src/lib.rs b/sub_crates/sobol/src/lib.rs index 0a95281..0c33a8f 100644 --- a/sub_crates/sobol/src/lib.rs +++ b/sub_crates/sobol/src/lib.rs @@ -1,110 +1,109 @@ //! An implementation of the Sobol sequence with Owen scrambling. -// The following `include` provides `MAX_DIMENSION` and `VECTORS`. 
+// The following `include` provides `MAX_DIMENSION` and `REV_VECTORS`. // See the build.rs file for how this included file is generated. include!(concat!(env!("OUT_DIR"), "/vectors.inc")); -/// Compute one component of one sample from the Sobol'-sequence, where +/// Compute one component of one sample from the Sobol sequence, where /// `dimension` specifies the component and `index` specifies the sample /// within the sequence. /// -/// A different `seed` parameter results in a statistically independent Sobol -/// sequence, uncorrelated to others with different seeds. +/// Passing a different `seed` parameter results in a statistically +/// independent Sobol sequence, uncorrelated to others with different seeds. /// /// Note: generates a maximum of 2^16 samples per dimension. If the `index` /// parameter exceeds 2^16-1, the sample set will start repeating. #[inline] pub fn sample(dimension: u32, index: u32, seed: u32) -> f32 { - let shuffled_index = owen_scramble(index, hash(seed)); - let scramble = hash(dimension ^ seed); - u32_to_0_1_f32(owen_scramble( - sobol_u32(dimension, shuffled_index), - scramble, - )) + let shuffled_rev_index = lk_scramble(index.reverse_bits(), hash(seed, 2)); + let scramble = hash(dimension ^ seed, 2); + let sobol = lk_scramble(sobol_u32_rev(dimension, shuffled_rev_index), scramble).reverse_bits(); + u32_to_0_1_f32(sobol) } //---------------------------------------------------------------------- -/// The actual core Sobol samplng code. Used by the other functions. +/// The core Sobol sampling code. Used by the other functions. +/// +/// This actually produces the Sobol sequence with reversed bits, and takes +/// the index with reversed bits. This is because the related scrambling +/// code works on reversed bits, so this avoids repeated reversing/unreversing, +/// keeping everything in reversed bits until the final step. /// /// Note: if the `index` parameter exceeds 2^16-1, the sample set will start /// repeating. 
#[inline(always)] -fn sobol_u32(dimension: u32, index: u32) -> u32 { +fn sobol_u32_rev(dimension: u32, index: u32) -> u32 { assert!(dimension < MAX_DIMENSION); - let vecs = &VECTORS[dimension as usize]; - let mut index = index as u16; + let vecs = &REV_VECTORS[dimension as usize]; + let mut index = (index >> 16) as u16; let mut result = 0; let mut i = 0; while index != 0 { - let j = index.trailing_zeros(); + let j = index.leading_zeros(); result ^= vecs[(i + j) as usize]; i += j + 1; - index >>= j; - index >>= 1; + index <<= j; + index <<= 1; } - (result as u32) << 16 + result as u32 } -/// Scrambles `n` using Owen scrambling and the given scramble parameter. +/// Scrambles `n` using the Laine Karras hash. This is equivalent to Owen +/// scrambling, but on reversed bits. #[inline(always)] -fn owen_scramble(mut n: u32, scramble: u32) -> u32 { +fn lk_scramble(mut n: u32, scramble: u32) -> u32 { // This uses the technique presented in the paper "Stratified Sampling for - // Stochastic Transparency" by Laine and Karras. + // Stochastic Transparency" by Laine and Karras to scramble the bits. // The basic idea is that we're running a special kind of hash function - // that only allows avalanche to happen downwards (i.e. a bit is only - // affected by the bits higher than it). This is achieved by first - // reversing the bits and then doing mixing via multiplication by even - // numbers. + // that only allows avalanche to happen upwards (i.e. a bit is only + // affected by the bits lower than it). This is achieved by only doing + // mixing via operations that also adhere to that property, such as + // multiplication by even numbers. // // Normally this would be considered a poor hash function, because normally // you want all bits to have an equal chance of affecting all other bits. - // But in this case that only-downward behavior is exactly what we want, - // because it ends up being equivalent to Owen scrambling. 
- // - // Note that the application of the scramble parameter here via addition - // does not invalidate the Owen scramble as long as it is done after the - // bit the reversal. + // But in this case that only-upward behavior is exactly what we want, + // because it ends up being equivalent to Owen scrambling on + // reverse-ordered bits. // // The permutation constants here were selected through an optimization // process to maximize low-bias avalanche between bits. const PERMS: [u32; 3] = [0x97b756bc, 0x4b0a8a12, 0x75c77e36]; - n = n.reverse_bits(); n = n.wrapping_add(scramble); for &p in PERMS.iter() { n ^= n.wrapping_mul(p); } - n = n.reverse_bits(); - - // Return the scrambled value. n } -/// Same as `owen_scramble()` except uses a slower more full version of -/// Owen scrambling. +/// Same as `lk_scramble()` except uses a slower, more thorough version of +/// hashing. /// -/// This is mainly intended to help validate the faster Owen scrambling, +/// This is mainly intended to help validate the faster scrambling function, /// and likely shouldn't be used for real things. It is significantly /// slower. #[allow(dead_code)] #[inline] -fn owen_scramble_slow(mut n: u32, scramble: u32) -> u32 { - n = n.reverse_bits().wrapping_add(scramble).reverse_bits(); +fn lk_scramble_slow(mut n: u32, scramble: u32) -> u32 { + n = n.wrapping_add(scramble); for i in 0..31 { - let mask = (1 << (31 - i)) - 1; - let high_bits_hash = hash((n & (!mask)) ^ hash(i)); - n ^= high_bits_hash & mask; + let low_mask = (1u32 << i).wrapping_sub(1); + let low_bits_hash = hash((n & low_mask) ^ hash(i, 3), 3); + n ^= low_bits_hash & !low_mask; } n } +/// A simple 32-bit hash function. Its quality can be tuned with +/// the number of rounds used. #[inline(always)] -fn hash(n: u32) -> u32 { +fn hash(n: u32, rounds: u32) -> u32 { let mut hash = n ^ 0x912f69ba; - for _ in 0..3 { + for _ in 0..rounds { hash = hash.wrapping_mul(0x736caf6f); hash ^= hash.wrapping_shr(16); }