Misc optimizations on the Sobol sampler.
The biggest one is avoiding a bunch of bit reversals by keeping numbers in bit-reversed form for as long as we can. Also reduced the hashing rounds: just 2 rounds seems to be enough for a reasonable amount of statistical independence on both the scrambling and shuffling. I tested both independently, keeping the other with no scrambling/shuffling respectively. This makes sense because in normal contexts 3 is enough, but in this case both act as input to yet another hash which is effectively doing more rounds.
This commit is contained in:
parent
660576ec2b
commit
aecff883ab
|
@ -22,12 +22,14 @@ fn main() {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
// Write the vectors.
|
// Write the vectors.
|
||||||
f.write_all(format!("pub const VECTORS: &[[u{0}; {0}]] = &[\n", SOBOL_BITS).as_bytes())
|
// We actually write them with reversed bits due to how the library uses
|
||||||
|
// them, which is atypical.
|
||||||
|
f.write_all(format!("pub const REV_VECTORS: &[[u{0}; {0}]] = &[\n", SOBOL_BITS).as_bytes())
|
||||||
.unwrap();
|
.unwrap();
|
||||||
for v in vectors.iter() {
|
for v in vectors.iter() {
|
||||||
f.write_all(" [\n".as_bytes()).unwrap();
|
f.write_all(" [\n".as_bytes()).unwrap();
|
||||||
for n in v.iter() {
|
for n in v.iter() {
|
||||||
f.write_all(format!(" 0x{:08x},\n", *n).as_bytes())
|
f.write_all(format!(" 0x{:08x},\n", n.reverse_bits()).as_bytes())
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
f.write_all(" ],\n".as_bytes()).unwrap();
|
f.write_all(" ],\n".as_bytes()).unwrap();
|
||||||
|
|
|
@ -1,110 +1,109 @@
|
||||||
//! An implementation of the Sobol sequence with Owen scrambling.
|
//! An implementation of the Sobol sequence with Owen scrambling.
|
||||||
|
|
||||||
// The following `include` provides `MAX_DIMENSION` and `VECTORS`.
|
// The following `include` provides `MAX_DIMENSION` and `REV_VECTORS`.
|
||||||
// See the build.rs file for how this included file is generated.
|
// See the build.rs file for how this included file is generated.
|
||||||
include!(concat!(env!("OUT_DIR"), "/vectors.inc"));
|
include!(concat!(env!("OUT_DIR"), "/vectors.inc"));
|
||||||
|
|
||||||
/// Compute one component of one sample from the Sobol'-sequence, where
|
/// Compute one component of one sample from the Sobol sequence, where
|
||||||
/// `dimension` specifies the component and `index` specifies the sample
|
/// `dimension` specifies the component and `index` specifies the sample
|
||||||
/// within the sequence.
|
/// within the sequence.
|
||||||
///
|
///
|
||||||
/// A different `seed` parameter results in a statistically independent Sobol
|
/// Passing a different `seed` parameter results in a statistically
|
||||||
/// sequence, uncorrelated to others with different seeds.
|
/// independent Sobol sequence, uncorrelated to others with different seeds.
|
||||||
///
|
///
|
||||||
/// Note: generates a maximum of 2^16 samples per dimension. If the `index`
|
/// Note: generates a maximum of 2^16 samples per dimension. If the `index`
|
||||||
/// parameter exceeds 2^16-1, the sample set will start repeating.
|
/// parameter exceeds 2^16-1, the sample set will start repeating.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn sample(dimension: u32, index: u32, seed: u32) -> f32 {
|
pub fn sample(dimension: u32, index: u32, seed: u32) -> f32 {
|
||||||
let shuffled_index = owen_scramble(index, hash(seed));
|
let shuffled_rev_index = lk_scramble(index.reverse_bits(), hash(seed, 2));
|
||||||
let scramble = hash(dimension ^ seed);
|
let scramble = hash(dimension ^ seed, 2);
|
||||||
u32_to_0_1_f32(owen_scramble(
|
let sobol = lk_scramble(sobol_u32_rev(dimension, shuffled_rev_index), scramble).reverse_bits();
|
||||||
sobol_u32(dimension, shuffled_index),
|
u32_to_0_1_f32(sobol)
|
||||||
scramble,
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
/// The actual core Sobol samplng code. Used by the other functions.
|
/// The core Sobol samplng code. Used by the other functions.
|
||||||
|
///
|
||||||
|
/// This actually produces the Sobol sequence with reversed bits, and takes
|
||||||
|
/// the index with reversed bits. This is because the related scrambling
|
||||||
|
/// code works on reversed bits, so this avoids repeated reversing/unreversing,
|
||||||
|
/// keeping everything in reversed bits until the final step.
|
||||||
///
|
///
|
||||||
/// Note: if the `index` parameter exceeds 2^16-1, the sample set will start
|
/// Note: if the `index` parameter exceeds 2^16-1, the sample set will start
|
||||||
/// repeating.
|
/// repeating.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn sobol_u32(dimension: u32, index: u32) -> u32 {
|
fn sobol_u32_rev(dimension: u32, index: u32) -> u32 {
|
||||||
assert!(dimension < MAX_DIMENSION);
|
assert!(dimension < MAX_DIMENSION);
|
||||||
let vecs = &VECTORS[dimension as usize];
|
let vecs = &REV_VECTORS[dimension as usize];
|
||||||
let mut index = index as u16;
|
let mut index = (index >> 16) as u16;
|
||||||
|
|
||||||
let mut result = 0;
|
let mut result = 0;
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while index != 0 {
|
while index != 0 {
|
||||||
let j = index.trailing_zeros();
|
let j = index.leading_zeros();
|
||||||
result ^= vecs[(i + j) as usize];
|
result ^= vecs[(i + j) as usize];
|
||||||
i += j + 1;
|
i += j + 1;
|
||||||
index >>= j;
|
index <<= j;
|
||||||
index >>= 1;
|
index <<= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
(result as u32) << 16
|
result as u32
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scrambles `n` using Owen scrambling and the given scramble parameter.
|
/// Scrambles `n` using the Laine Karras hash. This is equivalent to Owen
|
||||||
|
/// scrambling, but on reversed bits.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn owen_scramble(mut n: u32, scramble: u32) -> u32 {
|
fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
|
||||||
// This uses the technique presented in the paper "Stratified Sampling for
|
// This uses the technique presented in the paper "Stratified Sampling for
|
||||||
// Stochastic Transparency" by Laine and Karras.
|
// Stochastic Transparency" by Laine and Karras to scramble the bits.
|
||||||
// The basic idea is that we're running a special kind of hash function
|
// The basic idea is that we're running a special kind of hash function
|
||||||
// that only allows avalanche to happen downwards (i.e. a bit is only
|
// that only allows avalanche to happen upwards (i.e. a bit is only
|
||||||
// affected by the bits higher than it). This is achieved by first
|
// affected by the bits lower than it). This is achieved by only doing
|
||||||
// reversing the bits and then doing mixing via multiplication by even
|
// mixing via operations that also adhere to that property, such as
|
||||||
// numbers.
|
// multiplication by even numbers.
|
||||||
//
|
//
|
||||||
// Normally this would be considered a poor hash function, because normally
|
// Normally this would be considered a poor hash function, because normally
|
||||||
// you want all bits to have an equal chance of affecting all other bits.
|
// you want all bits to have an equal chance of affecting all other bits.
|
||||||
// But in this case that only-downward behavior is exactly what we want,
|
// But in this case that only-upward behavior is exactly what we want,
|
||||||
// because it ends up being equivalent to Owen scrambling.
|
// because it ends up being equivalent to Owen scrambling on
|
||||||
//
|
// reverse-ordered bits.
|
||||||
// Note that the application of the scramble parameter here via addition
|
|
||||||
// does not invalidate the Owen scramble as long as it is done after the
|
|
||||||
// bit the reversal.
|
|
||||||
//
|
//
|
||||||
// The permutation constants here were selected through an optimization
|
// The permutation constants here were selected through an optimization
|
||||||
// process to maximize low-bias avalanche between bits.
|
// process to maximize low-bias avalanche between bits.
|
||||||
|
|
||||||
const PERMS: [u32; 3] = [0x97b756bc, 0x4b0a8a12, 0x75c77e36];
|
const PERMS: [u32; 3] = [0x97b756bc, 0x4b0a8a12, 0x75c77e36];
|
||||||
n = n.reverse_bits();
|
|
||||||
n = n.wrapping_add(scramble);
|
n = n.wrapping_add(scramble);
|
||||||
for &p in PERMS.iter() {
|
for &p in PERMS.iter() {
|
||||||
n ^= n.wrapping_mul(p);
|
n ^= n.wrapping_mul(p);
|
||||||
}
|
}
|
||||||
n = n.reverse_bits();
|
|
||||||
|
|
||||||
// Return the scrambled value.
|
|
||||||
n
|
n
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Same as `owen_scramble()` except uses a slower more full version of
|
/// Same as `lk_scramble()` except uses a slower more full version of
|
||||||
/// Owen scrambling.
|
/// hashing.
|
||||||
///
|
///
|
||||||
/// This is mainly intended to help validate the faster Owen scrambling,
|
/// This is mainly intended to help validate the faster scrambling function,
|
||||||
/// and likely shouldn't be used for real things. It is significantly
|
/// and likely shouldn't be used for real things. It is significantly
|
||||||
/// slower.
|
/// slower.
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
#[inline]
|
#[inline]
|
||||||
fn owen_scramble_slow(mut n: u32, scramble: u32) -> u32 {
|
fn lk_scramble_slow(mut n: u32, scramble: u32) -> u32 {
|
||||||
n = n.reverse_bits().wrapping_add(scramble).reverse_bits();
|
n = n.wrapping_add(scramble);
|
||||||
for i in 0..31 {
|
for i in 0..31 {
|
||||||
let mask = (1 << (31 - i)) - 1;
|
let low_mask = (1u32 << i).wrapping_sub(1);
|
||||||
let high_bits_hash = hash((n & (!mask)) ^ hash(i));
|
let low_bits_hash = hash((n & low_mask) ^ hash(i, 3), 3);
|
||||||
n ^= high_bits_hash & mask;
|
n ^= low_bits_hash & !low_mask;
|
||||||
}
|
}
|
||||||
n
|
n
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A simple 32-bit hash function. Its quality can be tuned with
|
||||||
|
/// the number of rounds used.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn hash(n: u32) -> u32 {
|
fn hash(n: u32, rounds: u32) -> u32 {
|
||||||
let mut hash = n ^ 0x912f69ba;
|
let mut hash = n ^ 0x912f69ba;
|
||||||
for _ in 0..3 {
|
for _ in 0..rounds {
|
||||||
hash = hash.wrapping_mul(0x736caf6f);
|
hash = hash.wrapping_mul(0x736caf6f);
|
||||||
hash ^= hash.wrapping_shr(16);
|
hash ^= hash.wrapping_shr(16);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user