diff --git a/sub_crates/sobol/src/lib.rs b/sub_crates/sobol/src/lib.rs
index 811d61f..a18bb58 100644
--- a/sub_crates/sobol/src/lib.rs
+++ b/sub_crates/sobol/src/lib.rs
@@ -59,33 +59,35 @@ pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] {
 //----------------------------------------------------------------------
 
+// The permutation constants used in `lk_scramble()`.
+// Each tuple is for one round of permutation. The first tuple is
+// optimized, and the remaining ones are random aside from being
+// appropriately even or odd.
+const PERMS: &[(u32, u32)] = &[
+    (0x9ac7ea2a, 0x7d1e78d3),
+    (0x2ce68764, 0x9dd00551),
+    (0x79b82526, 0x2dfc1a6b),
+    (0xf358b1d0, 0x38743c65),
+];
+
+// How many permutation rounds to do.
+// In practice it seems like one round is plenty, but I'm leaving more
+// available in case we want to increase the count later.
+const ROUNDS: usize = 1;
+
 /// Scrambles `n` using a novel variation on the Laine-Karras hash.
 ///
 /// This is equivalent to Owen scrambling, but on reversed bits.
-#[inline]
+#[inline(always)]
 fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
-    // The basic idea here is that we're running a special kind of hash
-    // function that only allows avalanche to happen upwards (i.e. a bit is
-    // only affected by the bits lower than it). This is achieved by only
-    // doing mixing via operations that also adhere to that property.
-    //
-    // Some known valid operations that adhere to that property are:
-    //
-    // 1. n ^= constant
-    // 2. n += constant
-    // 3. n *= odd_constant
-    // 4. n ^= n * even_constant
-    //
-    // The original Laine-Karras function uses operations 2 and 4 above.
-    // However, faster and higher-quality results can be achieved with 1, 2,
-    // and 3, which is what we're doing here.
     n = n.wrapping_add(hash(scramble, 2));
-    n ^= 0xdc967795;
-    n = n.wrapping_mul(0x97b754b7);
-    n ^= 0x866350b1;
-    n = n.wrapping_mul(0x9e3779cd);
+
+    for &(p1, p2) in PERMS.iter().take(ROUNDS) {
+        n ^= n.wrapping_mul(p1);
+        n = n.wrapping_mul(p2);
+    }
 
     n
 }
@@ -95,10 +97,10 @@ fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
 fn lk_scramble_int4(mut n: Int4, scramble: u32) -> Int4 {
     n += hash_int4([scramble; 4].into(), 2);
 
-    n ^= [0xdc967795; 4].into();
-    n *= [0x97b754b7; 4].into();
-    n ^= [0x866350b1; 4].into();
-    n *= [0x9e3779cd; 4].into();
+    for &(p1, p2) in PERMS.iter().take(ROUNDS) {
+        n ^= n * [p1; 4].into();
+        n *= [p2; 4].into();
+    }
 
     n
 }
diff --git a/sub_crates/sobol/src/wide.rs b/sub_crates/sobol/src/wide.rs
index f240043..8602a2c 100644
--- a/sub_crates/sobol/src/wide.rs
+++ b/sub_crates/sobol/src/wide.rs
@@ -100,16 +100,18 @@ pub(crate) mod sse {
         }
     }
 
-    impl std::ops::MulAssign for Int4 {
+    impl std::ops::Mul for Int4 {
+        type Output = Int4;
+
         #[inline(always)]
-        fn mul_assign(&mut self, other: Self) {
+        fn mul(self, other: Self) -> Int4 {
             // This only works with SSE 4.1 support.
             #[cfg(target_feature = "sse4.1")]
             unsafe {
                 use core::arch::x86_64::_mm_mullo_epi32;
-                *self = Int4 {
+                Int4 {
                     v: _mm_mullo_epi32(self.v, other.v),
-                };
+                }
             }
@@ -127,13 +129,20 @@ pub(crate) mod sse {
                     ),
                     _mm_set_epi32(0, 0xffffffffu32 as i32, 0, 0xffffffffu32 as i32),
                 );
-                *self = Int4 {
+                Int4 {
                     v: _mm_or_si128(a, _mm_shuffle_epi32(b, 0b10_11_00_01)),
-                };
+                }
             }
         }
     }
 
+    impl std::ops::MulAssign for Int4 {
+        #[inline(always)]
+        fn mul_assign(&mut self, other: Self) {
+            *self = *self * other;
+        }
+    }
+
     impl std::ops::AddAssign for Int4 {
         #[inline(always)]
         fn add_assign(&mut self, other: Self) {
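
Not part of the patch, but useful context for review: `lk_scramble` only lets a bit influence the bits above it, which is why its doc comment calls it Owen scrambling "on reversed bits". A minimal sketch of a conventional Owen scramble built on top of it, assuming it sits in the same module as `lk_scramble`; the `owen_scramble` name is hypothetical:

fn owen_scramble(n: u32, scramble: u32) -> u32 {
    // Owen scrambling perturbs each bit based on the bits above it,
    // while `lk_scramble` mixes strictly upward, so the input is
    // bit-reversed going in and the result is reversed back.
    lk_scramble(n.reverse_bits(), scramble).reverse_bits()
}

In a real sampler the two reversals can often be avoided by keeping the Sobol values in bit-reversed order until after scrambling.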
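
Also not part of the patch: the invariant described by the deleted comment (a bit may only be affected by the bits below it) is exactly what the even/odd requirements on the PERMS constants preserve, and it is easy to pin down in a test. A sketch, again assuming it lives in the same module as `lk_scramble`:

#[test]
fn lk_scramble_avalanches_upward_only() {
    // Flipping input bit `k` may change output bits `k` and above,
    // but never the bits below `k`, for any input and scramble.
    for &n in &[0u32, 1, 0x12345678, 0xdeadbeef] {
        for k in 0..32 {
            let a = lk_scramble(n, 0x5eed);
            let b = lk_scramble(n ^ (1 << k), 0x5eed);
            let low_mask = (1u32 << k) - 1;
            assert_eq!(a & low_mask, b & low_mask);
        }
    }
}

Together with each operation being a bijection, this property is what makes the hash equivalent to an Owen scramble on reversed bits, so it is worth guarding if ROUNDS is ever raised or new constants are added.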