Switch to a significantly better LK hash.
With just one round, this hash is better than the previous one at two rounds. And at 2-3 rounds it appears to match the quality of a slow, per-bit hashing approach, which I've been using as ground truth for testing.
parent 776a2c1456
commit 285f714d02
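For context on the "slow, per-bit hashing approach" mentioned above: a per-bit scrambler decides each output bit from a hash of the seed and only the input bits below it, which is Owen scrambling applied in reversed-bit order. A minimal sketch of such a reference (hypothetical code, not from this repository; `mix32` is a generic stand-in mixer, not the crate's `hash()`):

/// Slow reference: flip each bit of `n` based on a hash of the seed and
/// the bits of `n` strictly below it. This has the same "avalanche only
/// upward" behavior that `lk_scramble()` approximates in a few instructions.
fn per_bit_scramble_reference(n: u32, scramble: u32) -> u32 {
    // Generic 32-bit mixer (murmur3-style finalizer), used as a stand-in.
    fn mix32(mut x: u32) -> u32 {
        x ^= x >> 16;
        x = x.wrapping_mul(0x85eb_ca6b);
        x ^= x >> 13;
        x = x.wrapping_mul(0xc2b2_ae35);
        x ^= x >> 16;
        x
    }

    let mut result = n;
    for bit in 0..32u32 {
        // Only the bits strictly below `bit`, plus the seed, decide
        // whether this bit gets flipped.
        let lower = n & ((1u32 << bit) - 1);
        let flip = mix32(lower ^ mix32(scramble ^ bit)) & 1;
        result ^= flip << bit;
    }
    result
}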
@@ -59,33 +59,33 @@ pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] {
 //----------------------------------------------------------------------

+// The permutation constants used in `lk_scramble()`.
+// Each tuple is for one round of permutation. The first tuple is
+// optimized, and the remaining are random aside from making sure
+// that they are appropriately even or odd.
+const PERMS: &[(u32, u32)] = &[
+    (0x9ac7ea2a, 0x7d1e78d3),
+    (0x2ce68764, 0x9dd00551),
+    (0x79b82526, 0x2dfc1a6b),
+    (0xf358b1d0, 0x38743c65),
+];
+
+// How many permutation rounds to do.
+// In practice it seems like one round is plenty, but I'm leaving more
+// available in case we want to increase them later.
+const ROUNDS: usize = 1;
+
 /// Scrambles `n` using a novel variation on the Laine-Karras hash.
 ///
 /// This is equivalent to Owen scrambling, but on reversed bits.
-#[inline]
+#[inline(always)]
 fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
-    // The basic idea here is that we're running a special kind of hash
-    // function that only allows avalanche to happen upwards (i.e. a bit is
-    // only affected by the bits lower than it). This is achieved by only
-    // doing mixing via operations that also adhere to that property.
-    //
-    // Some known valid operations that adhere to that property are:
-    //
-    // 1. n ^= constant
-    // 2. n += constant
-    // 3. n *= odd_constant
-    // 4. n ^= n * even_constant
-    //
-    // The original Laine-Karras function uses operations 2 and 4 above.
-    // However, faster and higher-quality results can be achieved with 1, 2,
-    // and 3, which is what we're doing here.
-
     n = n.wrapping_add(hash(scramble, 2));

-    n ^= 0xdc967795;
-    n = n.wrapping_mul(0x97b754b7);
-    n ^= 0x866350b1;
-    n = n.wrapping_mul(0x9e3779cd);
+    for &(p1, p2) in PERMS.iter().take(ROUNDS) {
+        n ^= n.wrapping_mul(p1);
+        n = n.wrapping_mul(p2);
+    }

     n
 }

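Every step in the new `lk_scramble()` (add a constant, xor with a multiple of `n`, multiply by an odd constant) only lets information flow from low bits to high bits, so the whole hash keeps that property. A small property check, sketched under the assumption that the `lk_scramble` above is in scope (not part of this commit):

// Flipping input bit `k` must never change output bits below `k`.
fn avalanche_is_upward_only(scramble: u32) -> bool {
    for n in 0u32..1_000 {
        for k in 0..32u32 {
            let a = lk_scramble(n, scramble);
            let b = lk_scramble(n ^ (1 << k), scramble);
            let low_mask = (1u32 << k) - 1;
            if (a ^ b) & low_mask != 0 {
                return false;
            }
        }
    }
    true
}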
@@ -95,10 +95,10 @@ fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
 fn lk_scramble_int4(mut n: Int4, scramble: u32) -> Int4 {
     n += hash_int4([scramble; 4].into(), 2);

-    n ^= [0xdc967795; 4].into();
-    n *= [0x97b754b7; 4].into();
-    n ^= [0x866350b1; 4].into();
-    n *= [0x9e3779cd; 4].into();
+    for &(p1, p2) in PERMS.iter().take(ROUNDS) {
+        n ^= n * [p1; 4].into();
+        n *= [p2; 4].into();
+    }

     n
 }
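The vectorized version should produce exactly the same result as the scalar version on each of its four lanes. Note that the `n ^= n * [p1; 4].into()` step needs multiplication as a value-producing operator rather than only `*=`, which is why a `Mul` impl is added in the next hunk. A lane-wise consistency check, sketched assuming `Int4` can be built from a `[u32; 4]` as above and read back out with a hypothetical `to_array()`:

fn vector_matches_scalar(values: [u32; 4], scramble: u32) -> bool {
    // `to_array()` is assumed here purely for the sketch.
    let vec: [u32; 4] = lk_scramble_int4(values.into(), scramble).to_array();
    (0..4).all(|i| vec[i] == lk_scramble(values[i], scramble))
}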
@@ -100,16 +100,18 @@ pub(crate) mod sse {
         }
     }

-    impl std::ops::MulAssign for Int4 {
+    impl std::ops::Mul for Int4 {
+        type Output = Int4;
+
         #[inline(always)]
-        fn mul_assign(&mut self, other: Self) {
+        fn mul(self, other: Self) -> Int4 {
             // This only works with SSE 4.1 support.
             #[cfg(target_feature = "sse4.1")]
             unsafe {
                 use core::arch::x86_64::_mm_mullo_epi32;
-                *self = Int4 {
+                Int4 {
                     v: _mm_mullo_epi32(self.v, other.v),
-                };
+                }
             }

             // This works on all x86-64 chips.
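Returning `Int4 { ... }` as the value of each `#[cfg]`-gated block, instead of assigning through `*self`, is what makes `mul()` work: exactly one of the two blocks survives compilation, and whichever one it is becomes the tail expression of the function. The same pattern in a standalone sketch (x86-64 only, hypothetical helper, not the crate's code):

#[inline(always)]
fn mullo4(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
    // Compiled only when SSE 4.1 is enabled; the block's value is returned.
    #[cfg(target_feature = "sse4.1")]
    unsafe {
        use core::arch::x86_64::{__m128i, _mm_loadu_si128, _mm_mullo_epi32, _mm_storeu_si128};
        let va = _mm_loadu_si128(a.as_ptr() as *const __m128i);
        let vb = _mm_loadu_si128(b.as_ptr() as *const __m128i);
        let mut out = [0u32; 4];
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, _mm_mullo_epi32(va, vb));
        out
    }

    // Compiled otherwise; then this block is the tail expression instead.
    #[cfg(not(target_feature = "sse4.1"))]
    {
        [
            a[0].wrapping_mul(b[0]),
            a[1].wrapping_mul(b[1]),
            a[2].wrapping_mul(b[2]),
            a[3].wrapping_mul(b[3]),
        ]
    }
}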
@@ -127,13 +129,20 @@ pub(crate) mod sse {
                     ),
                     _mm_set_epi32(0, 0xffffffffu32 as i32, 0, 0xffffffffu32 as i32),
                 );
-                *self = Int4 {
+                Int4 {
                     v: _mm_or_si128(a, _mm_shuffle_epi32(b, 0b10_11_00_01)),
-                };
+                }
             }
         }
     }

+    impl std::ops::MulAssign for Int4 {
+        #[inline(always)]
+        fn mul_assign(&mut self, other: Self) {
+            *self = *self * other;
+        }
+    }
+
     impl std::ops::AddAssign for Int4 {
         #[inline(always)]
         fn add_assign(&mut self, other: Self) {
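The pre-SSE 4.1 path above builds a lane-wise 32-bit multiply out of 64-bit even/odd-lane products, a mask, and a shuffle, since `_mm_mullo_epi32` itself is an SSE 4.1 instruction; the new `MulAssign` then simply forwards to `Mul`. What that fallback computes, sketched with plain arrays rather than intrinsics (not the crate's code):

fn mullo_by_even_odd_halves(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
    // 64-bit products of the even lanes (0 and 2)...
    let e0 = (a[0] as u64) * (b[0] as u64);
    let e2 = (a[2] as u64) * (b[2] as u64);
    // ...and of the odd lanes (1 and 3).
    let o1 = (a[1] as u64) * (b[1] as u64);
    let o3 = (a[3] as u64) * (b[3] as u64);
    // Keep only the low 32 bits of each product and interleave back,
    // which is exactly a lane-wise wrapping 32-bit multiply.
    [e0 as u32, o1 as u32, e2 as u32, o3 as u32]
}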