Significantly better Owen scrambling hash.

This commit is contained in:
Nathan Vegdahl 2021-01-13 22:25:16 +09:00
parent 105d6e52b5
commit c32281b04a
2 changed files with 149 additions and 69 deletions

View File

@ -59,33 +59,17 @@ pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] {
//----------------------------------------------------------------------
// Per-round permutation constants for `lk_scramble()` (also read by its
// `Int4` counterpart).
//
// Every entry supplies the two multipliers for a single round. Only the
// first entry was optimized; the others are random, apart from being forced
// to the required parity (first element even, second element odd).
const PERMS: &[(u32, u32)] = &[
    (0x9ac7_ea2a, 0x7d1e_78d3),
    (0x2ce6_8764, 0x9dd0_0551),
    (0x79b8_2526, 0x2dfc_1a6b),
    (0xf358_b1d0, 0x3874_3c65),
];

// Number of permutation rounds actually applied.
//
// One round appears to be plenty in practice; the extra `PERMS` entries are
// kept so that the count can be raised later.
const ROUNDS: usize = 1;
/// Scrambles `n` using a novel variation on the Laine-Karras hash.
///
/// This is equivalent to Owen scrambling, but on reversed bits.
///
/// `n` is the value to scramble and `scramble` is the seed; the seed is
/// passed through `hash()` before it is mixed into `n`.
///
/// NOTE(review): two variants of the body appear together here -- one that
/// loops over `PERMS`/`ROUNDS` and calls `hash(scramble, 2)`, and a
/// straight-line one that calls `hash(scramble)`. Presumably only one of
/// them is meant to be live; confirm against the intended revision.
#[inline(always)]
fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
// Round-based variant: offset `n` by the hashed seed, then run the
// (xor-with-multiple, multiply) rounds taken from `PERMS`.
n = n.wrapping_add(hash(scramble, 2));
// Straight-line variant: from here on `scramble` names the hashed seed.
let scramble = hash(scramble);
for &(p1, p2) in PERMS.iter().take(ROUNDS) {
n ^= n.wrapping_mul(p1);
n = n.wrapping_mul(p2);
}
n = n.wrapping_add(scramble);
n ^= n.wrapping_mul(0x3354734a);
// `n + (n << 2)` is a wrapping multiply by 5.
n = n.wrapping_add(n << 2);
// `& !1` clears bit 0, so this multiplier is always even.
n ^= n.wrapping_mul(scramble & !1);
n
}
@ -93,25 +77,28 @@ fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
/// Same as `lk_scramble()`, except does it on 4 integers at a time.
///
/// The scalar seed is broadcast to all four lanes (`[scramble; 4]`) and
/// hashed with `hash_int4()` before being mixed into `n`.
///
/// NOTE(review): two variants of the body appear together here -- one that
/// loops over `PERMS`/`ROUNDS` and calls `hash_int4(.., 2)`, and a
/// straight-line one that calls the one-argument `hash_int4(..)`. Presumably
/// only one of them is meant to be live; confirm against the intended revision.
#[inline(always)]
fn lk_scramble_int4(mut n: Int4, scramble: u32) -> Int4 {
// Round-based variant: offset by the hashed seed, then run the rounds
// from `PERMS` with each constant broadcast to all lanes.
n += hash_int4([scramble; 4].into(), 2);
// Straight-line variant: from here on `scramble` names the hashed seed vector.
let scramble = hash_int4([scramble; 4].into());
for &(p1, p2) in PERMS.iter().take(ROUNDS) {
n ^= n * [p1; 4].into();
n *= [p2; 4].into();
}
n += scramble;
n ^= n * [0x3354734a; 4].into();
// `n + (n << 2)` is a lane-wise multiply by 5.
n += n << 2;
// Masking with `!1` clears bit 0 of every lane of the multiplier.
n ^= n * (scramble & [!1; 4].into());
n
}
/// A simple 32-bit hash function. Its quality can be tuned with
/// the number of rounds used.
/// A good 32-bit hash function.
/// From https://github.com/skeeto/hash-prospector
///
/// NOTE(review): two variants appear together here -- a two-argument form
/// that repeats a multiply/xor-shift step `rounds` times, and a one-argument
/// form with a fixed xor-shift / multiply / xor-shift / multiply / xor-shift
/// sequence. Presumably only one of them is meant to be live; confirm
/// against the intended revision.
#[inline(always)]
fn hash(n: u32, rounds: u32) -> u32 {
fn hash(n: u32) -> u32 {
// Pre-mix the input with a fixed constant.
let mut hash = n ^ 0x79c68e4a;
// Round-based variant: one wrapping multiply and one 16-bit xor-shift per round.
for _ in 0..rounds {
hash = hash.wrapping_mul(0x736caf6f);
hash ^= hash.wrapping_shr(16);
}
// Fixed variant: xor-shifts by 16, 15 and 16 bits, separated by wrapping
// multiplies with odd constants.
hash ^= hash >> 16;
hash = hash.wrapping_mul(0x7feb352d);
hash ^= hash >> 15;
hash = hash.wrapping_mul(0x846ca68b);
hash ^= hash >> 16;
hash
}
@ -120,12 +107,14 @@ fn hash(n: u32, rounds: u32) -> u32 {
/// Each of the four numbers gets a different hash, so even if all input
/// numbers are the same, the outputs will still be different for each of them.
///
/// NOTE(review): two variants appear together here -- a two-argument form
/// that repeats a multiply/`shr16()` step `rounds` times, and a one-argument
/// form with a fixed xor-shift / multiply sequence. Presumably only one of
/// them is meant to be live; confirm against the intended revision.
#[inline(always)]
fn hash_int4(n: Int4, rounds: u32) -> Int4 {
let mut hash = n;
// The xor constant differs per lane; every later step uses the same
// constant in all lanes, so this is what makes equal inputs diverge.
hash ^= [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into();
// Round-based variant: one multiply and one 16-bit xor-shift per round.
for _ in 0..rounds {
hash *= [0x736caf6f; 4].into();
hash ^= hash.shr16();
}
fn hash_int4(n: Int4) -> Int4 {
// Same per-lane xor constants as above, folded into the initialisation.
let mut hash = n ^ [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into();
// Fixed variant: xor-shifts by 16, 15 and 16 bits, separated by
// multiplies with constants broadcast to all lanes.
hash ^= hash >> 16;
hash *= [0x7feb352d; 4].into();
hash ^= hash >> 15;
hash *= [0x846ca68b; 4].into();
hash ^= hash >> 16;
hash
}

View File

@ -5,8 +5,8 @@
pub(crate) mod sse {
use core::arch::x86_64::{
__m128i, _mm_add_epi32, _mm_and_si128, _mm_cvtepi32_ps, _mm_mul_ps, _mm_or_si128,
_mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_slli_epi32,
_mm_srli_epi32, _mm_xor_si128,
_mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_sll_epi32,
_mm_slli_epi32, _mm_srl_epi32, _mm_srli_epi32, _mm_xor_si128,
};
#[derive(Debug, Copy, Clone)]
@ -91,13 +91,6 @@ pub(crate) mod sse {
Int4 { v: n }
}
}
/// Returns a copy with every 32-bit lane logically shifted right by 16 bits.
#[inline(always)]
pub(crate) fn shr16(self) -> Int4 {
    // NOTE(review): assumes this module is only built where the SSE2
    // intrinsics are usable -- confirm the cfg gating.
    let shifted = unsafe { _mm_srli_epi32(self.v, 16) };
    Int4 { v: shifted }
}
}
impl std::ops::Mul for Int4 {
@ -152,12 +145,54 @@ pub(crate) mod sse {
}
}
impl std::ops::BitXor for Int4 {
    type Output = Int4;

    /// Bitwise XOR of the two 128-bit vectors.
    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        // NOTE(review): assumes this module is only built where the SSE2
        // intrinsics are usable -- confirm the cfg gating.
        let xored = unsafe { _mm_xor_si128(self.v, rhs.v) };
        Int4 { v: xored }
    }
}
impl std::ops::BitXorAssign for Int4 {
#[inline(always)]
fn bitxor_assign(&mut self, other: Self) {
*self = Int4 {
v: unsafe { _mm_xor_si128(self.v, other.v) },
};
*self = *self ^ other;
}
}
impl std::ops::BitAnd for Int4 {
    type Output = Int4;

    /// Bitwise AND of the two 128-bit vectors.
    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        // NOTE(review): assumes this module is only built where the SSE2
        // intrinsics are usable -- confirm the cfg gating.
        let masked = unsafe { _mm_and_si128(self.v, rhs.v) };
        Int4 { v: masked }
    }
}
impl std::ops::Shl<i32> for Int4 {
    type Output = Int4;

    /// Shifts every 32-bit lane left by `other` bits, shifting in zeros.
    ///
    /// `_mm_sll_epi32` reads its shift amount from the low 64 bits of the
    /// count operand, so `other` is placed in the lowest lane only and the
    /// remaining lanes are zero. Broadcasting `other` to every lane would
    /// make that 64-bit count exceed 31 for any non-zero shift, which zeroes
    /// all lanes instead of shifting them.
    #[inline(always)]
    fn shl(self, other: i32) -> Int4 {
        // NOTE(review): assumes this module is only built where the SSE2
        // intrinsics are usable -- confirm the cfg gating.
        let shifted = unsafe {
            let count = _mm_set_epi32(0, 0, 0, other);
            _mm_sll_epi32(self.v, count)
        };
        Int4 { v: shifted }
    }
}
impl std::ops::Shr<i32> for Int4 {
    type Output = Int4;

    /// Logically shifts every 32-bit lane right by `other` bits, shifting in
    /// zeros.
    ///
    /// `_mm_srl_epi32` reads its shift amount from the low 64 bits of the
    /// count operand, so `other` is placed in the lowest lane only and the
    /// remaining lanes are zero. Broadcasting `other` to every lane would
    /// make that 64-bit count exceed 31 for any non-zero shift, which zeroes
    /// all lanes instead of shifting them.
    #[inline(always)]
    fn shr(self, other: i32) -> Int4 {
        // NOTE(review): assumes this module is only built where the SSE2
        // intrinsics are usable -- confirm the cfg gating.
        let shifted = unsafe {
            let count = _mm_set_epi32(0, 0, 0, other);
            _mm_srl_epi32(self.v, count)
        };
        Int4 { v: shifted }
    }
}
@ -210,14 +245,18 @@ pub(crate) mod fallback {
],
}
}
}
pub(crate) fn shr16(self) -> Int4 {
impl std::ops::Mul for Int4 {
type Output = Int4;
fn mul(self, other: Self) -> Int4 {
Int4 {
v: [
self.v[0] >> 16,
self.v[1] >> 16,
self.v[2] >> 16,
self.v[3] >> 16,
self.v[0].wrapping_mul(other.v[0]),
self.v[1].wrapping_mul(other.v[1]),
self.v[2].wrapping_mul(other.v[2]),
self.v[3].wrapping_mul(other.v[3]),
],
}
}
@ -225,14 +264,7 @@ pub(crate) mod fallback {
impl std::ops::MulAssign for Int4 {
fn mul_assign(&mut self, other: Self) {
*self = Int4 {
v: [
self.v[0].wrapping_mul(other.v[0]),
self.v[1].wrapping_mul(other.v[1]),
self.v[2].wrapping_mul(other.v[2]),
self.v[3].wrapping_mul(other.v[3]),
],
};
*self = *self * other;
}
}
@ -249,16 +281,75 @@ pub(crate) mod fallback {
}
}
impl std::ops::BitXorAssign for Int4 {
fn bitxor_assign(&mut self, other: Self) {
*self = Int4 {
impl std::ops::BitAnd for Int4 {
type Output = Int4;
fn bitand(self, other: Self) -> Int4 {
Int4 {
v: [
self.v[0] & other.v[0],
self.v[1] & other.v[1],
self.v[2] & other.v[2],
self.v[3] & other.v[3],
],
}
}
}
impl std::ops::BitAndAssign for Int4 {
    /// In-place lane-wise AND; the work is done by the `BitAnd` impl.
    fn bitand_assign(&mut self, rhs: Self) {
        let masked = std::ops::BitAnd::bitand(*self, rhs);
        *self = masked;
    }
}
impl std::ops::BitXor for Int4 {
type Output = Int4;
fn bitxor(self, other: Self) -> Int4 {
Int4 {
v: [
self.v[0] ^ other.v[0],
self.v[1] ^ other.v[1],
self.v[2] ^ other.v[2],
self.v[3] ^ other.v[3],
],
};
}
}
}
impl std::ops::BitXorAssign for Int4 {
    /// In-place lane-wise XOR; the work is done by the `BitXor` impl.
    fn bitxor_assign(&mut self, rhs: Self) {
        let xored = std::ops::BitXor::bitxor(*self, rhs);
        *self = xored;
    }
}
impl std::ops::Shl<i32> for Int4 {
type Output = Int4;
#[inline(always)]
fn shl(self, other: i32) -> Int4 {
Int4 {
v: [
self.v[0] << other,
self.v[1] << other,
self.v[2] << other,
self.v[3] << other,
],
}
}
}
impl std::ops::Shr<i32> for Int4 {
type Output = Int4;
#[inline(always)]
fn shr(self, other: i32) -> Int4 {
Int4 {
v: [
self.v[0] >> other,
self.v[1] >> other,
self.v[2] >> other,
self.v[3] >> other,
],
}
}
}