Significantly better Owen scrambling hash.
This commit is contained in:
parent
105d6e52b5
commit
c32281b04a
|
@ -59,33 +59,17 @@ pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] {
|
||||||
|
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
// The permutation constants used in `lk_scramble()`.
|
|
||||||
// Each tuple is for one round of permutation. The first tuple is
|
|
||||||
// optimized, and the remaining are random aside from making sure
|
|
||||||
// that they are appropriately even or odd.
|
|
||||||
const PERMS: &[(u32, u32)] = &[
|
|
||||||
(0x9ac7ea2a, 0x7d1e78d3),
|
|
||||||
(0x2ce68764, 0x9dd00551),
|
|
||||||
(0x79b82526, 0x2dfc1a6b),
|
|
||||||
(0xf358b1d0, 0x38743c65),
|
|
||||||
];
|
|
||||||
|
|
||||||
// How many permutation rounds to do.
|
|
||||||
// In practice it seems like one round is plenty, but I'm leaving more
|
|
||||||
// available in case we want to increase them later.
|
|
||||||
const ROUNDS: usize = 1;
|
|
||||||
|
|
||||||
/// Scrambles `n` using a novel variation on the Laine-Karras hash.
|
/// Scrambles `n` using a novel variation on the Laine-Karras hash.
|
||||||
///
|
///
|
||||||
/// This is equivalent to Owen scrambling, but on reversed bits.
|
/// This is equivalent to Owen scrambling, but on reversed bits.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
|
fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
|
||||||
n = n.wrapping_add(hash(scramble, 2));
|
let scramble = hash(scramble);
|
||||||
|
|
||||||
for &(p1, p2) in PERMS.iter().take(ROUNDS) {
|
n = n.wrapping_add(scramble);
|
||||||
n ^= n.wrapping_mul(p1);
|
n ^= n.wrapping_mul(0x3354734a);
|
||||||
n = n.wrapping_mul(p2);
|
n = n.wrapping_add(n << 2);
|
||||||
}
|
n ^= n.wrapping_mul(scramble & !1);
|
||||||
|
|
||||||
n
|
n
|
||||||
}
|
}
|
||||||
|
@ -93,25 +77,28 @@ fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
|
||||||
/// Same as `lk_scramble()`, except does it on 4 integers at a time.
|
/// Same as `lk_scramble()`, except does it on 4 integers at a time.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn lk_scramble_int4(mut n: Int4, scramble: u32) -> Int4 {
|
fn lk_scramble_int4(mut n: Int4, scramble: u32) -> Int4 {
|
||||||
n += hash_int4([scramble; 4].into(), 2);
|
let scramble = hash_int4([scramble; 4].into());
|
||||||
|
|
||||||
for &(p1, p2) in PERMS.iter().take(ROUNDS) {
|
n += scramble;
|
||||||
n ^= n * [p1; 4].into();
|
n ^= n * [0x3354734a; 4].into();
|
||||||
n *= [p2; 4].into();
|
n += n << 2;
|
||||||
}
|
n ^= n * (scramble & [!1; 4].into());
|
||||||
|
|
||||||
n
|
n
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A simple 32-bit hash function. Its quality can be tuned with
|
/// A good 32-bit hash function.
|
||||||
/// the number of rounds used.
|
/// From https://github.com/skeeto/hash-prospector
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn hash(n: u32, rounds: u32) -> u32 {
|
fn hash(n: u32) -> u32 {
|
||||||
let mut hash = n ^ 0x79c68e4a;
|
let mut hash = n ^ 0x79c68e4a;
|
||||||
for _ in 0..rounds {
|
|
||||||
hash = hash.wrapping_mul(0x736caf6f);
|
hash ^= hash >> 16;
|
||||||
hash ^= hash.wrapping_shr(16);
|
hash = hash.wrapping_mul(0x7feb352d);
|
||||||
}
|
hash ^= hash >> 15;
|
||||||
|
hash = hash.wrapping_mul(0x846ca68b);
|
||||||
|
hash ^= hash >> 16;
|
||||||
|
|
||||||
hash
|
hash
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,12 +107,14 @@ fn hash(n: u32, rounds: u32) -> u32 {
|
||||||
/// Each of the four numbers gets a different hash, so even if all input
|
/// Each of the four numbers gets a different hash, so even if all input
|
||||||
/// numbers are the same, the outputs will still be different for each of them.
|
/// numbers are the same, the outputs will still be different for each of them.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn hash_int4(n: Int4, rounds: u32) -> Int4 {
|
fn hash_int4(n: Int4) -> Int4 {
|
||||||
let mut hash = n;
|
let mut hash = n ^ [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into();
|
||||||
hash ^= [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into();
|
|
||||||
for _ in 0..rounds {
|
hash ^= hash >> 16;
|
||||||
hash *= [0x736caf6f; 4].into();
|
hash *= [0x7feb352d; 4].into();
|
||||||
hash ^= hash.shr16();
|
hash ^= hash >> 15;
|
||||||
}
|
hash *= [0x846ca68b; 4].into();
|
||||||
|
hash ^= hash >> 16;
|
||||||
|
|
||||||
hash
|
hash
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,8 +5,8 @@
|
||||||
pub(crate) mod sse {
|
pub(crate) mod sse {
|
||||||
use core::arch::x86_64::{
|
use core::arch::x86_64::{
|
||||||
__m128i, _mm_add_epi32, _mm_and_si128, _mm_cvtepi32_ps, _mm_mul_ps, _mm_or_si128,
|
__m128i, _mm_add_epi32, _mm_and_si128, _mm_cvtepi32_ps, _mm_mul_ps, _mm_or_si128,
|
||||||
_mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_slli_epi32,
|
_mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_sll_epi32,
|
||||||
_mm_srli_epi32, _mm_xor_si128,
|
_mm_slli_epi32, _mm_srl_epi32, _mm_srli_epi32, _mm_xor_si128,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone)]
|
#[derive(Debug, Copy, Clone)]
|
||||||
|
@ -91,13 +91,6 @@ pub(crate) mod sse {
|
||||||
Int4 { v: n }
|
Int4 { v: n }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
pub(crate) fn shr16(self) -> Int4 {
|
|
||||||
Int4 {
|
|
||||||
v: unsafe { _mm_srli_epi32(self.v, 16) },
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::ops::Mul for Int4 {
|
impl std::ops::Mul for Int4 {
|
||||||
|
@ -152,12 +145,54 @@ pub(crate) mod sse {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl std::ops::BitXor for Int4 {
|
||||||
|
type Output = Int4;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn bitxor(self, other: Self) -> Int4 {
|
||||||
|
Int4 {
|
||||||
|
v: unsafe { _mm_xor_si128(self.v, other.v) },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl std::ops::BitXorAssign for Int4 {
|
impl std::ops::BitXorAssign for Int4 {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn bitxor_assign(&mut self, other: Self) {
|
fn bitxor_assign(&mut self, other: Self) {
|
||||||
*self = Int4 {
|
*self = *self ^ other;
|
||||||
v: unsafe { _mm_xor_si128(self.v, other.v) },
|
}
|
||||||
};
|
}
|
||||||
|
|
||||||
|
impl std::ops::BitAnd for Int4 {
|
||||||
|
type Output = Int4;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn bitand(self, other: Self) -> Int4 {
|
||||||
|
Int4 {
|
||||||
|
v: unsafe { _mm_and_si128(self.v, other.v) },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::Shl<i32> for Int4 {
|
||||||
|
type Output = Int4;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn shl(self, other: i32) -> Int4 {
|
||||||
|
Int4 {
|
||||||
|
v: unsafe { _mm_sll_epi32(self.v, _mm_set1_epi32(other)) },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::Shr<i32> for Int4 {
|
||||||
|
type Output = Int4;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn shr(self, other: i32) -> Int4 {
|
||||||
|
Int4 {
|
||||||
|
v: unsafe { _mm_srl_epi32(self.v, _mm_set1_epi32(other)) },
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -210,14 +245,18 @@ pub(crate) mod fallback {
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn shr16(self) -> Int4 {
|
impl std::ops::Mul for Int4 {
|
||||||
|
type Output = Int4;
|
||||||
|
|
||||||
|
fn mul(self, other: Self) -> Int4 {
|
||||||
Int4 {
|
Int4 {
|
||||||
v: [
|
v: [
|
||||||
self.v[0] >> 16,
|
self.v[0].wrapping_mul(other.v[0]),
|
||||||
self.v[1] >> 16,
|
self.v[1].wrapping_mul(other.v[1]),
|
||||||
self.v[2] >> 16,
|
self.v[2].wrapping_mul(other.v[2]),
|
||||||
self.v[3] >> 16,
|
self.v[3].wrapping_mul(other.v[3]),
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -225,14 +264,7 @@ pub(crate) mod fallback {
|
||||||
|
|
||||||
impl std::ops::MulAssign for Int4 {
|
impl std::ops::MulAssign for Int4 {
|
||||||
fn mul_assign(&mut self, other: Self) {
|
fn mul_assign(&mut self, other: Self) {
|
||||||
*self = Int4 {
|
*self = *self * other;
|
||||||
v: [
|
|
||||||
self.v[0].wrapping_mul(other.v[0]),
|
|
||||||
self.v[1].wrapping_mul(other.v[1]),
|
|
||||||
self.v[2].wrapping_mul(other.v[2]),
|
|
||||||
self.v[3].wrapping_mul(other.v[3]),
|
|
||||||
],
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -249,16 +281,75 @@ pub(crate) mod fallback {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::ops::BitXorAssign for Int4 {
|
impl std::ops::BitAnd for Int4 {
|
||||||
fn bitxor_assign(&mut self, other: Self) {
|
type Output = Int4;
|
||||||
*self = Int4 {
|
fn bitand(self, other: Self) -> Int4 {
|
||||||
|
Int4 {
|
||||||
|
v: [
|
||||||
|
self.v[0] & other.v[0],
|
||||||
|
self.v[1] & other.v[1],
|
||||||
|
self.v[2] & other.v[2],
|
||||||
|
self.v[3] & other.v[3],
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::BitAndAssign for Int4 {
|
||||||
|
fn bitand_assign(&mut self, other: Self) {
|
||||||
|
*self = *self & other;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::BitXor for Int4 {
|
||||||
|
type Output = Int4;
|
||||||
|
fn bitxor(self, other: Self) -> Int4 {
|
||||||
|
Int4 {
|
||||||
v: [
|
v: [
|
||||||
self.v[0] ^ other.v[0],
|
self.v[0] ^ other.v[0],
|
||||||
self.v[1] ^ other.v[1],
|
self.v[1] ^ other.v[1],
|
||||||
self.v[2] ^ other.v[2],
|
self.v[2] ^ other.v[2],
|
||||||
self.v[3] ^ other.v[3],
|
self.v[3] ^ other.v[3],
|
||||||
],
|
],
|
||||||
};
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::BitXorAssign for Int4 {
|
||||||
|
fn bitxor_assign(&mut self, other: Self) {
|
||||||
|
*self = *self ^ other;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::Shl<i32> for Int4 {
|
||||||
|
type Output = Int4;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn shl(self, other: i32) -> Int4 {
|
||||||
|
Int4 {
|
||||||
|
v: [
|
||||||
|
self.v[0] << other,
|
||||||
|
self.v[1] << other,
|
||||||
|
self.v[2] << other,
|
||||||
|
self.v[3] << other,
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::Shr<i32> for Int4 {
|
||||||
|
type Output = Int4;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn shr(self, other: i32) -> Int4 {
|
||||||
|
Int4 {
|
||||||
|
v: [
|
||||||
|
self.v[0] >> other,
|
||||||
|
self.v[1] >> other,
|
||||||
|
self.v[2] >> other,
|
||||||
|
self.v[3] >> other,
|
||||||
|
],
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user