Merge branch 'master' into micropoly
This commit is contained in:
commit
11de294af0
|
@ -59,33 +59,17 @@ pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] {
|
|||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
// The permutation constants used in `lk_scramble()`.
|
||||
// Each tuple is for one round of permutation. The first tuple is
|
||||
// optimized, and the remaining are random aside from making sure
|
||||
// that they are appropriately even or odd.
|
||||
const PERMS: &[(u32, u32)] = &[
|
||||
(0x9ac7ea2a, 0x7d1e78d3),
|
||||
(0x2ce68764, 0x9dd00551),
|
||||
(0x79b82526, 0x2dfc1a6b),
|
||||
(0xf358b1d0, 0x38743c65),
|
||||
];
|
||||
|
||||
// How many permutation rounds to do.
|
||||
// In practice it seems like one round is plenty, but I'm leaving more
|
||||
// available in case we want to increase them later.
|
||||
const ROUNDS: usize = 1;
|
||||
|
||||
/// Scrambles `n` using a novel variation on the Laine-Karras hash.
|
||||
///
|
||||
/// This is equivalent to Owen scrambling, but on reversed bits.
|
||||
#[inline(always)]
|
||||
fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
|
||||
n = n.wrapping_add(hash(scramble, 2));
|
||||
let scramble = hash(scramble);
|
||||
|
||||
for &(p1, p2) in PERMS.iter().take(ROUNDS) {
|
||||
n ^= n.wrapping_mul(p1);
|
||||
n = n.wrapping_mul(p2);
|
||||
}
|
||||
n = n.wrapping_add(scramble);
|
||||
n ^= n.wrapping_mul(0x3354734a);
|
||||
n = n.wrapping_add(n << 2);
|
||||
n ^= n.wrapping_mul(scramble & !1);
|
||||
|
||||
n
|
||||
}
|
||||
|
@ -93,25 +77,28 @@ fn lk_scramble(mut n: u32, scramble: u32) -> u32 {
|
|||
/// Same as `lk_scramble()`, except does it on 4 integers at a time.
|
||||
#[inline(always)]
|
||||
fn lk_scramble_int4(mut n: Int4, scramble: u32) -> Int4 {
|
||||
n += hash_int4([scramble; 4].into(), 2);
|
||||
let scramble = hash_int4([scramble; 4].into());
|
||||
|
||||
for &(p1, p2) in PERMS.iter().take(ROUNDS) {
|
||||
n ^= n * [p1; 4].into();
|
||||
n *= [p2; 4].into();
|
||||
}
|
||||
n += scramble;
|
||||
n ^= n * [0x3354734a; 4].into();
|
||||
n += n << 2;
|
||||
n ^= n * (scramble & [!1; 4].into());
|
||||
|
||||
n
|
||||
}
|
||||
|
||||
/// A simple 32-bit hash function. Its quality can be tuned with
|
||||
/// the number of rounds used.
|
||||
/// A good 32-bit hash function.
|
||||
/// From https://github.com/skeeto/hash-prospector
|
||||
#[inline(always)]
|
||||
fn hash(n: u32, rounds: u32) -> u32 {
|
||||
fn hash(n: u32) -> u32 {
|
||||
let mut hash = n ^ 0x79c68e4a;
|
||||
for _ in 0..rounds {
|
||||
hash = hash.wrapping_mul(0x736caf6f);
|
||||
hash ^= hash.wrapping_shr(16);
|
||||
}
|
||||
|
||||
hash ^= hash >> 16;
|
||||
hash = hash.wrapping_mul(0x7feb352d);
|
||||
hash ^= hash >> 15;
|
||||
hash = hash.wrapping_mul(0x846ca68b);
|
||||
hash ^= hash >> 16;
|
||||
|
||||
hash
|
||||
}
|
||||
|
||||
|
@ -120,12 +107,14 @@ fn hash(n: u32, rounds: u32) -> u32 {
|
|||
/// Each of the four numbers gets a different hash, so even if all input
|
||||
/// numbers are the same, the outputs will still be different for each of them.
|
||||
#[inline(always)]
|
||||
fn hash_int4(n: Int4, rounds: u32) -> Int4 {
|
||||
let mut hash = n;
|
||||
hash ^= [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into();
|
||||
for _ in 0..rounds {
|
||||
hash *= [0x736caf6f; 4].into();
|
||||
hash ^= hash.shr16();
|
||||
}
|
||||
fn hash_int4(n: Int4) -> Int4 {
|
||||
let mut hash = n ^ [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into();
|
||||
|
||||
hash ^= hash >> 16;
|
||||
hash *= [0x7feb352d; 4].into();
|
||||
hash ^= hash >> 15;
|
||||
hash *= [0x846ca68b; 4].into();
|
||||
hash ^= hash >> 16;
|
||||
|
||||
hash
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
pub(crate) mod sse {
|
||||
use core::arch::x86_64::{
|
||||
__m128i, _mm_add_epi32, _mm_and_si128, _mm_cvtepi32_ps, _mm_mul_ps, _mm_or_si128,
|
||||
_mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_slli_epi32,
|
||||
_mm_srli_epi32, _mm_xor_si128,
|
||||
_mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_sll_epi32,
|
||||
_mm_slli_epi32, _mm_srl_epi32, _mm_srli_epi32, _mm_xor_si128,
|
||||
};
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
|
@ -91,13 +91,6 @@ pub(crate) mod sse {
|
|||
Int4 { v: n }
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn shr16(self) -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_srli_epi32(self.v, 16) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Mul for Int4 {
|
||||
|
@ -152,12 +145,54 @@ pub(crate) mod sse {
|
|||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitXor for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn bitxor(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_xor_si128(self.v, other.v) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitXorAssign for Int4 {
|
||||
#[inline(always)]
|
||||
fn bitxor_assign(&mut self, other: Self) {
|
||||
*self = Int4 {
|
||||
v: unsafe { _mm_xor_si128(self.v, other.v) },
|
||||
};
|
||||
*self = *self ^ other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitAnd for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn bitand(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_and_si128(self.v, other.v) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Shl<i32> for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn shl(self, other: i32) -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_sll_epi32(self.v, _mm_set1_epi32(other)) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Shr<i32> for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn shr(self, other: i32) -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_srl_epi32(self.v, _mm_set1_epi32(other)) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -210,14 +245,18 @@ pub(crate) mod fallback {
|
|||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn shr16(self) -> Int4 {
|
||||
impl std::ops::Mul for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
fn mul(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0] >> 16,
|
||||
self.v[1] >> 16,
|
||||
self.v[2] >> 16,
|
||||
self.v[3] >> 16,
|
||||
self.v[0].wrapping_mul(other.v[0]),
|
||||
self.v[1].wrapping_mul(other.v[1]),
|
||||
self.v[2].wrapping_mul(other.v[2]),
|
||||
self.v[3].wrapping_mul(other.v[3]),
|
||||
],
|
||||
}
|
||||
}
|
||||
|
@ -225,14 +264,7 @@ pub(crate) mod fallback {
|
|||
|
||||
impl std::ops::MulAssign for Int4 {
|
||||
fn mul_assign(&mut self, other: Self) {
|
||||
*self = Int4 {
|
||||
v: [
|
||||
self.v[0].wrapping_mul(other.v[0]),
|
||||
self.v[1].wrapping_mul(other.v[1]),
|
||||
self.v[2].wrapping_mul(other.v[2]),
|
||||
self.v[3].wrapping_mul(other.v[3]),
|
||||
],
|
||||
};
|
||||
*self = *self * other;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -249,16 +281,75 @@ pub(crate) mod fallback {
|
|||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitXorAssign for Int4 {
|
||||
fn bitxor_assign(&mut self, other: Self) {
|
||||
*self = Int4 {
|
||||
impl std::ops::BitAnd for Int4 {
|
||||
type Output = Int4;
|
||||
fn bitand(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0] & other.v[0],
|
||||
self.v[1] & other.v[1],
|
||||
self.v[2] & other.v[2],
|
||||
self.v[3] & other.v[3],
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitAndAssign for Int4 {
|
||||
fn bitand_assign(&mut self, other: Self) {
|
||||
*self = *self & other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitXor for Int4 {
|
||||
type Output = Int4;
|
||||
fn bitxor(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0] ^ other.v[0],
|
||||
self.v[1] ^ other.v[1],
|
||||
self.v[2] ^ other.v[2],
|
||||
self.v[3] ^ other.v[3],
|
||||
],
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitXorAssign for Int4 {
|
||||
fn bitxor_assign(&mut self, other: Self) {
|
||||
*self = *self ^ other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Shl<i32> for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn shl(self, other: i32) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0] << other,
|
||||
self.v[1] << other,
|
||||
self.v[2] << other,
|
||||
self.v[3] << other,
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Shr<i32> for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn shr(self, other: i32) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0] >> other,
|
||||
self.v[1] >> other,
|
||||
self.v[2] >> other,
|
||||
self.v[3] >> other,
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user