Move sobol crate outside of Psychopath into its own repo.
This commit is contained in:
parent
70fba19361
commit
706902dc8e
26
Cargo.lock
generated
26
Cargo.lock
generated
|
@ -213,9 +213,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "kioku"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30217b27028ccf61ba75afa4f9bb7be8e4a66780d7882fdaacd9ea8e0cdede26"
|
||||
checksum = "6d967239fdf2a2467247931a3055f627af65358fc99b534b5dc1ef8520384999"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
|
@ -252,9 +252,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.3.4"
|
||||
version = "2.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
|
||||
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
|
@ -368,7 +368,7 @@ dependencies = [
|
|||
"png_encode_mini",
|
||||
"rustc-serialize",
|
||||
"scoped_threadpool",
|
||||
"sobol",
|
||||
"sobol_burley",
|
||||
"spectral_upsampling",
|
||||
"time",
|
||||
]
|
||||
|
@ -549,18 +549,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.7"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85dd92e586f7355c633911e11f77f3d12f04b1b1bd76a198bd34ae3af8341ef2"
|
||||
checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.23"
|
||||
version = "0.6.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548"
|
||||
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||
|
||||
[[package]]
|
||||
name = "remove_dir_all"
|
||||
|
@ -608,12 +608,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8"
|
||||
|
||||
[[package]]
|
||||
name = "sobol"
|
||||
name = "sobol_burley"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bencher",
|
||||
"rand 0.6.5",
|
||||
]
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92317634ef0e1ece7bd4031abc3186df3781d0bf2b05682796655200aef4c965"
|
||||
|
||||
[[package]]
|
||||
name = "spectral_upsampling"
|
||||
|
|
|
@ -5,7 +5,6 @@ members = [
|
|||
"sub_crates/compact",
|
||||
"sub_crates/halton",
|
||||
"sub_crates/math3d",
|
||||
"sub_crates/sobol",
|
||||
"sub_crates/spectral_upsampling",
|
||||
]
|
||||
|
||||
|
@ -31,6 +30,7 @@ nom = "5"
|
|||
num_cpus = "1.8"
|
||||
openexr = "0.7"
|
||||
kioku = "0.3"
|
||||
sobol_burley = "0.1"
|
||||
png_encode_mini = "0.1.2"
|
||||
rustc-serialize = "0.3"
|
||||
scoped_threadpool = "0.1"
|
||||
|
@ -54,8 +54,5 @@ path = "sub_crates/halton"
|
|||
[dependencies.math3d]
|
||||
path = "sub_crates/math3d"
|
||||
|
||||
[dependencies.sobol]
|
||||
path = "sub_crates/sobol"
|
||||
|
||||
[dependencies.spectral_upsampling]
|
||||
path = "sub_crates/spectral_upsampling"
|
||||
|
|
|
@ -706,9 +706,9 @@ fn get_sample_4d(
|
|||
let seed = pixel_co.0 ^ (pixel_co.1 << 16) ^ seed.wrapping_mul(0x736caf6f);
|
||||
|
||||
match dimension_set {
|
||||
ds if ds < sobol::MAX_DIMENSION_SET as u32 => {
|
||||
ds if ds < sobol_burley::MAX_DIMENSION_SET as u32 => {
|
||||
// Sobol sampling.
|
||||
let n4 = sobol::sample_4d(i, ds, seed);
|
||||
let n4 = sobol_burley::sample_4d(i, ds, seed);
|
||||
(n4[0], n4[1], n4[2], n4[3])
|
||||
}
|
||||
ds => {
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
[package]
|
||||
name = "sobol"
|
||||
version = "0.1.0"
|
||||
authors = ["Nathan Vegdahl <cessen@cessen.com>"]
|
||||
edition = "2018"
|
||||
license = "MIT, Apache 2.0"
|
||||
build = "build.rs"
|
||||
|
||||
[lib]
|
||||
name = "sobol"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.6"
|
||||
bencher = "0.1.5"
|
||||
|
||||
[[bench]]
|
||||
name = "bench"
|
||||
harness = false
|
|
@ -1,18 +0,0 @@
|
|||
## Direction numbers and adapted code
|
||||
|
||||
The Sobol direction numbers under `direction_numbers/` are under their own license. See `direction_numbers/LICENSE.txt` for details.
|
||||
|
||||
Some of the code in `build.rs` is adapted to Rust from another source. The license for that code is embedded in the comments in that file.
|
||||
|
||||
|
||||
|
||||
## Remaining code
|
||||
|
||||
Copyright (c) 2020 Nathan Vegdahl
|
||||
|
||||
This project is licensed under either of
|
||||
|
||||
* MIT license (licenses/MIT.txt or http://opensource.org/licenses/MIT)
|
||||
* Apache License, Version 2.0, (licenses/Apache-2.0.txt or http://www.apache.org/licenses/LICENSE-2.0)
|
||||
|
||||
at your option.
|
|
@ -1,34 +0,0 @@
|
|||
use bencher::{benchmark_group, benchmark_main, black_box, Bencher};
|
||||
use rand::{rngs::SmallRng, FromEntropy, Rng};
|
||||
use sobol::sample_4d;
|
||||
|
||||
//----
|
||||
|
||||
fn gen_1000_samples(bench: &mut Bencher) {
|
||||
bench.iter(|| {
|
||||
for i in 0..1000u32 {
|
||||
black_box(sample_4d(i, 0, 1234567890));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn gen_1000_samples_incoherent(bench: &mut Bencher) {
|
||||
let mut rng = SmallRng::from_entropy();
|
||||
bench.iter(|| {
|
||||
let s = rng.gen::<u32>();
|
||||
let d = rng.gen::<u32>();
|
||||
let seed = rng.gen::<u32>();
|
||||
for i in 0..1000u32 {
|
||||
black_box(sample_4d(
|
||||
s.wrapping_add(i).wrapping_mul(512),
|
||||
d.wrapping_add(i).wrapping_mul(97) % 32,
|
||||
seed,
|
||||
));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
//----
|
||||
|
||||
benchmark_group!(benches, gen_1000_samples, gen_1000_samples_incoherent,);
|
||||
benchmark_main!(benches);
|
|
@ -1,190 +0,0 @@
|
|||
//! This file generates the Sobol direction vectors used by this crate's
|
||||
//! Sobol sequence.
|
||||
|
||||
use std::{env, fs::File, io::Write, path::Path};
|
||||
|
||||
/// How many components to generate.
|
||||
const NUM_DIMENSIONS: usize = 128;
|
||||
|
||||
/// What file to generate the numbers from.
|
||||
const DIRECTION_NUMBERS_TEXT: &str = include_str!("direction_numbers/new-joe-kuo-6.1024.txt");
|
||||
|
||||
fn main() {
|
||||
let out_dir = env::var("OUT_DIR").unwrap();
|
||||
let dest_path = Path::new(&out_dir).join("vectors.inc");
|
||||
let mut f = File::create(&dest_path).unwrap();
|
||||
|
||||
// Init direction vectors.
|
||||
let vectors = generate_direction_vectors(NUM_DIMENSIONS);
|
||||
|
||||
// Write dimensions limit.
|
||||
f.write_all(format!("const MAX_DIMENSION: u32 = {};\n", NUM_DIMENSIONS).as_bytes())
|
||||
.unwrap();
|
||||
|
||||
// Write the vectors.
|
||||
// We write them in a rather atypical way because of how the library
|
||||
// uses them. First, we interleave the numbers of each set of four
|
||||
// dimensions, for SIMD evaluation. Second, each number is written
|
||||
// with reversed bits, to avoid needing to reverse them before scrambling.
|
||||
f.write_all(format!("const REV_VECTORS: &[[[u{0}; 4]; {0}]] = &[\n", SOBOL_BITS).as_bytes())
|
||||
.unwrap();
|
||||
for d4 in vectors.chunks_exact(4) {
|
||||
f.write_all(" [\n".as_bytes()).unwrap();
|
||||
for ((a, b), (c, d)) in d4[0]
|
||||
.iter()
|
||||
.zip(d4[1].iter())
|
||||
.zip(d4[2].iter().zip(d4[3].iter()))
|
||||
{
|
||||
f.write_all(
|
||||
format!(
|
||||
" [0x{:08x}, 0x{:08x}, 0x{:08x}, 0x{:08x}],\n",
|
||||
a.reverse_bits(),
|
||||
b.reverse_bits(),
|
||||
c.reverse_bits(),
|
||||
d.reverse_bits()
|
||||
)
|
||||
.as_bytes(),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
f.write_all(" ],\n".as_bytes()).unwrap();
|
||||
}
|
||||
f.write_all("];\n".as_bytes()).unwrap();
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
|
||||
// The following is adapted from the code on this webpage:
|
||||
//
|
||||
// http://web.maths.unsw.edu.au/~fkuo/sobol/
|
||||
//
|
||||
// From these papers:
|
||||
//
|
||||
// * S. Joe and F. Y. Kuo, Remark on Algorithm 659: Implementing Sobol's
|
||||
// quasirandom sequence generator, ACM Trans. Math. Softw. 29,
|
||||
// 49-57 (2003)
|
||||
//
|
||||
// * S. Joe and F. Y. Kuo, Constructing Sobol sequences with better
|
||||
// two-dimensional projections, SIAM J. Sci. Comput. 30, 2635-2654 (2008)
|
||||
//
|
||||
// The adapted code is under the following license:
|
||||
//
|
||||
// Copyright (c) 2008, Frances Y. Kuo and Stephen Joe
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
//
|
||||
// * Neither the names of the copyright holders nor the names of the
|
||||
// University of New South Wales and the University of Waikato
|
||||
// and its contributors may be used to endorse or promote products
|
||||
// derived from this software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||||
// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
type SobolInt = u32;
|
||||
const SOBOL_BITS: usize = std::mem::size_of::<SobolInt>() * 8;
|
||||
|
||||
pub fn generate_direction_vectors(dimensions: usize) -> Vec<[SobolInt; SOBOL_BITS]> {
|
||||
let mut vectors = Vec::new();
|
||||
|
||||
// Calculate first dimension, which is just the van der Corput sequence.
|
||||
let mut dim_0 = [0 as SobolInt; SOBOL_BITS];
|
||||
for i in 0..SOBOL_BITS {
|
||||
dim_0[i] = 1 << (SOBOL_BITS - 1 - i);
|
||||
}
|
||||
vectors.push(dim_0);
|
||||
|
||||
// Do the rest of the dimensions.
|
||||
let mut lines = DIRECTION_NUMBERS_TEXT.lines();
|
||||
for _ in 1..dimensions {
|
||||
let mut v = [0 as SobolInt; SOBOL_BITS];
|
||||
|
||||
// Get data from the next valid line from the direction numbers text
|
||||
// file.
|
||||
let (s, a, m) = loop {
|
||||
if let Ok((a, m)) = parse_direction_numbers(
|
||||
lines
|
||||
.next()
|
||||
.expect("Not enough direction numbers for the requested number of dimensions."),
|
||||
) {
|
||||
break (m.len(), a, m);
|
||||
}
|
||||
};
|
||||
|
||||
// Generate the direction numbers for this dimension.
|
||||
if SOBOL_BITS <= s as usize {
|
||||
for i in 0..SOBOL_BITS {
|
||||
v[i] = (m[i] << (SOBOL_BITS - 1 - i)) as SobolInt;
|
||||
}
|
||||
} else {
|
||||
for i in 0..(s as usize) {
|
||||
v[i] = (m[i] << (SOBOL_BITS - 1 - i)) as SobolInt;
|
||||
}
|
||||
|
||||
for i in (s as usize)..SOBOL_BITS {
|
||||
v[i] = v[i - s as usize] ^ (v[i - s as usize] >> s);
|
||||
|
||||
for k in 1..s {
|
||||
v[i] ^= ((a >> (s - 1 - k)) & 1) as SobolInt * v[i - k as usize];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vectors.push(v);
|
||||
}
|
||||
|
||||
vectors
|
||||
}
|
||||
|
||||
/// Parses the direction numbers from a single line of the direction numbers
|
||||
/// text file. Returns the `a` and `m` parts.
|
||||
fn parse_direction_numbers(text: &str) -> Result<(u32, Vec<u32>), Box<dyn std::error::Error>> {
|
||||
let mut numbers = text.split_whitespace();
|
||||
if numbers.clone().count() < 4 || text.starts_with("#") {
|
||||
return Err(Box::new(ParseError(())));
|
||||
}
|
||||
|
||||
// Skip the first two numbers, which are just the dimension and the count
|
||||
// of direction numbers for this dimension.
|
||||
let _ = numbers.next().unwrap().parse::<u32>()?;
|
||||
let _ = numbers.next().unwrap().parse::<u32>()?;
|
||||
|
||||
let a = numbers.next().unwrap().parse::<u32>()?;
|
||||
|
||||
let mut m = Vec::new();
|
||||
for n in numbers {
|
||||
m.push(n.parse::<u32>()?);
|
||||
}
|
||||
|
||||
Ok((a, m))
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
struct ParseError(());
|
||||
impl std::error::Error for ParseError {}
|
||||
impl std::fmt::Display for ParseError {
|
||||
fn fmt(&self, _f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
The data in this folder is from the website
|
||||
http://web.maths.unsw.edu.au/~fkuo/sobol/
|
||||
|
||||
From these papers:
|
||||
* S. Joe and F. Y. Kuo, Remark on Algorithm 659: Implementing
|
||||
Sobol's quasirandom sequence generator, ACM Trans. Math. Softw. 29,
|
||||
49-57 (2003)
|
||||
* S. Joe and F. Y. Kuo, Constructing Sobol sequences with better
|
||||
two-dimensional projections, SIAM J. Sci. Comput. 30, 2635-2654
|
||||
(2008)
|
||||
|
||||
Copyright (c) 2008, Frances Y. Kuo and Stephen Joe
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
* Neither the names of the copyright holders nor the names of the
|
||||
University of New South Wales and the University of Waikato
|
||||
and its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||||
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
File diff suppressed because it is too large
Load Diff
|
@ -1,124 +0,0 @@
|
|||
//! A seedable, Owen-scrambled Sobol sequence.
|
||||
//!
|
||||
//! This is based on the paper "Practical Hash-based Owen Scrambling"
|
||||
//! by Brent Burley, but using the scramble function from
|
||||
//! https://psychopath.io/post/2021_01_30_building_a_better_lk_hash
|
||||
//! in place of the Laine-Karras function used in the paper, and with a
|
||||
//! larger set of direction numbers due to Kuo et al.
|
||||
//!
|
||||
//! This implementation is limited to `2^16` samples, and will loop back
|
||||
//! to the start of the sequence after that limit.
|
||||
|
||||
#![allow(clippy::unreadable_literal)]
|
||||
|
||||
mod wide;
|
||||
use wide::Int4;
|
||||
|
||||
// This `include` provides `MAX_DIMENSION` and `REV_VECTORS`.
|
||||
// See the build.rs file for how this included file is generated.
|
||||
include!(concat!(env!("OUT_DIR"), "/vectors.inc"));
|
||||
|
||||
pub const MAX_DIMENSION_SET: u32 = MAX_DIMENSION / 4;
|
||||
|
||||
/// Compute four dimensions of a single sample in the Sobol sequence.
|
||||
///
|
||||
/// `sample_index` specifies which sample in the Sobol sequence to compute.
|
||||
///
|
||||
/// `dimension_set` specifies which four dimensions to compute. `0` yields the
|
||||
/// first four dimensions, `1` the second four dimensions, and so on.
|
||||
///
|
||||
/// `seed` produces statistically independent Sobol sequences. Passing two
|
||||
/// different seeds will produce two different sequences that are only randomly
|
||||
/// associated, with no stratification or correlation between them.
|
||||
#[inline]
|
||||
pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] {
|
||||
assert!(dimension_set < MAX_DIMENSION_SET);
|
||||
let vecs = &REV_VECTORS[dimension_set as usize];
|
||||
|
||||
// Shuffle the index using the given seed to produce a unique statistically
|
||||
// independent Sobol sequence.
|
||||
let shuffled_rev_index = scramble(sample_index.reverse_bits(), seed);
|
||||
|
||||
// Compute the Sobol sample with reversed bits.
|
||||
let mut sobol_rev = Int4::zero();
|
||||
let mut index = shuffled_rev_index & 0xffff0000; // Only use the top 16 bits.
|
||||
let mut i = 0;
|
||||
while index != 0 {
|
||||
let j = index.leading_zeros();
|
||||
sobol_rev ^= vecs[(i + j) as usize].into();
|
||||
i += j + 1;
|
||||
index <<= j;
|
||||
index <<= 1;
|
||||
}
|
||||
|
||||
// Do Owen scrambling on the reversed-bits Sobol sample.
|
||||
let sobol_owen_rev = scramble_int4(sobol_rev, dimension_set ^ seed);
|
||||
|
||||
// Un-reverse the bits and convert to floating point in [0, 1).
|
||||
sobol_owen_rev.reverse_bits().to_norm_floats()
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
/// Scrambles `n` using the hash function from
|
||||
/// https://psychopath.io/post/2021_01_30_building_a_better_lk_hash
|
||||
///
|
||||
/// This is equivalent to Owen scrambling, but on reversed bits.
|
||||
#[inline(always)]
|
||||
fn scramble(mut n: u32, scramble: u32) -> u32 {
|
||||
let scramble = hash(scramble);
|
||||
|
||||
n ^= n.wrapping_mul(0x3d20adea);
|
||||
n = n.wrapping_add(scramble);
|
||||
n = n.wrapping_mul((scramble >> 16) | 1);
|
||||
n ^= n.wrapping_mul(0x05526c56);
|
||||
n ^= n.wrapping_mul(0x53a22864);
|
||||
|
||||
n
|
||||
}
|
||||
|
||||
/// Same as `scramble()`, except does it on 4 integers at a time.
|
||||
#[inline(always)]
|
||||
fn scramble_int4(mut n: Int4, scramble: u32) -> Int4 {
|
||||
let scramble = hash_int4([scramble; 4].into());
|
||||
|
||||
n ^= n * [0x3d20adea; 4].into();
|
||||
n += scramble;
|
||||
n *= (scramble >> 16) | [1; 4].into();
|
||||
n ^= n * [0x05526c56; 4].into();
|
||||
n ^= n * [0x53a22864; 4].into();
|
||||
|
||||
n
|
||||
}
|
||||
|
||||
/// A good 32-bit hash function.
|
||||
/// From https://github.com/skeeto/hash-prospector
|
||||
#[inline(always)]
|
||||
fn hash(n: u32) -> u32 {
|
||||
let mut hash = n ^ 0x79c68e4a;
|
||||
|
||||
hash ^= hash >> 16;
|
||||
hash = hash.wrapping_mul(0x7feb352d);
|
||||
hash ^= hash >> 15;
|
||||
hash = hash.wrapping_mul(0x846ca68b);
|
||||
hash ^= hash >> 16;
|
||||
|
||||
hash
|
||||
}
|
||||
|
||||
/// Same as `hash()` except performs hashing on four numbers at once.
|
||||
///
|
||||
/// Each of the four numbers gets a different hash, so even if all input
|
||||
/// numbers are the same, the outputs will still be different for each of them.
|
||||
#[inline(always)]
|
||||
fn hash_int4(n: Int4) -> Int4 {
|
||||
let mut hash = n ^ [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into();
|
||||
|
||||
hash ^= hash >> 16;
|
||||
hash *= [0x7feb352d; 4].into();
|
||||
hash ^= hash >> 15;
|
||||
hash *= [0x846ca68b; 4].into();
|
||||
hash ^= hash >> 16;
|
||||
|
||||
hash
|
||||
}
|
|
@ -1,445 +0,0 @@
|
|||
//--------------------------------------------------------------------------
|
||||
// x86/64 SSE
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
// #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
|
||||
pub(crate) mod sse {
|
||||
use core::arch::x86_64::{
|
||||
__m128i, _mm_add_epi32, _mm_and_si128, _mm_cvtepi32_ps, _mm_mul_ps, _mm_or_si128,
|
||||
_mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_sll_epi32,
|
||||
_mm_slli_epi32, _mm_srl_epi32, _mm_srli_epi32, _mm_xor_si128,
|
||||
};
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub(crate) struct Int4 {
|
||||
v: __m128i,
|
||||
}
|
||||
|
||||
impl Int4 {
|
||||
#[inline(always)]
|
||||
pub fn zero() -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_setzero_si128() },
|
||||
}
|
||||
}
|
||||
|
||||
/// For testing.
|
||||
#[allow(dead_code)]
|
||||
fn get(self, i: usize) -> u32 {
|
||||
let n: [u32; 4] = unsafe { std::mem::transmute(self) };
|
||||
n[i]
|
||||
}
|
||||
|
||||
/// Converts the full range of a 32 bit integer to a float in [0, 1).
|
||||
#[inline(always)]
|
||||
pub fn to_norm_floats(self) -> [f32; 4] {
|
||||
const ONE_OVER_31BITS: f32 = 1.0 / (1u64 << 31) as f32;
|
||||
let n4 = unsafe {
|
||||
_mm_mul_ps(
|
||||
_mm_cvtepi32_ps(_mm_srli_epi32(self.v, 1)),
|
||||
_mm_set1_ps(ONE_OVER_31BITS),
|
||||
)
|
||||
};
|
||||
|
||||
unsafe { std::mem::transmute(n4) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn reverse_bits(self) -> Int4 {
|
||||
let mut n = self.v;
|
||||
unsafe {
|
||||
let a = _mm_slli_epi32(n, 16);
|
||||
let b = _mm_srli_epi32(n, 16);
|
||||
n = _mm_or_si128(a, b);
|
||||
|
||||
//----
|
||||
let a = _mm_and_si128(
|
||||
_mm_slli_epi32(n, 8),
|
||||
_mm_set1_epi32(std::mem::transmute(0xff00ff00u32)),
|
||||
);
|
||||
let b = _mm_and_si128(
|
||||
_mm_srli_epi32(n, 8),
|
||||
_mm_set1_epi32(std::mem::transmute(0x00ff00ffu32)),
|
||||
);
|
||||
n = _mm_or_si128(a, b);
|
||||
|
||||
//----
|
||||
let a = _mm_and_si128(
|
||||
_mm_slli_epi32(n, 4),
|
||||
_mm_set1_epi32(std::mem::transmute(0xf0f0f0f0u32)),
|
||||
);
|
||||
let b = _mm_and_si128(
|
||||
_mm_srli_epi32(n, 4),
|
||||
_mm_set1_epi32(std::mem::transmute(0x0f0f0f0fu32)),
|
||||
);
|
||||
n = _mm_or_si128(a, b);
|
||||
|
||||
//----
|
||||
let a = _mm_and_si128(
|
||||
_mm_slli_epi32(n, 2),
|
||||
_mm_set1_epi32(std::mem::transmute(0xccccccccu32)),
|
||||
);
|
||||
let b = _mm_and_si128(
|
||||
_mm_srli_epi32(n, 2),
|
||||
_mm_set1_epi32(std::mem::transmute(0x33333333u32)),
|
||||
);
|
||||
n = _mm_or_si128(a, b);
|
||||
|
||||
//----
|
||||
let a = _mm_and_si128(
|
||||
_mm_slli_epi32(n, 1),
|
||||
_mm_set1_epi32(std::mem::transmute(0xaaaaaaaau32)),
|
||||
);
|
||||
let b = _mm_and_si128(
|
||||
_mm_srli_epi32(n, 1),
|
||||
_mm_set1_epi32(std::mem::transmute(0x55555555u32)),
|
||||
);
|
||||
n = _mm_or_si128(a, b);
|
||||
|
||||
Int4 { v: n }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Mul for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn mul(self, other: Self) -> Int4 {
|
||||
// This only works with SSE 4.1 support.
|
||||
#[cfg(target_feature = "sse4.1")]
|
||||
unsafe {
|
||||
use core::arch::x86_64::_mm_mullo_epi32;
|
||||
Int4 {
|
||||
v: _mm_mullo_epi32(self.v, other.v),
|
||||
}
|
||||
}
|
||||
|
||||
// This works on all x86-64 chips.
|
||||
#[cfg(not(target_feature = "sse4.1"))]
|
||||
unsafe {
|
||||
use core::arch::x86_64::{_mm_mul_epu32, _mm_shuffle_epi32};
|
||||
let a = _mm_and_si128(
|
||||
_mm_mul_epu32(self.v, other.v),
|
||||
_mm_set_epi32(0, 0xffffffffu32 as i32, 0, 0xffffffffu32 as i32),
|
||||
);
|
||||
let b = _mm_and_si128(
|
||||
_mm_mul_epu32(
|
||||
_mm_shuffle_epi32(self.v, 0b11_11_01_01),
|
||||
_mm_shuffle_epi32(other.v, 0b11_11_01_01),
|
||||
),
|
||||
_mm_set_epi32(0, 0xffffffffu32 as i32, 0, 0xffffffffu32 as i32),
|
||||
);
|
||||
Int4 {
|
||||
v: _mm_or_si128(a, _mm_shuffle_epi32(b, 0b10_11_00_01)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::MulAssign for Int4 {
|
||||
#[inline(always)]
|
||||
fn mul_assign(&mut self, other: Self) {
|
||||
*self = *self * other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::AddAssign for Int4 {
|
||||
#[inline(always)]
|
||||
fn add_assign(&mut self, other: Self) {
|
||||
*self = Int4 {
|
||||
v: unsafe { _mm_add_epi32(self.v, other.v) },
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitAnd for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn bitand(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_and_si128(self.v, other.v) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitAndAssign for Int4 {
|
||||
fn bitand_assign(&mut self, other: Self) {
|
||||
*self = *self & other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitOr for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn bitor(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_or_si128(self.v, other.v) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitOrAssign for Int4 {
|
||||
fn bitor_assign(&mut self, other: Self) {
|
||||
*self = *self | other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitXor for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn bitxor(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_xor_si128(self.v, other.v) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitXorAssign for Int4 {
|
||||
#[inline(always)]
|
||||
fn bitxor_assign(&mut self, other: Self) {
|
||||
*self = *self ^ other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Shl<i32> for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn shl(self, other: i32) -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_sll_epi32(self.v, _mm_set_epi32(0, 0, 0, other)) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Shr<i32> for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn shr(self, other: i32) -> Int4 {
|
||||
Int4 {
|
||||
v: unsafe { _mm_srl_epi32(self.v, _mm_set_epi32(0, 0, 0, other)) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<[u32; 4]> for Int4 {
|
||||
#[inline(always)]
|
||||
fn from(v: [u32; 4]) -> Self {
|
||||
Int4 {
|
||||
v: unsafe { std::mem::transmute(v) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn from_array() {
|
||||
let a = Int4::from([1, 2, 3, 4]);
|
||||
assert_eq!(a.get(0), 1);
|
||||
assert_eq!(a.get(1), 2);
|
||||
assert_eq!(a.get(2), 3);
|
||||
assert_eq!(a.get(3), 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shr() {
|
||||
let a = Int4::from([0xffffffff; 4]) >> 16;
|
||||
assert_eq!(a.get(0), 0x0000ffff);
|
||||
assert_eq!(a.get(1), 0x0000ffff);
|
||||
assert_eq!(a.get(2), 0x0000ffff);
|
||||
assert_eq!(a.get(3), 0x0000ffff);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shl() {
|
||||
let a = Int4::from([0xffffffff; 4]) << 16;
|
||||
assert_eq!(a.get(0), 0xffff0000);
|
||||
assert_eq!(a.get(1), 0xffff0000);
|
||||
assert_eq!(a.get(2), 0xffff0000);
|
||||
assert_eq!(a.get(3), 0xffff0000);
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub(crate) use sse::Int4;
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Fallback
|
||||
#[cfg(not(target_arch = "x86_64"))]
|
||||
// #[cfg(not(all(target_arch = "x86_64", target_feature = "sse4.1")))]
|
||||
pub(crate) mod fallback {
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[repr(align(16))]
|
||||
pub(crate) struct Int4 {
|
||||
v: [u32; 4],
|
||||
}
|
||||
|
||||
impl Int4 {
|
||||
pub fn zero() -> Int4 {
|
||||
Int4 { v: [0, 0, 0, 0] }
|
||||
}
|
||||
|
||||
/// Converts the full range of a 32 bit integer to a float in [0, 1).
|
||||
pub fn to_norm_floats(self) -> [f32; 4] {
|
||||
const ONE_OVER_32BITS: f32 = 1.0 / (1u64 << 32) as f32;
|
||||
[
|
||||
self.v[0] as f32 * ONE_OVER_32BITS,
|
||||
self.v[1] as f32 * ONE_OVER_32BITS,
|
||||
self.v[2] as f32 * ONE_OVER_32BITS,
|
||||
self.v[3] as f32 * ONE_OVER_32BITS,
|
||||
]
|
||||
}
|
||||
|
||||
pub fn reverse_bits(self) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0].reverse_bits(),
|
||||
self.v[1].reverse_bits(),
|
||||
self.v[2].reverse_bits(),
|
||||
self.v[3].reverse_bits(),
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Mul for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
fn mul(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0].wrapping_mul(other.v[0]),
|
||||
self.v[1].wrapping_mul(other.v[1]),
|
||||
self.v[2].wrapping_mul(other.v[2]),
|
||||
self.v[3].wrapping_mul(other.v[3]),
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::MulAssign for Int4 {
|
||||
fn mul_assign(&mut self, other: Self) {
|
||||
*self = *self * other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::AddAssign for Int4 {
|
||||
fn add_assign(&mut self, other: Self) {
|
||||
*self = Int4 {
|
||||
v: [
|
||||
self.v[0].wrapping_add(other.v[0]),
|
||||
self.v[1].wrapping_add(other.v[1]),
|
||||
self.v[2].wrapping_add(other.v[2]),
|
||||
self.v[3].wrapping_add(other.v[3]),
|
||||
],
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitAnd for Int4 {
|
||||
type Output = Int4;
|
||||
fn bitand(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0] & other.v[0],
|
||||
self.v[1] & other.v[1],
|
||||
self.v[2] & other.v[2],
|
||||
self.v[3] & other.v[3],
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitAndAssign for Int4 {
|
||||
fn bitand_assign(&mut self, other: Self) {
|
||||
*self = *self & other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitOr for Int4 {
|
||||
type Output = Int4;
|
||||
fn bitor(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0] | other.v[0],
|
||||
self.v[1] | other.v[1],
|
||||
self.v[2] | other.v[2],
|
||||
self.v[3] | other.v[3],
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitOrAssign for Int4 {
|
||||
fn bitor_assign(&mut self, other: Self) {
|
||||
*self = *self | other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitXor for Int4 {
|
||||
type Output = Int4;
|
||||
fn bitxor(self, other: Self) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0] ^ other.v[0],
|
||||
self.v[1] ^ other.v[1],
|
||||
self.v[2] ^ other.v[2],
|
||||
self.v[3] ^ other.v[3],
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::BitXorAssign for Int4 {
|
||||
fn bitxor_assign(&mut self, other: Self) {
|
||||
*self = *self ^ other;
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Shl<i32> for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn shl(self, other: i32) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0] << other,
|
||||
self.v[1] << other,
|
||||
self.v[2] << other,
|
||||
self.v[3] << other,
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Shr<i32> for Int4 {
|
||||
type Output = Int4;
|
||||
|
||||
#[inline(always)]
|
||||
fn shr(self, other: i32) -> Int4 {
|
||||
Int4 {
|
||||
v: [
|
||||
self.v[0] >> other,
|
||||
self.v[1] >> other,
|
||||
self.v[2] >> other,
|
||||
self.v[3] >> other,
|
||||
],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<[u32; 4]> for Int4 {
|
||||
fn from(v: [u32; 4]) -> Self {
|
||||
Int4 { v }
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(not(target_arch = "x86_64"))]
|
||||
pub(crate) use fallback::Int4;
|
Loading…
Reference in New Issue
Block a user