Merge branch 'master' into micropoly

Nathan Vegdahl 2021-05-11 13:38:34 -07:00
commit 927ac87e75
12 changed files with 16 additions and 1918 deletions

Cargo.lock (generated)

@@ -217,9 +217,9 @@ dependencies = [
[[package]]
name = "kioku"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30217b27028ccf61ba75afa4f9bb7be8e4a66780d7882fdaacd9ea8e0cdede26"
checksum = "6d967239fdf2a2467247931a3055f627af65358fc99b534b5dc1ef8520384999"
[[package]]
name = "lazy_static"
@@ -256,9 +256,9 @@ dependencies = [
[[package]]
name = "memchr"
version = "2.3.4"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
[[package]]
name = "nom"
@@ -373,7 +373,7 @@ dependencies = [
"png_encode_mini",
"rustc-serialize",
"scoped_threadpool",
"sobol",
"sobol_burley",
"spectral_upsampling",
"time",
]
@@ -554,18 +554,18 @@ dependencies = [
[[package]]
name = "redox_syscall"
version = "0.2.7"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85dd92e586f7355c633911e11f77f3d12f04b1b1bd76a198bd34ae3af8341ef2"
checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc"
dependencies = [
"bitflags",
]
[[package]]
name = "regex-syntax"
version = "0.6.23"
version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "remove_dir_all"
@@ -613,12 +613,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8"
[[package]]
name = "sobol"
name = "sobol_burley"
version = "0.1.0"
-dependencies = [
-"bencher",
-"rand 0.6.5",
-]
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92317634ef0e1ece7bd4031abc3186df3781d0bf2b05682796655200aef4c965"
[[package]]
name = "spectral_upsampling"


@@ -6,7 +6,6 @@ members = [
"sub_crates/data_tree",
"sub_crates/halton",
"sub_crates/math3d",
"sub_crates/sobol",
"sub_crates/spectral_upsampling",
]
@@ -32,6 +31,7 @@ nom = "5"
num_cpus = "1.8"
openexr = "0.7"
kioku = "0.3"
sobol_burley = "0.1"
png_encode_mini = "0.1.2"
rustc-serialize = "0.3"
scoped_threadpool = "0.1"
@@ -58,8 +58,5 @@ path = "sub_crates/halton"
[dependencies.math3d]
path = "sub_crates/math3d"
-[dependencies.sobol]
-path = "sub_crates/sobol"
[dependencies.spectral_upsampling]
path = "sub_crates/spectral_upsampling"


@@ -706,9 +706,9 @@ fn get_sample_4d(
let seed = pixel_co.0 ^ (pixel_co.1 << 16) ^ seed.wrapping_mul(0x736caf6f);
match dimension_set {
-ds if ds < sobol::MAX_DIMENSION_SET as u32 => {
+ds if ds < sobol_burley::MAX_DIMENSION_SET as u32 => {
// Sobol sampling.
-let n4 = sobol::sample_4d(i, ds, seed);
+let n4 = sobol_burley::sample_4d(i, ds, seed);
(n4[0], n4[1], n4[2], n4[3])
}
ds => {
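For reference, a minimal standalone sketch of the new sampler call, assuming only the `sobol_burley::sample_4d(sample_index, dimension_set, seed)` signature visible in the hunk above. It reuses the same per-pixel seed construction, so neighbouring pixels get statistically independent sequences:

// Hypothetical helper, not part of this commit: the first 16 Owen-scrambled
// Sobol points (dimensions 0..4) for one pixel.
fn first_sobol_points(pixel_co: (u32, u32), seed: u32) -> Vec<[f32; 4]> {
    let seed = pixel_co.0 ^ (pixel_co.1 << 16) ^ seed.wrapping_mul(0x736caf6f);
    (0..16u32)
        .map(|i| sobol_burley::sample_4d(i, 0, seed))
        .collect()
}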


@@ -102,7 +102,7 @@ pub fn encode(xyz: (f32, f32, f32)) -> u32 {
let v = (((9.0 * V_SCALE) * xyz.1 / s) + 0.5).max(1.0).min(255.0);
((u as u32) << 8) | (v as u32)
-};
+}
let y_bits = xyz.1.to_bits() & 0x7fffffff;


@@ -1,19 +0,0 @@
[package]
name = "sobol"
version = "0.1.0"
authors = ["Nathan Vegdahl <cessen@cessen.com>"]
edition = "2018"
license = "MIT, Apache 2.0"
build = "build.rs"
[lib]
name = "sobol"
path = "src/lib.rs"
[dev-dependencies]
rand = "0.6"
bencher = "0.1.5"
[[bench]]
name = "bench"
harness = false


@@ -1,18 +0,0 @@
## Direction numbers and adapted code
The Sobol direction numbers under `direction_numbers/` are under their own license. See `direction_numbers/LICENSE.txt` for details.
Some of the code in `build.rs` is adapted to Rust from another source. The license for that code is embedded in the comments in that file.
## Remaining code
Copyright (c) 2020 Nathan Vegdahl
This project is licensed under either of
* MIT license (licenses/MIT.txt or http://opensource.org/licenses/MIT)
* Apache License, Version 2.0, (licenses/Apache-2.0.txt or http://www.apache.org/licenses/LICENSE-2.0)
at your option.


@@ -1,34 +0,0 @@
use bencher::{benchmark_group, benchmark_main, black_box, Bencher};
use rand::{rngs::SmallRng, FromEntropy, Rng};
use sobol::sample_4d;
//----
fn gen_1000_samples(bench: &mut Bencher) {
bench.iter(|| {
for i in 0..1000u32 {
black_box(sample_4d(i, 0, 1234567890));
}
});
}
fn gen_1000_samples_incoherent(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let s = rng.gen::<u32>();
let d = rng.gen::<u32>();
let seed = rng.gen::<u32>();
for i in 0..1000u32 {
black_box(sample_4d(
s.wrapping_add(i).wrapping_mul(512),
d.wrapping_add(i).wrapping_mul(97) % 32,
seed,
));
}
});
}
//----
benchmark_group!(benches, gen_1000_samples, gen_1000_samples_incoherent,);
benchmark_main!(benches);


@@ -1,190 +0,0 @@
//! This file generates the Sobol direction vectors used by this crate's
//! Sobol sequence.
use std::{env, fs::File, io::Write, path::Path};
/// How many components to generate.
const NUM_DIMENSIONS: usize = 128;
/// The file to read the direction numbers from.
const DIRECTION_NUMBERS_TEXT: &str = include_str!("direction_numbers/new-joe-kuo-6.1024.txt");
fn main() {
let out_dir = env::var("OUT_DIR").unwrap();
let dest_path = Path::new(&out_dir).join("vectors.inc");
let mut f = File::create(&dest_path).unwrap();
// Init direction vectors.
let vectors = generate_direction_vectors(NUM_DIMENSIONS);
// Write dimensions limit.
f.write_all(format!("const MAX_DIMENSION: u32 = {};\n", NUM_DIMENSIONS).as_bytes())
.unwrap();
// Write the vectors.
// We write them in a rather atypical way because of how the library
// uses them. First, we interleave the numbers of each set of four
// dimensions, for SIMD evaluation. Second, each number is written
// with reversed bits, to avoid needing to reverse them before scrambling.
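// The resulting layout is REV_VECTORS[set][bit][lane]: the outer index picks a
// group of four consecutive dimensions, the middle index is the bit position
// (0..SOBOL_BITS), and the inner [u32; 4] holds that bit's direction number for
// each of the four dimensions in the group, ready to load as one SIMD vector.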
f.write_all(format!("const REV_VECTORS: &[[[u{0}; 4]; {0}]] = &[\n", SOBOL_BITS).as_bytes())
.unwrap();
for d4 in vectors.chunks_exact(4) {
f.write_all(" [\n".as_bytes()).unwrap();
for ((a, b), (c, d)) in d4[0]
.iter()
.zip(d4[1].iter())
.zip(d4[2].iter().zip(d4[3].iter()))
{
f.write_all(
format!(
" [0x{:08x}, 0x{:08x}, 0x{:08x}, 0x{:08x}],\n",
a.reverse_bits(),
b.reverse_bits(),
c.reverse_bits(),
d.reverse_bits()
)
.as_bytes(),
)
.unwrap();
}
f.write_all(" ],\n".as_bytes()).unwrap();
}
f.write_all("];\n".as_bytes()).unwrap();
}
//======================================================================
// The following is adapted from the code on this webpage:
//
// http://web.maths.unsw.edu.au/~fkuo/sobol/
//
// From these papers:
//
// * S. Joe and F. Y. Kuo, Remark on Algorithm 659: Implementing Sobol's
// quasirandom sequence generator, ACM Trans. Math. Softw. 29,
// 49-57 (2003)
//
// * S. Joe and F. Y. Kuo, Constructing Sobol sequences with better
// two-dimensional projections, SIAM J. Sci. Comput. 30, 2635-2654 (2008)
//
// The adapted code is under the following license:
//
// Copyright (c) 2008, Frances Y. Kuo and Stephen Joe
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the
// distribution.
//
// * Neither the names of the copyright holders nor the names of the
// University of New South Wales and the University of Waikato
// and its contributors may be used to endorse or promote products
// derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
type SobolInt = u32;
const SOBOL_BITS: usize = std::mem::size_of::<SobolInt>() * 8;
pub fn generate_direction_vectors(dimensions: usize) -> Vec<[SobolInt; SOBOL_BITS]> {
let mut vectors = Vec::new();
// Calculate first dimension, which is just the van der Corput sequence.
let mut dim_0 = [0 as SobolInt; SOBOL_BITS];
for i in 0..SOBOL_BITS {
dim_0[i] = 1 << (SOBOL_BITS - 1 - i);
}
vectors.push(dim_0);
// Do the rest of the dimensions.
let mut lines = DIRECTION_NUMBERS_TEXT.lines();
for _ in 1..dimensions {
let mut v = [0 as SobolInt; SOBOL_BITS];
// Get data from the next valid line from the direction numbers text
// file.
let (s, a, m) = loop {
if let Ok((a, m)) = parse_direction_numbers(
lines
.next()
.expect("Not enough direction numbers for the requested number of dimensions."),
) {
break (m.len(), a, m);
}
};
// Generate the direction numbers for this dimension.
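// This follows the standard Joe & Kuo recurrence, with the direction numbers
// kept left-justified in SOBOL_BITS-wide words: the first `s` entries come
// straight from the m_k in the file; each later entry is
// v[i-s] ^ (v[i-s] >> s), further XORed with v[i-k] for every set
// coefficient bit of `a` (k = 1..s-1), as implemented below.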
if SOBOL_BITS <= s as usize {
for i in 0..SOBOL_BITS {
v[i] = (m[i] << (SOBOL_BITS - 1 - i)) as SobolInt;
}
} else {
for i in 0..(s as usize) {
v[i] = (m[i] << (SOBOL_BITS - 1 - i)) as SobolInt;
}
for i in (s as usize)..SOBOL_BITS {
v[i] = v[i - s as usize] ^ (v[i - s as usize] >> s);
for k in 1..s {
v[i] ^= ((a >> (s - 1 - k)) & 1) as SobolInt * v[i - k as usize];
}
}
}
vectors.push(v);
}
vectors
}
/// Parses the direction numbers from a single line of the direction numbers
/// text file. Returns the `a` and `m` parts.
fn parse_direction_numbers(text: &str) -> Result<(u32, Vec<u32>), Box<dyn std::error::Error>> {
let mut numbers = text.split_whitespace();
if numbers.clone().count() < 4 || text.starts_with("#") {
return Err(Box::new(ParseError(())));
}
// Skip the first two numbers, which are just the dimension and the count
// of direction numbers for this dimension.
let _ = numbers.next().unwrap().parse::<u32>()?;
let _ = numbers.next().unwrap().parse::<u32>()?;
let a = numbers.next().unwrap().parse::<u32>()?;
let mut m = Vec::new();
for n in numbers {
m.push(n.parse::<u32>()?);
}
Ok((a, m))
}
#[derive(Debug, Copy, Clone)]
struct ParseError(());
impl std::error::Error for ParseError {}
impl std::fmt::Display for ParseError {
fn fmt(&self, _f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
Ok(())
}
}


@@ -1,43 +0,0 @@
The data in this folder is from the website
http://web.maths.unsw.edu.au/~fkuo/sobol/
From these papers:
* S. Joe and F. Y. Kuo, Remark on Algorithm 659: Implementing
Sobol's quasirandom sequence generator, ACM Trans. Math. Softw. 29,
49-57 (2003)
* S. Joe and F. Y. Kuo, Constructing Sobol sequences with better
two-dimensional projections, SIAM J. Sci. Comput. 30, 2635-2654
(2008)
Copyright (c) 2008, Frances Y. Kuo and Stephen Joe
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.
* Neither the names of the copyright holders nor the names of the
University of New South Wales and the University of Waikato
and its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

File diff suppressed because it is too large.


@@ -1,124 +0,0 @@
//! A seedable, Owen-scrambled Sobol sequence.
//!
//! This is based on the paper "Practical Hash-based Owen Scrambling"
//! by Brent Burley, but using the scramble function from
//! https://psychopath.io/post/2021_01_30_building_a_better_lk_hash
//! in place of the Laine-Karras function used in the paper, and with a
//! larger set of direction numbers due to Kuo et al.
//!
//! This implementation is limited to `2^16` samples, and will loop back
//! to the start of the sequence after that limit.
#![allow(clippy::unreadable_literal)]
mod wide;
use wide::Int4;
// This `include` provides `MAX_DIMENSION` and `REV_VECTORS`.
// See the build.rs file for how this included file is generated.
include!(concat!(env!("OUT_DIR"), "/vectors.inc"));
pub const MAX_DIMENSION_SET: u32 = MAX_DIMENSION / 4;
/// Compute four dimensions of a single sample in the Sobol sequence.
///
/// `sample_index` specifies which sample in the Sobol sequence to compute.
///
/// `dimension_set` specifies which four dimensions to compute. `0` yields the
/// first four dimensions, `1` the second four dimensions, and so on.
///
/// `seed` produces statistically independent Sobol sequences. Passing two
/// different seeds will produce two different sequences that are only randomly
/// associated, with no stratification or correlation between them.
#[inline]
pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] {
assert!(dimension_set < MAX_DIMENSION_SET);
let vecs = &REV_VECTORS[dimension_set as usize];
// Shuffle the index using the given seed to produce a unique statistically
// independent Sobol sequence.
let shuffled_rev_index = scramble(sample_index.reverse_bits(), seed);
// Compute the Sobol sample with reversed bits.
let mut sobol_rev = Int4::zero();
let mut index = shuffled_rev_index & 0xffff0000; // Only use the top 16 bits.
let mut i = 0;
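// Accumulate the direction vectors for every set bit of the (bit-reversed)
// index. Because the index is reversed, set bits are found from the top with
// leading_zeros(); each one XORs in the matching [u32; 4] of direction
// numbers, covering all four dimensions at once.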
while index != 0 {
let j = index.leading_zeros();
sobol_rev ^= vecs[(i + j) as usize].into();
i += j + 1;
index <<= j;
index <<= 1;
}
// Do Owen scrambling on the reversed-bits Sobol sample.
let sobol_owen_rev = scramble_int4(sobol_rev, dimension_set ^ seed);
// Un-reverse the bits and convert to floating point in [0, 1).
sobol_owen_rev.reverse_bits().to_norm_floats()
}
//----------------------------------------------------------------------
/// Scrambles `n` using the hash function from
/// https://psychopath.io/post/2021_01_30_building_a_better_lk_hash
///
/// This is equivalent to Owen scrambling, but on reversed bits.
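///
/// (Roughly: Owen scrambling permutes each digit based only on the digits
/// before it. The operations below propagate information from low bits toward
/// high bits via their carries, so on a bit-reversed input each bit ends up
/// depending only on the digits that precede it in the original ordering.)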
#[inline(always)]
fn scramble(mut n: u32, scramble: u32) -> u32 {
let scramble = hash(scramble);
n ^= n.wrapping_mul(0x3d20adea);
n = n.wrapping_add(scramble);
n = n.wrapping_mul((scramble >> 16) | 1);
n ^= n.wrapping_mul(0x05526c56);
n ^= n.wrapping_mul(0x53a22864);
n
}
/// Same as `scramble()`, except does it on 4 integers at a time.
#[inline(always)]
fn scramble_int4(mut n: Int4, scramble: u32) -> Int4 {
let scramble = hash_int4([scramble; 4].into());
n ^= n * [0x3d20adea; 4].into();
n += scramble;
n *= (scramble >> 16) | [1; 4].into();
n ^= n * [0x05526c56; 4].into();
n ^= n * [0x53a22864; 4].into();
n
}
/// A good 32-bit hash function.
/// From https://github.com/skeeto/hash-prospector
#[inline(always)]
fn hash(n: u32) -> u32 {
let mut hash = n ^ 0x79c68e4a;
hash ^= hash >> 16;
hash = hash.wrapping_mul(0x7feb352d);
hash ^= hash >> 15;
hash = hash.wrapping_mul(0x846ca68b);
hash ^= hash >> 16;
hash
}
/// Same as `hash()` except performs hashing on four numbers at once.
///
/// Each of the four numbers gets a different hash, so even if all input
/// numbers are the same, the outputs will still be different for each of them.
#[inline(always)]
fn hash_int4(n: Int4) -> Int4 {
let mut hash = n ^ [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into();
hash ^= hash >> 16;
hash *= [0x7feb352d; 4].into();
hash ^= hash >> 15;
hash *= [0x846ca68b; 4].into();
hash ^= hash >> 16;
hash
}


@@ -1,445 +0,0 @@
//--------------------------------------------------------------------------
// x86/64 SSE
#[cfg(target_arch = "x86_64")]
// #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
pub(crate) mod sse {
use core::arch::x86_64::{
__m128i, _mm_add_epi32, _mm_and_si128, _mm_cvtepi32_ps, _mm_mul_ps, _mm_or_si128,
_mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_sll_epi32,
_mm_slli_epi32, _mm_srl_epi32, _mm_srli_epi32, _mm_xor_si128,
};
#[derive(Debug, Copy, Clone)]
pub(crate) struct Int4 {
v: __m128i,
}
impl Int4 {
#[inline(always)]
pub fn zero() -> Int4 {
Int4 {
v: unsafe { _mm_setzero_si128() },
}
}
/// For testing.
#[allow(dead_code)]
fn get(self, i: usize) -> u32 {
let n: [u32; 4] = unsafe { std::mem::transmute(self) };
n[i]
}
/// Converts the full range of a 32 bit integer to a float in [0, 1).
#[inline(always)]
pub fn to_norm_floats(self) -> [f32; 4] {
const ONE_OVER_31BITS: f32 = 1.0 / (1u64 << 31) as f32;
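// _mm_cvtepi32_ps converts *signed* 32-bit lanes, so shift right by one
// first to clear the sign bit and scale by 1/2^31 instead of 1/2^32.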
let n4 = unsafe {
_mm_mul_ps(
_mm_cvtepi32_ps(_mm_srli_epi32(self.v, 1)),
_mm_set1_ps(ONE_OVER_31BITS),
)
};
unsafe { std::mem::transmute(n4) }
}
#[inline]
pub fn reverse_bits(self) -> Int4 {
let mut n = self.v;
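// Reverse the bits of each 32-bit lane by swapping progressively smaller
// groups: 16-bit halves, bytes, nibbles, bit pairs, and finally single bits.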
unsafe {
let a = _mm_slli_epi32(n, 16);
let b = _mm_srli_epi32(n, 16);
n = _mm_or_si128(a, b);
//----
let a = _mm_and_si128(
_mm_slli_epi32(n, 8),
_mm_set1_epi32(std::mem::transmute(0xff00ff00u32)),
);
let b = _mm_and_si128(
_mm_srli_epi32(n, 8),
_mm_set1_epi32(std::mem::transmute(0x00ff00ffu32)),
);
n = _mm_or_si128(a, b);
//----
let a = _mm_and_si128(
_mm_slli_epi32(n, 4),
_mm_set1_epi32(std::mem::transmute(0xf0f0f0f0u32)),
);
let b = _mm_and_si128(
_mm_srli_epi32(n, 4),
_mm_set1_epi32(std::mem::transmute(0x0f0f0f0fu32)),
);
n = _mm_or_si128(a, b);
//----
let a = _mm_and_si128(
_mm_slli_epi32(n, 2),
_mm_set1_epi32(std::mem::transmute(0xccccccccu32)),
);
let b = _mm_and_si128(
_mm_srli_epi32(n, 2),
_mm_set1_epi32(std::mem::transmute(0x33333333u32)),
);
n = _mm_or_si128(a, b);
//----
let a = _mm_and_si128(
_mm_slli_epi32(n, 1),
_mm_set1_epi32(std::mem::transmute(0xaaaaaaaau32)),
);
let b = _mm_and_si128(
_mm_srli_epi32(n, 1),
_mm_set1_epi32(std::mem::transmute(0x55555555u32)),
);
n = _mm_or_si128(a, b);
Int4 { v: n }
}
}
}
impl std::ops::Mul for Int4 {
type Output = Int4;
#[inline(always)]
fn mul(self, other: Self) -> Int4 {
// This only works with SSE 4.1 support.
#[cfg(target_feature = "sse4.1")]
unsafe {
use core::arch::x86_64::_mm_mullo_epi32;
Int4 {
v: _mm_mullo_epi32(self.v, other.v),
}
}
// This works on all x86-64 chips.
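// (SSE2 has no lane-wise 32-bit multiply, so one is built from
// _mm_mul_epu32, which multiplies only the even lanes into 64-bit
// products: multiply the even lanes directly, shuffle the odd lanes into
// even positions and multiply those, mask both results down to their low
// 32 bits, then shuffle-and-OR the two halves back together.)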
#[cfg(not(target_feature = "sse4.1"))]
unsafe {
use core::arch::x86_64::{_mm_mul_epu32, _mm_shuffle_epi32};
let a = _mm_and_si128(
_mm_mul_epu32(self.v, other.v),
_mm_set_epi32(0, 0xffffffffu32 as i32, 0, 0xffffffffu32 as i32),
);
let b = _mm_and_si128(
_mm_mul_epu32(
_mm_shuffle_epi32(self.v, 0b11_11_01_01),
_mm_shuffle_epi32(other.v, 0b11_11_01_01),
),
_mm_set_epi32(0, 0xffffffffu32 as i32, 0, 0xffffffffu32 as i32),
);
Int4 {
v: _mm_or_si128(a, _mm_shuffle_epi32(b, 0b10_11_00_01)),
}
}
}
}
impl std::ops::MulAssign for Int4 {
#[inline(always)]
fn mul_assign(&mut self, other: Self) {
*self = *self * other;
}
}
impl std::ops::AddAssign for Int4 {
#[inline(always)]
fn add_assign(&mut self, other: Self) {
*self = Int4 {
v: unsafe { _mm_add_epi32(self.v, other.v) },
};
}
}
impl std::ops::BitAnd for Int4 {
type Output = Int4;
#[inline(always)]
fn bitand(self, other: Self) -> Int4 {
Int4 {
v: unsafe { _mm_and_si128(self.v, other.v) },
}
}
}
impl std::ops::BitAndAssign for Int4 {
fn bitand_assign(&mut self, other: Self) {
*self = *self & other;
}
}
impl std::ops::BitOr for Int4 {
type Output = Int4;
#[inline(always)]
fn bitor(self, other: Self) -> Int4 {
Int4 {
v: unsafe { _mm_or_si128(self.v, other.v) },
}
}
}
impl std::ops::BitOrAssign for Int4 {
fn bitor_assign(&mut self, other: Self) {
*self = *self | other;
}
}
impl std::ops::BitXor for Int4 {
type Output = Int4;
#[inline(always)]
fn bitxor(self, other: Self) -> Int4 {
Int4 {
v: unsafe { _mm_xor_si128(self.v, other.v) },
}
}
}
impl std::ops::BitXorAssign for Int4 {
#[inline(always)]
fn bitxor_assign(&mut self, other: Self) {
*self = *self ^ other;
}
}
impl std::ops::Shl<i32> for Int4 {
type Output = Int4;
#[inline(always)]
fn shl(self, other: i32) -> Int4 {
Int4 {
v: unsafe { _mm_sll_epi32(self.v, _mm_set_epi32(0, 0, 0, other)) },
}
}
}
impl std::ops::Shr<i32> for Int4 {
type Output = Int4;
#[inline(always)]
fn shr(self, other: i32) -> Int4 {
Int4 {
v: unsafe { _mm_srl_epi32(self.v, _mm_set_epi32(0, 0, 0, other)) },
}
}
}
impl From<[u32; 4]> for Int4 {
#[inline(always)]
fn from(v: [u32; 4]) -> Self {
Int4 {
v: unsafe { std::mem::transmute(v) },
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn from_array() {
let a = Int4::from([1, 2, 3, 4]);
assert_eq!(a.get(0), 1);
assert_eq!(a.get(1), 2);
assert_eq!(a.get(2), 3);
assert_eq!(a.get(3), 4);
}
#[test]
fn shr() {
let a = Int4::from([0xffffffff; 4]) >> 16;
assert_eq!(a.get(0), 0x0000ffff);
assert_eq!(a.get(1), 0x0000ffff);
assert_eq!(a.get(2), 0x0000ffff);
assert_eq!(a.get(3), 0x0000ffff);
}
#[test]
fn shl() {
let a = Int4::from([0xffffffff; 4]) << 16;
assert_eq!(a.get(0), 0xffff0000);
assert_eq!(a.get(1), 0xffff0000);
assert_eq!(a.get(2), 0xffff0000);
assert_eq!(a.get(3), 0xffff0000);
}
}
}
#[cfg(target_arch = "x86_64")]
pub(crate) use sse::Int4;
//--------------------------------------------------------------------------
// Fallback
#[cfg(not(target_arch = "x86_64"))]
// #[cfg(not(all(target_arch = "x86_64", target_feature = "sse4.1")))]
pub(crate) mod fallback {
#[derive(Debug, Copy, Clone)]
#[repr(align(16))]
pub(crate) struct Int4 {
v: [u32; 4],
}
impl Int4 {
pub fn zero() -> Int4 {
Int4 { v: [0, 0, 0, 0] }
}
/// Converts the full range of a 32 bit integer to a float in [0, 1).
pub fn to_norm_floats(self) -> [f32; 4] {
const ONE_OVER_32BITS: f32 = 1.0 / (1u64 << 32) as f32;
[
self.v[0] as f32 * ONE_OVER_32BITS,
self.v[1] as f32 * ONE_OVER_32BITS,
self.v[2] as f32 * ONE_OVER_32BITS,
self.v[3] as f32 * ONE_OVER_32BITS,
]
}
pub fn reverse_bits(self) -> Int4 {
Int4 {
v: [
self.v[0].reverse_bits(),
self.v[1].reverse_bits(),
self.v[2].reverse_bits(),
self.v[3].reverse_bits(),
],
}
}
}
impl std::ops::Mul for Int4 {
type Output = Int4;
fn mul(self, other: Self) -> Int4 {
Int4 {
v: [
self.v[0].wrapping_mul(other.v[0]),
self.v[1].wrapping_mul(other.v[1]),
self.v[2].wrapping_mul(other.v[2]),
self.v[3].wrapping_mul(other.v[3]),
],
}
}
}
impl std::ops::MulAssign for Int4 {
fn mul_assign(&mut self, other: Self) {
*self = *self * other;
}
}
impl std::ops::AddAssign for Int4 {
fn add_assign(&mut self, other: Self) {
*self = Int4 {
v: [
self.v[0].wrapping_add(other.v[0]),
self.v[1].wrapping_add(other.v[1]),
self.v[2].wrapping_add(other.v[2]),
self.v[3].wrapping_add(other.v[3]),
],
};
}
}
impl std::ops::BitAnd for Int4 {
type Output = Int4;
fn bitand(self, other: Self) -> Int4 {
Int4 {
v: [
self.v[0] & other.v[0],
self.v[1] & other.v[1],
self.v[2] & other.v[2],
self.v[3] & other.v[3],
],
}
}
}
impl std::ops::BitAndAssign for Int4 {
fn bitand_assign(&mut self, other: Self) {
*self = *self & other;
}
}
impl std::ops::BitOr for Int4 {
type Output = Int4;
fn bitor(self, other: Self) -> Int4 {
Int4 {
v: [
self.v[0] | other.v[0],
self.v[1] | other.v[1],
self.v[2] | other.v[2],
self.v[3] | other.v[3],
],
}
}
}
impl std::ops::BitOrAssign for Int4 {
fn bitor_assign(&mut self, other: Self) {
*self = *self | other;
}
}
impl std::ops::BitXor for Int4 {
type Output = Int4;
fn bitxor(self, other: Self) -> Int4 {
Int4 {
v: [
self.v[0] ^ other.v[0],
self.v[1] ^ other.v[1],
self.v[2] ^ other.v[2],
self.v[3] ^ other.v[3],
],
}
}
}
impl std::ops::BitXorAssign for Int4 {
fn bitxor_assign(&mut self, other: Self) {
*self = *self ^ other;
}
}
impl std::ops::Shl<i32> for Int4 {
type Output = Int4;
#[inline(always)]
fn shl(self, other: i32) -> Int4 {
Int4 {
v: [
self.v[0] << other,
self.v[1] << other,
self.v[2] << other,
self.v[3] << other,
],
}
}
}
impl std::ops::Shr<i32> for Int4 {
type Output = Int4;
#[inline(always)]
fn shr(self, other: i32) -> Int4 {
Int4 {
v: [
self.v[0] >> other,
self.v[1] >> other,
self.v[2] >> other,
self.v[3] >> other,
],
}
}
}
impl From<[u32; 4]> for Int4 {
fn from(v: [u32; 4]) -> Self {
Int4 { v }
}
}
}
#[cfg(not(target_arch = "x86_64"))]
pub(crate) use fallback::Int4;