Move sobol crate outside of Psychopath into its own repo.

2021-05-11 13:35:17 -07:00 · 2021-05-11 13:35:17 -07:00 · 706902dc8e
commit 706902dc8e
parent 70fba19361
11 changed files with 15 additions and 1917 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -213,9 +213,9 @@ dependencies = [

 [[package]]
 name = "kioku"
-version = "0.3.0"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "30217b27028ccf61ba75afa4f9bb7be8e4a66780d7882fdaacd9ea8e0cdede26"
+checksum = "6d967239fdf2a2467247931a3055f627af65358fc99b534b5dc1ef8520384999"

 [[package]]
 name = "lazy_static"
@ -252,9 +252,9 @@ dependencies = [

 [[package]]
 name = "memchr"
-version = "2.3.4"
+version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
+checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"

 [[package]]
 name = "nom"
@ -368,7 +368,7 @@ dependencies = [
 "png_encode_mini",
 "rustc-serialize",
 "scoped_threadpool",
- "sobol",
+ "sobol_burley",
 "spectral_upsampling",
 "time",
 ]
@ -549,18 +549,18 @@ dependencies = [

 [[package]]
 name = "redox_syscall"
-version = "0.2.7"
+version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85dd92e586f7355c633911e11f77f3d12f04b1b1bd76a198bd34ae3af8341ef2"
+checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc"
 dependencies = [
 "bitflags",
 ]

 [[package]]
 name = "regex-syntax"
-version = "0.6.23"
+version = "0.6.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548"
+checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"

 [[package]]
 name = "remove_dir_all"
@ -608,12 +608,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8"

 [[package]]
-name = "sobol"
+name = "sobol_burley"
 version = "0.1.0"
-dependencies = [
- "bencher",
- "rand 0.6.5",
-]
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92317634ef0e1ece7bd4031abc3186df3781d0bf2b05682796655200aef4c965"

 [[package]]
 name = "spectral_upsampling"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -5,7 +5,6 @@ members = [
    "sub_crates/compact",
    "sub_crates/halton",
    "sub_crates/math3d",
-    "sub_crates/sobol",
    "sub_crates/spectral_upsampling",
 ]

@ -31,6 +30,7 @@ nom = "5"
 num_cpus = "1.8"
 openexr = "0.7"
 kioku = "0.3"
+sobol_burley = "0.1"
 png_encode_mini = "0.1.2"
 rustc-serialize = "0.3"
 scoped_threadpool = "0.1"
@ -54,8 +54,5 @@ path = "sub_crates/halton"
 [dependencies.math3d]
 path = "sub_crates/math3d"

-[dependencies.sobol]
-path = "sub_crates/sobol"
-
 [dependencies.spectral_upsampling]
 path = "sub_crates/spectral_upsampling"
--- a/src/renderer.rs
+++ b/src/renderer.rs
@ -706,9 +706,9 @@ fn get_sample_4d(
    let seed = pixel_co.0 ^ (pixel_co.1 << 16) ^ seed.wrapping_mul(0x736caf6f);

    match dimension_set {
-        ds if ds < sobol::MAX_DIMENSION_SET as u32 => {
+        ds if ds < sobol_burley::MAX_DIMENSION_SET as u32 => {
            // Sobol sampling.
-            let n4 = sobol::sample_4d(i, ds, seed);
+            let n4 = sobol_burley::sample_4d(i, ds, seed);
            (n4[0], n4[1], n4[2], n4[3])
        }
        ds => {
--- a/sub_crates/sobol/Cargo.toml
+++ b/sub_crates/sobol/Cargo.toml
@ -1,19 +0,0 @@
-[package]
-name = "sobol"
-version = "0.1.0"
-authors = ["Nathan Vegdahl <cessen@cessen.com>"]
-edition = "2018"
-license = "MIT, Apache 2.0"
-build = "build.rs"
-
-[lib]
-name = "sobol"
-path = "src/lib.rs"
-
-[dev-dependencies]
-rand = "0.6"
-bencher = "0.1.5"
-
-[[bench]]
-name = "bench"
-harness = false
--- a/sub_crates/sobol/LICENSE.md
+++ b/sub_crates/sobol/LICENSE.md
@ -1,18 +0,0 @@
-## Direction numbers and adapted code
-
-The Sobol direction numbers under `direction_numbers/` are under their own license.  See `direction_numbers/LICENSE.txt` for details.
-
-Some of the code in `build.rs` is adapted to Rust from another source.  The license for that code is embedded in the comments in that file.
-
-
-
-## Remaining code
-
-Copyright (c) 2020 Nathan Vegdahl
-
-This project is licensed under either of
-
-* MIT license (licenses/MIT.txt or http://opensource.org/licenses/MIT)
-* Apache License, Version 2.0, (licenses/Apache-2.0.txt or http://www.apache.org/licenses/LICENSE-2.0)
-
-at your option.
--- a/sub_crates/sobol/benches/bench.rs
+++ b/sub_crates/sobol/benches/bench.rs
@ -1,34 +0,0 @@
-use bencher::{benchmark_group, benchmark_main, black_box, Bencher};
-use rand::{rngs::SmallRng, FromEntropy, Rng};
-use sobol::sample_4d;
-
-//----
-
-fn gen_1000_samples(bench: &mut Bencher) {
-    bench.iter(|| {
-        for i in 0..1000u32 {
-            black_box(sample_4d(i, 0, 1234567890));
-        }
-    });
-}
-
-fn gen_1000_samples_incoherent(bench: &mut Bencher) {
-    let mut rng = SmallRng::from_entropy();
-    bench.iter(|| {
-        let s = rng.gen::<u32>();
-        let d = rng.gen::<u32>();
-        let seed = rng.gen::<u32>();
-        for i in 0..1000u32 {
-            black_box(sample_4d(
-                s.wrapping_add(i).wrapping_mul(512),
-                d.wrapping_add(i).wrapping_mul(97) % 32,
-                seed,
-            ));
-        }
-    });
-}
-
-//----
-
-benchmark_group!(benches, gen_1000_samples, gen_1000_samples_incoherent,);
-benchmark_main!(benches);
--- a/sub_crates/sobol/build.rs
+++ b/sub_crates/sobol/build.rs
@ -1,190 +0,0 @@
-//! This file generates the Sobol direction vectors used by this crate's
-//! Sobol sequence.
-
-use std::{env, fs::File, io::Write, path::Path};
-
-/// How many components to generate.
-const NUM_DIMENSIONS: usize = 128;
-
-/// What file to generate the numbers from.
-const DIRECTION_NUMBERS_TEXT: &str = include_str!("direction_numbers/new-joe-kuo-6.1024.txt");
-
-fn main() {
-    let out_dir = env::var("OUT_DIR").unwrap();
-    let dest_path = Path::new(&out_dir).join("vectors.inc");
-    let mut f = File::create(&dest_path).unwrap();
-
-    // Init direction vectors.
-    let vectors = generate_direction_vectors(NUM_DIMENSIONS);
-
-    // Write dimensions limit.
-    f.write_all(format!("const MAX_DIMENSION: u32 = {};\n", NUM_DIMENSIONS).as_bytes())
-        .unwrap();
-
-    // Write the vectors.
-    // We write them in a rather atypical way because of how the library
-    // uses them.  First, we interleave the numbers of each set of four
-    // dimensions, for SIMD evaluation.  Second, each number is written
-    // with reversed bits, to avoid needing to reverse them before scrambling.
-    f.write_all(format!("const REV_VECTORS: &[[[u{0}; 4]; {0}]] = &[\n", SOBOL_BITS).as_bytes())
-        .unwrap();
-    for d4 in vectors.chunks_exact(4) {
-        f.write_all("  [\n".as_bytes()).unwrap();
-        for ((a, b), (c, d)) in d4[0]
-            .iter()
-            .zip(d4[1].iter())
-            .zip(d4[2].iter().zip(d4[3].iter()))
-        {
-            f.write_all(
-                format!(
-                    "    [0x{:08x}, 0x{:08x}, 0x{:08x}, 0x{:08x}],\n",
-                    a.reverse_bits(),
-                    b.reverse_bits(),
-                    c.reverse_bits(),
-                    d.reverse_bits()
-                )
-                .as_bytes(),
-            )
-            .unwrap();
-        }
-        f.write_all("  ],\n".as_bytes()).unwrap();
-    }
-    f.write_all("];\n".as_bytes()).unwrap();
-}
-
-//======================================================================
-
-// The following is adapted from the code on this webpage:
-//
-// http://web.maths.unsw.edu.au/~fkuo/sobol/
-//
-// From these papers:
-//
-//     * S. Joe and F. Y. Kuo, Remark on Algorithm 659: Implementing Sobol's
-//       quasirandom sequence generator, ACM Trans. Math. Softw. 29,
-//       49-57 (2003)
-//
-//     * S. Joe and F. Y. Kuo, Constructing Sobol sequences with better
-//       two-dimensional projections, SIAM J. Sci. Comput. 30, 2635-2654 (2008)
-//
-// The adapted code is under the following license:
-//
-//     Copyright (c) 2008, Frances Y. Kuo and Stephen Joe
-//     All rights reserved.
-//
-//     Redistribution and use in source and binary forms, with or without
-//     modification, are permitted provided that the following conditions are
-//     met:
-//
-//       * Redistributions of source code must retain the above copyright
-//         notice, this list of conditions and the following disclaimer.
-//
-//       * Redistributions in binary form must reproduce the above copyright
-//         notice, this list of conditions and the following disclaimer in the
-//         documentation and/or other materials provided with the
-//         distribution.
-//
-//       * Neither the names of the copyright holders nor the names of the
-//         University of New South Wales and the University of Waikato
-//         and its contributors may be used to endorse or promote products
-//         derived from this software without specific prior written
-//         permission.
-//
-//     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-//     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-//     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-//     PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
-//     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-//     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-//     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-//     BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-//     WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
-//     OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-//     IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-type SobolInt = u32;
-const SOBOL_BITS: usize = std::mem::size_of::<SobolInt>() * 8;
-
-pub fn generate_direction_vectors(dimensions: usize) -> Vec<[SobolInt; SOBOL_BITS]> {
-    let mut vectors = Vec::new();
-
-    // Calculate first dimension, which is just the van der Corput sequence.
-    let mut dim_0 = [0 as SobolInt; SOBOL_BITS];
-    for i in 0..SOBOL_BITS {
-        dim_0[i] = 1 << (SOBOL_BITS - 1 - i);
-    }
-    vectors.push(dim_0);
-
-    // Do the rest of the dimensions.
-    let mut lines = DIRECTION_NUMBERS_TEXT.lines();
-    for _ in 1..dimensions {
-        let mut v = [0 as SobolInt; SOBOL_BITS];
-
-        // Get data from the next valid line from the direction numbers text
-        // file.
-        let (s, a, m) = loop {
-            if let Ok((a, m)) = parse_direction_numbers(
-                lines
-                    .next()
-                    .expect("Not enough direction numbers for the requested number of dimensions."),
-            ) {
-                break (m.len(), a, m);
-            }
-        };
-
-        // Generate the direction numbers for this dimension.
-        if SOBOL_BITS <= s as usize {
-            for i in 0..SOBOL_BITS {
-                v[i] = (m[i] << (SOBOL_BITS - 1 - i)) as SobolInt;
-            }
-        } else {
-            for i in 0..(s as usize) {
-                v[i] = (m[i] << (SOBOL_BITS - 1 - i)) as SobolInt;
-            }
-
-            for i in (s as usize)..SOBOL_BITS {
-                v[i] = v[i - s as usize] ^ (v[i - s as usize] >> s);
-
-                for k in 1..s {
-                    v[i] ^= ((a >> (s - 1 - k)) & 1) as SobolInt * v[i - k as usize];
-                }
-            }
-        }
-
-        vectors.push(v);
-    }
-
-    vectors
-}
-
-/// Parses the direction numbers from a single line of the direction numbers
-/// text file.  Returns the `a` and `m` parts.
-fn parse_direction_numbers(text: &str) -> Result<(u32, Vec<u32>), Box<dyn std::error::Error>> {
-    let mut numbers = text.split_whitespace();
-    if numbers.clone().count() < 4 || text.starts_with("#") {
-        return Err(Box::new(ParseError(())));
-    }
-
-    // Skip the first two numbers, which are just the dimension and the count
-    // of direction numbers for this dimension.
-    let _ = numbers.next().unwrap().parse::<u32>()?;
-    let _ = numbers.next().unwrap().parse::<u32>()?;
-
-    let a = numbers.next().unwrap().parse::<u32>()?;
-
-    let mut m = Vec::new();
-    for n in numbers {
-        m.push(n.parse::<u32>()?);
-    }
-
-    Ok((a, m))
-}
-
-#[derive(Debug, Copy, Clone)]
-struct ParseError(());
-impl std::error::Error for ParseError {}
-impl std::fmt::Display for ParseError {
-    fn fmt(&self, _f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
-        Ok(())
-    }
-}
--- a/sub_crates/sobol/direction_numbers/LICENSE.txt
+++ b/sub_crates/sobol/direction_numbers/LICENSE.txt
@ -1,43 +0,0 @@
-The data in this folder is from the website
-http://web.maths.unsw.edu.au/~fkuo/sobol/
-
-From these papers:
-  * S. Joe and F. Y. Kuo, Remark on Algorithm 659: Implementing
-    Sobol's quasirandom sequence generator, ACM Trans. Math. Softw. 29,
-    49-57 (2003)
-  * S. Joe and F. Y. Kuo, Constructing Sobol sequences with better
-    two-dimensional projections, SIAM J. Sci. Comput. 30, 2635-2654
-    (2008)
-
-Copyright (c) 2008, Frances Y. Kuo and Stephen Joe
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-  * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-
-  * Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the
-    distribution.
-
-  * Neither the names of the copyright holders nor the names of the
-    University of New South Wales and the University of Waikato
-    and its contributors may be used to endorse or promote products
-    derived from this software without specific prior written
-    permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/sub_crates/sobol/direction_numbers/new-joe-kuo-6.1024.txt
+++ b/sub_crates/sobol/direction_numbers/new-joe-kuo-6.1024.txt
--- a/sub_crates/sobol/src/lib.rs
+++ b/sub_crates/sobol/src/lib.rs
@ -1,124 +0,0 @@
-//! A seedable, Owen-scrambled Sobol sequence.
-//!
-//! This is based on the paper "Practical Hash-based Owen Scrambling"
-//! by Brent Burley, but using the scramble function from
-//! https://psychopath.io/post/2021_01_30_building_a_better_lk_hash
-//! in place of the Laine-Karras function used in the paper, and with a
-//! larger set of direction numbers due to Kuo et al.
-//!
-//! This implementation is limited to `2^16` samples, and will loop back
-//! to the start of the sequence after that limit.
-
-#![allow(clippy::unreadable_literal)]
-
-mod wide;
-use wide::Int4;
-
-// This `include` provides `MAX_DIMENSION` and `REV_VECTORS`.
-// See the build.rs file for how this included file is generated.
-include!(concat!(env!("OUT_DIR"), "/vectors.inc"));
-
-pub const MAX_DIMENSION_SET: u32 = MAX_DIMENSION / 4;
-
-/// Compute four dimensions of a single sample in the Sobol sequence.
-///
-/// `sample_index` specifies which sample in the Sobol sequence to compute.
-///
-/// `dimension_set` specifies which four dimensions to compute. `0` yields the
-/// first four dimensions, `1` the second four dimensions, and so on.
-///
-/// `seed` produces statistically independent Sobol sequences.  Passing two
-/// different seeds will produce two different sequences that are only randomly
-/// associated, with no stratification or correlation between them.
-#[inline]
-pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] {
-    assert!(dimension_set < MAX_DIMENSION_SET);
-    let vecs = &REV_VECTORS[dimension_set as usize];
-
-    // Shuffle the index using the given seed to produce a unique statistically
-    // independent Sobol sequence.
-    let shuffled_rev_index = scramble(sample_index.reverse_bits(), seed);
-
-    // Compute the Sobol sample with reversed bits.
-    let mut sobol_rev = Int4::zero();
-    let mut index = shuffled_rev_index & 0xffff0000; // Only use the top 16 bits.
-    let mut i = 0;
-    while index != 0 {
-        let j = index.leading_zeros();
-        sobol_rev ^= vecs[(i + j) as usize].into();
-        i += j + 1;
-        index <<= j;
-        index <<= 1;
-    }
-
-    // Do Owen scrambling on the reversed-bits Sobol sample.
-    let sobol_owen_rev = scramble_int4(sobol_rev, dimension_set ^ seed);
-
-    // Un-reverse the bits and convert to floating point in [0, 1).
-    sobol_owen_rev.reverse_bits().to_norm_floats()
-}
-
-//----------------------------------------------------------------------
-
-/// Scrambles `n` using the hash function from
-/// https://psychopath.io/post/2021_01_30_building_a_better_lk_hash
-///
-/// This is equivalent to Owen scrambling, but on reversed bits.
-#[inline(always)]
-fn scramble(mut n: u32, scramble: u32) -> u32 {
-    let scramble = hash(scramble);
-
-    n ^= n.wrapping_mul(0x3d20adea);
-    n = n.wrapping_add(scramble);
-    n = n.wrapping_mul((scramble >> 16) | 1);
-    n ^= n.wrapping_mul(0x05526c56);
-    n ^= n.wrapping_mul(0x53a22864);
-
-    n
-}
-
-/// Same as `scramble()`, except does it on 4 integers at a time.
-#[inline(always)]
-fn scramble_int4(mut n: Int4, scramble: u32) -> Int4 {
-    let scramble = hash_int4([scramble; 4].into());
-
-    n ^= n * [0x3d20adea; 4].into();
-    n += scramble;
-    n *= (scramble >> 16) | [1; 4].into();
-    n ^= n * [0x05526c56; 4].into();
-    n ^= n * [0x53a22864; 4].into();
-
-    n
-}
-
-/// A good 32-bit hash function.
-/// From https://github.com/skeeto/hash-prospector
-#[inline(always)]
-fn hash(n: u32) -> u32 {
-    let mut hash = n ^ 0x79c68e4a;
-
-    hash ^= hash >> 16;
-    hash = hash.wrapping_mul(0x7feb352d);
-    hash ^= hash >> 15;
-    hash = hash.wrapping_mul(0x846ca68b);
-    hash ^= hash >> 16;
-
-    hash
-}
-
-/// Same as `hash()` except performs hashing on four numbers at once.
-///
-/// Each of the four numbers gets a different hash, so even if all input
-/// numbers are the same, the outputs will still be different for each of them.
-#[inline(always)]
-fn hash_int4(n: Int4) -> Int4 {
-    let mut hash = n ^ [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into();
-
-    hash ^= hash >> 16;
-    hash *= [0x7feb352d; 4].into();
-    hash ^= hash >> 15;
-    hash *= [0x846ca68b; 4].into();
-    hash ^= hash >> 16;
-
-    hash
-}
--- a/sub_crates/sobol/src/wide.rs
+++ b/sub_crates/sobol/src/wide.rs
@ -1,445 +0,0 @@
-//--------------------------------------------------------------------------
-// x86/64 SSE
-#[cfg(target_arch = "x86_64")]
-// #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
-pub(crate) mod sse {
-    use core::arch::x86_64::{
-        __m128i, _mm_add_epi32, _mm_and_si128, _mm_cvtepi32_ps, _mm_mul_ps, _mm_or_si128,
-        _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_sll_epi32,
-        _mm_slli_epi32, _mm_srl_epi32, _mm_srli_epi32, _mm_xor_si128,
-    };
-
-    #[derive(Debug, Copy, Clone)]
-    pub(crate) struct Int4 {
-        v: __m128i,
-    }
-
-    impl Int4 {
-        #[inline(always)]
-        pub fn zero() -> Int4 {
-            Int4 {
-                v: unsafe { _mm_setzero_si128() },
-            }
-        }
-
-        /// For testing.
-        #[allow(dead_code)]
-        fn get(self, i: usize) -> u32 {
-            let n: [u32; 4] = unsafe { std::mem::transmute(self) };
-            n[i]
-        }
-
-        /// Converts the full range of a 32 bit integer to a float in [0, 1).
-        #[inline(always)]
-        pub fn to_norm_floats(self) -> [f32; 4] {
-            const ONE_OVER_31BITS: f32 = 1.0 / (1u64 << 31) as f32;
-            let n4 = unsafe {
-                _mm_mul_ps(
-                    _mm_cvtepi32_ps(_mm_srli_epi32(self.v, 1)),
-                    _mm_set1_ps(ONE_OVER_31BITS),
-                )
-            };
-
-            unsafe { std::mem::transmute(n4) }
-        }
-
-        #[inline]
-        pub fn reverse_bits(self) -> Int4 {
-            let mut n = self.v;
-            unsafe {
-                let a = _mm_slli_epi32(n, 16);
-                let b = _mm_srli_epi32(n, 16);
-                n = _mm_or_si128(a, b);
-
-                //----
-                let a = _mm_and_si128(
-                    _mm_slli_epi32(n, 8),
-                    _mm_set1_epi32(std::mem::transmute(0xff00ff00u32)),
-                );
-                let b = _mm_and_si128(
-                    _mm_srli_epi32(n, 8),
-                    _mm_set1_epi32(std::mem::transmute(0x00ff00ffu32)),
-                );
-                n = _mm_or_si128(a, b);
-
-                //----
-                let a = _mm_and_si128(
-                    _mm_slli_epi32(n, 4),
-                    _mm_set1_epi32(std::mem::transmute(0xf0f0f0f0u32)),
-                );
-                let b = _mm_and_si128(
-                    _mm_srli_epi32(n, 4),
-                    _mm_set1_epi32(std::mem::transmute(0x0f0f0f0fu32)),
-                );
-                n = _mm_or_si128(a, b);
-
-                //----
-                let a = _mm_and_si128(
-                    _mm_slli_epi32(n, 2),
-                    _mm_set1_epi32(std::mem::transmute(0xccccccccu32)),
-                );
-                let b = _mm_and_si128(
-                    _mm_srli_epi32(n, 2),
-                    _mm_set1_epi32(std::mem::transmute(0x33333333u32)),
-                );
-                n = _mm_or_si128(a, b);
-
-                //----
-                let a = _mm_and_si128(
-                    _mm_slli_epi32(n, 1),
-                    _mm_set1_epi32(std::mem::transmute(0xaaaaaaaau32)),
-                );
-                let b = _mm_and_si128(
-                    _mm_srli_epi32(n, 1),
-                    _mm_set1_epi32(std::mem::transmute(0x55555555u32)),
-                );
-                n = _mm_or_si128(a, b);
-
-                Int4 { v: n }
-            }
-        }
-    }
-
-    impl std::ops::Mul for Int4 {
-        type Output = Int4;
-
-        #[inline(always)]
-        fn mul(self, other: Self) -> Int4 {
-            // This only works with SSE 4.1 support.
-            #[cfg(target_feature = "sse4.1")]
-            unsafe {
-                use core::arch::x86_64::_mm_mullo_epi32;
-                Int4 {
-                    v: _mm_mullo_epi32(self.v, other.v),
-                }
-            }
-
-            // This works on all x86-64 chips.
-            #[cfg(not(target_feature = "sse4.1"))]
-            unsafe {
-                use core::arch::x86_64::{_mm_mul_epu32, _mm_shuffle_epi32};
-                let a = _mm_and_si128(
-                    _mm_mul_epu32(self.v, other.v),
-                    _mm_set_epi32(0, 0xffffffffu32 as i32, 0, 0xffffffffu32 as i32),
-                );
-                let b = _mm_and_si128(
-                    _mm_mul_epu32(
-                        _mm_shuffle_epi32(self.v, 0b11_11_01_01),
-                        _mm_shuffle_epi32(other.v, 0b11_11_01_01),
-                    ),
-                    _mm_set_epi32(0, 0xffffffffu32 as i32, 0, 0xffffffffu32 as i32),
-                );
-                Int4 {
-                    v: _mm_or_si128(a, _mm_shuffle_epi32(b, 0b10_11_00_01)),
-                }
-            }
-        }
-    }
-
-    impl std::ops::MulAssign for Int4 {
-        #[inline(always)]
-        fn mul_assign(&mut self, other: Self) {
-            *self = *self * other;
-        }
-    }
-
-    impl std::ops::AddAssign for Int4 {
-        #[inline(always)]
-        fn add_assign(&mut self, other: Self) {
-            *self = Int4 {
-                v: unsafe { _mm_add_epi32(self.v, other.v) },
-            };
-        }
-    }
-
-    impl std::ops::BitAnd for Int4 {
-        type Output = Int4;
-
-        #[inline(always)]
-        fn bitand(self, other: Self) -> Int4 {
-            Int4 {
-                v: unsafe { _mm_and_si128(self.v, other.v) },
-            }
-        }
-    }
-
-    impl std::ops::BitAndAssign for Int4 {
-        fn bitand_assign(&mut self, other: Self) {
-            *self = *self & other;
-        }
-    }
-
-    impl std::ops::BitOr for Int4 {
-        type Output = Int4;
-
-        #[inline(always)]
-        fn bitor(self, other: Self) -> Int4 {
-            Int4 {
-                v: unsafe { _mm_or_si128(self.v, other.v) },
-            }
-        }
-    }
-
-    impl std::ops::BitOrAssign for Int4 {
-        fn bitor_assign(&mut self, other: Self) {
-            *self = *self | other;
-        }
-    }
-
-    impl std::ops::BitXor for Int4 {
-        type Output = Int4;
-
-        #[inline(always)]
-        fn bitxor(self, other: Self) -> Int4 {
-            Int4 {
-                v: unsafe { _mm_xor_si128(self.v, other.v) },
-            }
-        }
-    }
-
-    impl std::ops::BitXorAssign for Int4 {
-        #[inline(always)]
-        fn bitxor_assign(&mut self, other: Self) {
-            *self = *self ^ other;
-        }
-    }
-
-    impl std::ops::Shl<i32> for Int4 {
-        type Output = Int4;
-
-        #[inline(always)]
-        fn shl(self, other: i32) -> Int4 {
-            Int4 {
-                v: unsafe { _mm_sll_epi32(self.v, _mm_set_epi32(0, 0, 0, other)) },
-            }
-        }
-    }
-
-    impl std::ops::Shr<i32> for Int4 {
-        type Output = Int4;
-
-        #[inline(always)]
-        fn shr(self, other: i32) -> Int4 {
-            Int4 {
-                v: unsafe { _mm_srl_epi32(self.v, _mm_set_epi32(0, 0, 0, other)) },
-            }
-        }
-    }
-
-    impl From<[u32; 4]> for Int4 {
-        #[inline(always)]
-        fn from(v: [u32; 4]) -> Self {
-            Int4 {
-                v: unsafe { std::mem::transmute(v) },
-            }
-        }
-    }
-
-    #[cfg(test)]
-    mod tests {
-        use super::*;
-
-        #[test]
-        fn from_array() {
-            let a = Int4::from([1, 2, 3, 4]);
-            assert_eq!(a.get(0), 1);
-            assert_eq!(a.get(1), 2);
-            assert_eq!(a.get(2), 3);
-            assert_eq!(a.get(3), 4);
-        }
-
-        #[test]
-        fn shr() {
-            let a = Int4::from([0xffffffff; 4]) >> 16;
-            assert_eq!(a.get(0), 0x0000ffff);
-            assert_eq!(a.get(1), 0x0000ffff);
-            assert_eq!(a.get(2), 0x0000ffff);
-            assert_eq!(a.get(3), 0x0000ffff);
-        }
-
-        #[test]
-        fn shl() {
-            let a = Int4::from([0xffffffff; 4]) << 16;
-            assert_eq!(a.get(0), 0xffff0000);
-            assert_eq!(a.get(1), 0xffff0000);
-            assert_eq!(a.get(2), 0xffff0000);
-            assert_eq!(a.get(3), 0xffff0000);
-        }
-    }
-}
-#[cfg(target_arch = "x86_64")]
-pub(crate) use sse::Int4;
-
-//--------------------------------------------------------------------------
-// Fallback
-#[cfg(not(target_arch = "x86_64"))]
-// #[cfg(not(all(target_arch = "x86_64", target_feature = "sse4.1")))]
-pub(crate) mod fallback {
-    #[derive(Debug, Copy, Clone)]
-    #[repr(align(16))]
-    pub(crate) struct Int4 {
-        v: [u32; 4],
-    }
-
-    impl Int4 {
-        pub fn zero() -> Int4 {
-            Int4 { v: [0, 0, 0, 0] }
-        }
-
-        /// Converts the full range of a 32 bit integer to a float in [0, 1).
-        pub fn to_norm_floats(self) -> [f32; 4] {
-            const ONE_OVER_32BITS: f32 = 1.0 / (1u64 << 32) as f32;
-            [
-                self.v[0] as f32 * ONE_OVER_32BITS,
-                self.v[1] as f32 * ONE_OVER_32BITS,
-                self.v[2] as f32 * ONE_OVER_32BITS,
-                self.v[3] as f32 * ONE_OVER_32BITS,
-            ]
-        }
-
-        pub fn reverse_bits(self) -> Int4 {
-            Int4 {
-                v: [
-                    self.v[0].reverse_bits(),
-                    self.v[1].reverse_bits(),
-                    self.v[2].reverse_bits(),
-                    self.v[3].reverse_bits(),
-                ],
-            }
-        }
-    }
-
-    impl std::ops::Mul for Int4 {
-        type Output = Int4;
-
-        fn mul(self, other: Self) -> Int4 {
-            Int4 {
-                v: [
-                    self.v[0].wrapping_mul(other.v[0]),
-                    self.v[1].wrapping_mul(other.v[1]),
-                    self.v[2].wrapping_mul(other.v[2]),
-                    self.v[3].wrapping_mul(other.v[3]),
-                ],
-            }
-        }
-    }
-
-    impl std::ops::MulAssign for Int4 {
-        fn mul_assign(&mut self, other: Self) {
-            *self = *self * other;
-        }
-    }
-
-    impl std::ops::AddAssign for Int4 {
-        fn add_assign(&mut self, other: Self) {
-            *self = Int4 {
-                v: [
-                    self.v[0].wrapping_add(other.v[0]),
-                    self.v[1].wrapping_add(other.v[1]),
-                    self.v[2].wrapping_add(other.v[2]),
-                    self.v[3].wrapping_add(other.v[3]),
-                ],
-            };
-        }
-    }
-
-    impl std::ops::BitAnd for Int4 {
-        type Output = Int4;
-        fn bitand(self, other: Self) -> Int4 {
-            Int4 {
-                v: [
-                    self.v[0] & other.v[0],
-                    self.v[1] & other.v[1],
-                    self.v[2] & other.v[2],
-                    self.v[3] & other.v[3],
-                ],
-            }
-        }
-    }
-
-    impl std::ops::BitAndAssign for Int4 {
-        fn bitand_assign(&mut self, other: Self) {
-            *self = *self & other;
-        }
-    }
-
-    impl std::ops::BitOr for Int4 {
-        type Output = Int4;
-        fn bitor(self, other: Self) -> Int4 {
-            Int4 {
-                v: [
-                    self.v[0] | other.v[0],
-                    self.v[1] | other.v[1],
-                    self.v[2] | other.v[2],
-                    self.v[3] | other.v[3],
-                ],
-            }
-        }
-    }
-
-    impl std::ops::BitOrAssign for Int4 {
-        fn bitor_assign(&mut self, other: Self) {
-            *self = *self | other;
-        }
-    }
-
-    impl std::ops::BitXor for Int4 {
-        type Output = Int4;
-        fn bitxor(self, other: Self) -> Int4 {
-            Int4 {
-                v: [
-                    self.v[0] ^ other.v[0],
-                    self.v[1] ^ other.v[1],
-                    self.v[2] ^ other.v[2],
-                    self.v[3] ^ other.v[3],
-                ],
-            }
-        }
-    }
-
-    impl std::ops::BitXorAssign for Int4 {
-        fn bitxor_assign(&mut self, other: Self) {
-            *self = *self ^ other;
-        }
-    }
-
-    impl std::ops::Shl<i32> for Int4 {
-        type Output = Int4;
-
-        #[inline(always)]
-        fn shl(self, other: i32) -> Int4 {
-            Int4 {
-                v: [
-                    self.v[0] << other,
-                    self.v[1] << other,
-                    self.v[2] << other,
-                    self.v[3] << other,
-                ],
-            }
-        }
-    }
-
-    impl std::ops::Shr<i32> for Int4 {
-        type Output = Int4;
-
-        #[inline(always)]
-        fn shr(self, other: i32) -> Int4 {
-            Int4 {
-                v: [
-                    self.v[0] >> other,
-                    self.v[1] >> other,
-                    self.v[2] >> other,
-                    self.v[3] >> other,
-                ],
-            }
-        }
-    }
-
-    impl From<[u32; 4]> for Int4 {
-        fn from(v: [u32; 4]) -> Self {
-            Int4 { v }
-        }
-    }
-}
-#[cfg(not(target_arch = "x86_64"))]
-pub(crate) use fallback::Int4;