diff --git a/sub_crates/sobol/src/lib.rs b/sub_crates/sobol/src/lib.rs index a18bb58..b149ea8 100644 --- a/sub_crates/sobol/src/lib.rs +++ b/sub_crates/sobol/src/lib.rs @@ -59,33 +59,17 @@ pub fn sample_4d(sample_index: u32, dimension_set: u32, seed: u32) -> [f32; 4] { //---------------------------------------------------------------------- -// The permutation constants used in `lk_scramble()`. -// Each tuple is for one round of permutation. The first tuple is -// optimized, and the remaining are random aside from making sure -// that they are appropriately even or odd. -const PERMS: &[(u32, u32)] = &[ - (0x9ac7ea2a, 0x7d1e78d3), - (0x2ce68764, 0x9dd00551), - (0x79b82526, 0x2dfc1a6b), - (0xf358b1d0, 0x38743c65), -]; - -// How many permutation rounds to do. -// In practice it seems like one round is plenty, but I'm leaving more -// available in case we want to increase them later. -const ROUNDS: usize = 1; - /// Scrambles `n` using a novel variation on the Laine-Karras hash. /// /// This is equivalent to Owen scrambling, but on reversed bits. #[inline(always)] fn lk_scramble(mut n: u32, scramble: u32) -> u32 { - n = n.wrapping_add(hash(scramble, 2)); + let scramble = hash(scramble); - for &(p1, p2) in PERMS.iter().take(ROUNDS) { - n ^= n.wrapping_mul(p1); - n = n.wrapping_mul(p2); - } + n = n.wrapping_add(scramble); + n ^= n.wrapping_mul(0x3354734a); + n = n.wrapping_add(n << 2); + n ^= n.wrapping_mul(scramble & !1); n } @@ -93,25 +77,28 @@ fn lk_scramble(mut n: u32, scramble: u32) -> u32 { /// Same as `lk_scramble()`, except does it on 4 integers at a time. 
#[inline(always)] fn lk_scramble_int4(mut n: Int4, scramble: u32) -> Int4 { - n += hash_int4([scramble; 4].into(), 2); + let scramble = hash_int4([scramble; 4].into()); - for &(p1, p2) in PERMS.iter().take(ROUNDS) { - n ^= n * [p1; 4].into(); - n *= [p2; 4].into(); - } + n += scramble; + n ^= n * [0x3354734a; 4].into(); + n += n << 2; + n ^= n * (scramble & [!1; 4].into()); n } -/// A simple 32-bit hash function. Its quality can be tuned with -/// the number of rounds used. +/// A good 32-bit hash function. +/// From https://github.com/skeeto/hash-prospector #[inline(always)] -fn hash(n: u32, rounds: u32) -> u32 { +fn hash(n: u32) -> u32 { let mut hash = n ^ 0x79c68e4a; - for _ in 0..rounds { - hash = hash.wrapping_mul(0x736caf6f); - hash ^= hash.wrapping_shr(16); - } + + hash ^= hash >> 16; + hash = hash.wrapping_mul(0x7feb352d); + hash ^= hash >> 15; + hash = hash.wrapping_mul(0x846ca68b); + hash ^= hash >> 16; + hash } @@ -120,12 +107,14 @@ fn hash(n: u32, rounds: u32) -> u32 { /// Each of the four numbers gets a different hash, so even if all input /// numbers are the same, the outputs will still be different for each of them. 
#[inline(always)] -fn hash_int4(n: Int4, rounds: u32) -> Int4 { - let mut hash = n; - hash ^= [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into(); - for _ in 0..rounds { - hash *= [0x736caf6f; 4].into(); - hash ^= hash.shr16(); - } +fn hash_int4(n: Int4) -> Int4 { + let mut hash = n ^ [0x912f69ba, 0x174f18ab, 0x691e72ca, 0xb40cc1b8].into(); + + hash ^= hash >> 16; + hash *= [0x7feb352d; 4].into(); + hash ^= hash >> 15; + hash *= [0x846ca68b; 4].into(); + hash ^= hash >> 16; + + hash } diff --git a/sub_crates/sobol/src/wide.rs b/sub_crates/sobol/src/wide.rs index 8602a2c..831648d 100644 --- a/sub_crates/sobol/src/wide.rs +++ b/sub_crates/sobol/src/wide.rs @@ -5,8 +5,8 @@ pub(crate) mod sse { use core::arch::x86_64::{ __m128i, _mm_add_epi32, _mm_and_si128, _mm_cvtepi32_ps, _mm_mul_ps, _mm_or_si128, - _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_slli_epi32, - _mm_srli_epi32, _mm_xor_si128, + _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_setzero_si128, _mm_sll_epi32, + _mm_slli_epi32, _mm_srl_epi32, _mm_srli_epi32, _mm_xor_si128, }; #[derive(Debug, Copy, Clone)] @@ -91,13 +91,6 @@ pub(crate) mod sse { Int4 { v: n } } } - - #[inline(always)] - pub(crate) fn shr16(self) -> Int4 { - Int4 { - v: unsafe { _mm_srli_epi32(self.v, 16) }, - } - } } impl std::ops::Mul for Int4 { @@ -152,12 +145,54 @@ pub(crate) mod sse { } } + impl std::ops::BitXor for Int4 { + type Output = Int4; + + #[inline(always)] + fn bitxor(self, other: Self) -> Int4 { + Int4 { + v: unsafe { _mm_xor_si128(self.v, other.v) }, + } + } + } + impl std::ops::BitXorAssign for Int4 { #[inline(always)] fn bitxor_assign(&mut self, other: Self) { - *self = Int4 { - v: unsafe { _mm_xor_si128(self.v, other.v) }, - }; + *self = *self ^ other; + } + } + + impl std::ops::BitAnd for Int4 { + type Output = Int4; + + #[inline(always)] + fn bitand(self, other: Self) -> Int4 { + Int4 { + v: unsafe { _mm_and_si128(self.v, other.v) }, + } + } + } + + impl std::ops::Shl<i32> for Int4 { + type Output = Int4;
+ + #[inline(always)] + fn shl(self, other: i32) -> Int4 { + Int4 { + v: unsafe { _mm_sll_epi32(self.v, _mm_set_epi32(0, 0, 0, other)) }, + } + } + } + + impl std::ops::Shr<i32> for Int4 { + type Output = Int4; + + #[inline(always)] + fn shr(self, other: i32) -> Int4 { + Int4 { + v: unsafe { _mm_srl_epi32(self.v, _mm_set_epi32(0, 0, 0, other)) }, + } } } @@ -210,14 +245,18 @@ pub(crate) mod fallback { ], } } + } - pub(crate) fn shr16(self) -> Int4 { + impl std::ops::Mul for Int4 { + type Output = Int4; + + fn mul(self, other: Self) -> Int4 { Int4 { v: [ - self.v[0] >> 16, - self.v[1] >> 16, - self.v[2] >> 16, - self.v[3] >> 16, + self.v[0].wrapping_mul(other.v[0]), + self.v[1].wrapping_mul(other.v[1]), + self.v[2].wrapping_mul(other.v[2]), + self.v[3].wrapping_mul(other.v[3]), ], } } @@ -225,14 +264,7 @@ pub(crate) mod fallback { impl std::ops::MulAssign for Int4 { fn mul_assign(&mut self, other: Self) { - *self = Int4 { - v: [ - self.v[0].wrapping_mul(other.v[0]), - self.v[1].wrapping_mul(other.v[1]), - self.v[2].wrapping_mul(other.v[2]), - self.v[3].wrapping_mul(other.v[3]), - ], - }; + *self = *self * other; } } @@ -249,16 +281,75 @@ pub(crate) mod fallback { } } - impl std::ops::BitXorAssign for Int4 { - fn bitxor_assign(&mut self, other: Self) { - *self = Int4 { + impl std::ops::BitAnd for Int4 { + type Output = Int4; + fn bitand(self, other: Self) -> Int4 { + Int4 { + v: [ + self.v[0] & other.v[0], + self.v[1] & other.v[1], + self.v[2] & other.v[2], + self.v[3] & other.v[3], + ], + } + } + } + + impl std::ops::BitAndAssign for Int4 { + fn bitand_assign(&mut self, other: Self) { + *self = *self & other; + } + } + + impl std::ops::BitXor for Int4 { + type Output = Int4; + fn bitxor(self, other: Self) -> Int4 { + Int4 { v: [ self.v[0] ^ other.v[0], self.v[1] ^ other.v[1], self.v[2] ^ other.v[2], self.v[3] ^ other.v[3], ], - }; + } + } + } + + impl std::ops::BitXorAssign for Int4 { + fn bitxor_assign(&mut self, other: Self) { + *self = *self ^ other; + } + } + + impl std::ops::Shl<i32>
for Int4 { + type Output = Int4; + + #[inline(always)] + fn shl(self, other: i32) -> Int4 { + Int4 { + v: [ + self.v[0] << other, + self.v[1] << other, + self.v[2] << other, + self.v[3] << other, + ], + } + } + } + + impl std::ops::Shr<i32> for Int4 { + type Output = Int4; + + #[inline(always)] + fn shr(self, other: i32) -> Int4 { + Int4 { + v: [ + self.v[0] >> other, + self.v[1] >> other, + self.v[2] >> other, + self.v[3] >> other, + ], + } } }