diff --git a/Cargo.lock b/Cargo.lock index f3a62d6..998b57d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,9 +67,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "float4" version = "0.1.0" -dependencies = [ - "simd 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", -] [[package]] name = "half" @@ -198,11 +195,6 @@ name = "scoped_threadpool" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "simd" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "sobol" version = "0.1.0" @@ -296,7 +288,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda" "checksum safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f" "checksum scoped_threadpool 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8" -"checksum simd 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3dd0805c7363ab51a829a1511ad24b6ed0349feaa756c4bc2f977f9f496e6673" "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" "checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693" diff --git a/Cargo.toml b/Cargo.toml index a5192e8..c6343ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,9 +15,6 @@ name = "psychopath" version = "0.1.0" authors = ["Nathan Vegdahl "] -[features] 
-simd_perf = ["float4/simd_perf", "math3d/simd_perf"] - [profile.release] debug = true diff --git a/sub_crates/float4/Cargo.toml b/sub_crates/float4/Cargo.toml index e2efc1c..d7c4b70 100644 --- a/sub_crates/float4/Cargo.toml +++ b/sub_crates/float4/Cargo.toml @@ -7,10 +7,3 @@ license = "MIT" [lib] name = "float4" path = "src/lib.rs" - -[features] -simd_perf = ["simd"] - -[dependencies] -# Crates.io dependencies -simd = { version = "0.2.1", optional = true } \ No newline at end of file diff --git a/sub_crates/float4/src/lib.rs b/sub_crates/float4/src/lib.rs index 8358d82..fe7bafc 100644 --- a/sub_crates/float4/src/lib.rs +++ b/sub_crates/float4/src/lib.rs @@ -1,115 +1,485 @@ #![allow(dead_code)] -#[cfg(feature = "simd_perf")] -extern crate simd; +/// Implementation of Float4 for x86_64 platforms with sse support +#[cfg(all(target_arch = "x86_64", target_feature = "sse"))] +mod x86_64_sse { + use std::arch::x86_64::__m128; + use std::cmp::PartialEq; + use std::ops::{Add, AddAssign, BitAnd, BitOr, Div, DivAssign, Mul, MulAssign, Sub, SubAssign}; -use std::cmp::PartialEq; -use std::ops::{Add, AddAssign, BitAnd, Div, DivAssign, Mul, MulAssign, Sub, SubAssign}; + #[derive(Debug, Copy, Clone)] + pub struct Float4 { + data: __m128, + } -#[cfg(feature = "simd_perf")] -use simd::{bool32fx4, f32x4}; - -/// Essentially a tuple of four floats, which will use SIMD operations -/// where possible on a platform. 
-#[cfg(feature = "simd_perf")] -#[derive(Debug, Copy, Clone)] -pub struct Float4 { - data: f32x4, -} - -#[cfg(not(feature = "simd_perf"))] -#[derive(Debug, Copy, Clone)] -pub struct Float4 { - data: [f32; 4], -} - -impl Float4 { - #[inline(always)] - pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 { - #[cfg(feature = "simd_perf")] - { + impl Float4 { + #[inline(always)] + pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 { + use std::arch::x86_64::_mm_set_ps; Float4 { - data: f32x4::new(a, b, c, d), + data: unsafe { _mm_set_ps(d, c, b, a) }, } } - #[cfg(not(feature = "simd_perf"))] - { + + #[inline(always)] + pub fn splat(n: f32) -> Float4 { + use std::arch::x86_64::_mm_set1_ps; + Float4 { + data: unsafe { _mm_set1_ps(n) }, + } + } + + #[inline] + pub fn h_sum(&self) -> f32 { + (self.get_0() + self.get_1()) + (self.get_2() + self.get_3()) + } + + #[inline] + pub fn h_product(&self) -> f32 { + (self.get_0() * self.get_1()) * (self.get_2() * self.get_3()) + } + + #[inline] + pub fn h_min(&self) -> f32 { + let n1 = if self.get_0() < self.get_1() { + self.get_0() + } else { + self.get_1() + }; + let n2 = if self.get_2() < self.get_3() { + self.get_2() + } else { + self.get_3() + }; + if n1 < n2 { + n1 + } else { + n2 + } + } + + #[inline] + pub fn h_max(&self) -> f32 { + let n1 = if self.get_0() > self.get_1() { + self.get_0() + } else { + self.get_1() + }; + let n2 = if self.get_2() > self.get_3() { + self.get_2() + } else { + self.get_3() + }; + if n1 > n2 { + n1 + } else { + n2 + } + } + + #[inline(always)] + pub fn v_min(&self, other: Float4) -> Float4 { + use std::arch::x86_64::_mm_min_ps; + Float4 { + data: unsafe { _mm_min_ps(self.data, other.data) }, + } + } + + #[inline(always)] + pub fn v_max(&self, other: Float4) -> Float4 { + use std::arch::x86_64::_mm_max_ps; + Float4 { + data: unsafe { _mm_max_ps(self.data, other.data) }, + } + } + + #[inline(always)] + pub fn lt(&self, other: Float4) -> Bool4 { + use std::arch::x86_64::_mm_cmplt_ps; + Bool4 { + 
data: unsafe { _mm_cmplt_ps(self.data, other.data) }, + } + } + + #[inline(always)] + pub fn lte(&self, other: Float4) -> Bool4 { + use std::arch::x86_64::_mm_cmple_ps; + Bool4 { + data: unsafe { _mm_cmple_ps(self.data, other.data) }, + } + } + + #[inline(always)] + pub fn gt(&self, other: Float4) -> Bool4 { + use std::arch::x86_64::_mm_cmpgt_ps; + Bool4 { + data: unsafe { _mm_cmpgt_ps(self.data, other.data) }, + } + } + + #[inline(always)] + pub fn gte(&self, other: Float4) -> Bool4 { + use std::arch::x86_64::_mm_cmpge_ps; + Bool4 { + data: unsafe { _mm_cmpge_ps(self.data, other.data) }, + } + } + + /// Set the nth element to the given value. + #[inline(always)] + pub fn set_n(&mut self, n: usize, v: f32) { + use std::mem::transmute; + assert!( + n <= 3, + "Attempted to set element of Float4 outside of bounds." + ); + + unsafe { *transmute::<*mut __m128, *mut f32>(&mut self.data).offset(n as isize) = v } + } + + /// Set the 0th element to the given value. + #[inline(always)] + pub fn set_0(&mut self, v: f32) { + self.set_n(0, v); + } + + /// Set the 1th element to the given value. + #[inline(always)] + pub fn set_1(&mut self, v: f32) { + self.set_n(1, v); + } + + /// Set the 2th element to the given value. + #[inline(always)] + pub fn set_2(&mut self, v: f32) { + self.set_n(2, v); + } + + /// Set the 3th element to the given value. + #[inline(always)] + pub fn set_3(&mut self, v: f32) { + self.set_n(3, v); + } + + /// Returns the value of the nth element. + #[inline(always)] + pub fn get_n(&self, n: usize) -> f32 { + use std::mem::transmute; + assert!( + n <= 3, + "Attempted to access element of Float4 outside of bounds." + ); + + unsafe { *transmute::<*const __m128, *const f32>(&self.data).offset(n as isize) } + } + + /// Returns the value of the 0th element. + #[inline(always)] + pub fn get_0(&self) -> f32 { + self.get_n(0) + } + + /// Returns the value of the 1th element. 
+ #[inline(always)] + pub fn get_1(&self) -> f32 { + self.get_n(1) + } + + /// Returns the value of the 2th element. + #[inline(always)] + pub fn get_2(&self) -> f32 { + self.get_n(2) + } + + /// Returns the value of the 3th element. + #[inline(always)] + pub fn get_3(&self) -> f32 { + self.get_n(3) + } + } + + impl PartialEq for Float4 { + #[inline] + fn eq(&self, other: &Float4) -> bool { + self.get_0() == other.get_0() + && self.get_1() == other.get_1() + && self.get_2() == other.get_2() + && self.get_3() == other.get_3() + } + } + + impl Add for Float4 { + type Output = Float4; + + #[inline(always)] + fn add(self, other: Float4) -> Float4 { + use std::arch::x86_64::_mm_add_ps; + Float4 { + data: unsafe { _mm_add_ps(self.data, other.data) }, + } + } + } + + impl AddAssign for Float4 { + #[inline(always)] + fn add_assign(&mut self, rhs: Float4) { + *self = *self + rhs; + } + } + + impl Sub for Float4 { + type Output = Float4; + + #[inline(always)] + fn sub(self, other: Float4) -> Float4 { + use std::arch::x86_64::_mm_sub_ps; + Float4 { + data: unsafe { _mm_sub_ps(self.data, other.data) }, + } + } + } + + impl SubAssign for Float4 { + #[inline(always)] + fn sub_assign(&mut self, rhs: Float4) { + *self = *self - rhs; + } + } + + impl Mul for Float4 { + type Output = Float4; + + #[inline(always)] + fn mul(self, other: Float4) -> Float4 { + use std::arch::x86_64::_mm_mul_ps; + Float4 { + data: unsafe { _mm_mul_ps(self.data, other.data) }, + } + } + } + + impl Mul for Float4 { + type Output = Float4; + + #[inline(always)] + fn mul(self, other: f32) -> Float4 { + self * Float4::splat(other) + } + } + + impl MulAssign for Float4 { + #[inline(always)] + fn mul_assign(&mut self, rhs: Float4) { + *self = *self * rhs; + } + } + + impl MulAssign for Float4 { + #[inline(always)] + fn mul_assign(&mut self, rhs: f32) { + *self = *self * rhs; + } + } + + impl Div for Float4 { + type Output = Float4; + + #[inline(always)] + fn div(self, other: Float4) -> Float4 { + use 
std::arch::x86_64::_mm_div_ps; + Float4 { + data: unsafe { _mm_div_ps(self.data, other.data) }, + } + } + } + + impl Div for Float4 { + type Output = Float4; + + #[inline(always)] + fn div(self, other: f32) -> Float4 { + self / Float4::splat(other) + } + } + + impl DivAssign for Float4 { + #[inline(always)] + fn div_assign(&mut self, rhs: Float4) { + *self = *self / rhs; + } + } + + impl DivAssign for Float4 { + #[inline(always)] + fn div_assign(&mut self, rhs: f32) { + *self = *self / rhs; + } + } + + #[inline(always)] + pub fn v_min(a: Float4, b: Float4) -> Float4 { + a.v_min(b) + } + + #[inline(always)] + pub fn v_max(a: Float4, b: Float4) -> Float4 { + a.v_max(b) + } + + /// Essentially a tuple of four bools, which will use SIMD operations + /// where possible on a platform. + #[derive(Debug, Copy, Clone)] + pub struct Bool4 { + data: __m128, + } + + impl Bool4 { + /// Returns the value of the nth element. + #[inline(always)] + pub fn get_n(&self, n: usize) -> bool { + use std::mem::transmute; + assert!( + n <= 3, + "Attempted to access element of Bool4 outside of bounds." + ); + + 0 != unsafe { *transmute::<*const __m128, *const u32>(&self.data).offset(n as isize) } + } + + /// Returns the value of the 0th element. + #[inline(always)] + pub fn get_0(&self) -> bool { + self.get_n(0) + } + + /// Returns the value of the 1th element. + #[inline(always)] + pub fn get_1(&self) -> bool { + self.get_n(1) + } + + /// Returns the value of the 2th element. + #[inline(always)] + pub fn get_2(&self) -> bool { + self.get_n(2) + } + + /// Returns the value of the 3th element. 
+ #[inline(always)] + pub fn get_3(&self) -> bool { + self.get_n(3) + } + + #[inline] + pub fn to_bitmask(&self) -> u8 { + use std::mem::transmute; + let a = unsafe { *transmute::<*const __m128, *const u8>(&self.data).offset(0) }; + let b = unsafe { *transmute::<*const __m128, *const u8>(&self.data).offset(4) }; + let c = unsafe { *transmute::<*const __m128, *const u8>(&self.data).offset(8) }; + let d = unsafe { *transmute::<*const __m128, *const u8>(&self.data).offset(12) }; + (a & 0b00000001) | (b & 0b00000010) | (c & 0b00000100) | (d & 0b00001000) + } + } + + impl BitAnd for Bool4 { + type Output = Bool4; + + #[inline(always)] + fn bitand(self, rhs: Bool4) -> Bool4 { + use std::arch::x86_64::_mm_and_ps; + Bool4 { + data: unsafe { _mm_and_ps(self.data, rhs.data) }, + } + } + } + + impl BitOr for Bool4 { + type Output = Bool4; + + #[inline(always)] + fn bitor(self, rhs: Bool4) -> Bool4 { + use std::arch::x86_64::_mm_or_ps; + Bool4 { + data: unsafe { _mm_or_ps(self.data, rhs.data) }, + } + } + } +} + +//=========================================================================== + +/// Implementation of Float4 for any platform, foregoing any +/// platform-specific optimizations. 
+mod fallback { + use std::cmp::PartialEq; + use std::ops::{Add, AddAssign, BitAnd, BitOr, Div, DivAssign, Mul, MulAssign, Sub, SubAssign}; + + #[derive(Debug, Copy, Clone)] + pub struct Float4 { + data: [f32; 4], + } + + impl Float4 { + #[inline(always)] + pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 { Float4 { data: [a, b, c, d] } } - } - #[inline(always)] - pub fn splat(n: f32) -> Float4 { - #[cfg(feature = "simd_perf")] - { - Float4 { - data: f32x4::splat(n), - } - } - #[cfg(not(feature = "simd_perf"))] - { + #[inline(always)] + pub fn splat(n: f32) -> Float4 { Float4 { data: [n, n, n, n] } } - } - #[inline] - pub fn h_sum(&self) -> f32 { - (self.get_0() + self.get_1()) + (self.get_2() + self.get_3()) - } - - #[inline] - pub fn h_product(&self) -> f32 { - (self.get_0() * self.get_1()) * (self.get_2() * self.get_3()) - } - - #[inline] - pub fn h_min(&self) -> f32 { - let n1 = if self.get_0() < self.get_1() { - self.get_0() - } else { - self.get_1() - }; - let n2 = if self.get_2() < self.get_3() { - self.get_2() - } else { - self.get_3() - }; - if n1 < n2 { - n1 - } else { - n2 + #[inline] + pub fn h_sum(&self) -> f32 { + (self.get_0() + self.get_1()) + (self.get_2() + self.get_3()) } - } - #[inline] - pub fn h_max(&self) -> f32 { - let n1 = if self.get_0() > self.get_1() { - self.get_0() - } else { - self.get_1() - }; - let n2 = if self.get_2() > self.get_3() { - self.get_2() - } else { - self.get_3() - }; - if n1 > n2 { - n1 - } else { - n2 + #[inline] + pub fn h_product(&self) -> f32 { + (self.get_0() * self.get_1()) * (self.get_2() * self.get_3()) } - } - #[inline(always)] - pub fn v_min(&self, other: Float4) -> Float4 { - #[cfg(feature = "simd_perf")] - { - Float4 { - data: self.data.min(other.data), + #[inline] + pub fn h_min(&self) -> f32 { + let n1 = if self.get_0() < self.get_1() { + self.get_0() + } else { + self.get_1() + }; + let n2 = if self.get_2() < self.get_3() { + self.get_2() + } else { + self.get_3() + }; + if n1 < n2 { + n1 + } else { 
+ n2 } } - #[cfg(not(feature = "simd_perf"))] - { + + #[inline] + pub fn h_max(&self) -> f32 { + let n1 = if self.get_0() > self.get_1() { + self.get_0() + } else { + self.get_1() + }; + let n2 = if self.get_2() > self.get_3() { + self.get_2() + } else { + self.get_3() + }; + if n1 > n2 { + n1 + } else { + n2 + } + } + + #[inline(always)] + pub fn v_min(&self, other: Float4) -> Float4 { Float4::new( if self.get_0() < other.get_0() { self.get_0() @@ -133,18 +503,9 @@ impl Float4 { }, ) } - } - #[inline(always)] - pub fn v_max(&self, other: Float4) -> Float4 { - #[cfg(feature = "simd_perf")] - { - Float4 { - data: self.data.max(other.data), - } - } - #[cfg(not(feature = "simd_perf"))] - { + #[inline(always)] + pub fn v_max(&self, other: Float4) -> Float4 { Float4::new( if self.get_0() > other.get_0() { self.get_0() @@ -168,18 +529,9 @@ impl Float4 { }, ) } - } - #[inline(always)] - pub fn lt(&self, other: Float4) -> Bool4 { - #[cfg(feature = "simd_perf")] - { - Bool4 { - data: self.data.lt(other.data), - } - } - #[cfg(not(feature = "simd_perf"))] - { + #[inline(always)] + pub fn lt(&self, other: Float4) -> Bool4 { Bool4 { data: [ self.data[0] < other.data[0], @@ -189,18 +541,9 @@ impl Float4 { ], } } - } - #[inline(always)] - pub fn lte(&self, other: Float4) -> Bool4 { - #[cfg(feature = "simd_perf")] - { - Bool4 { - data: self.data.le(other.data), - } - } - #[cfg(not(feature = "simd_perf"))] - { + #[inline(always)] + pub fn lte(&self, other: Float4) -> Bool4 { Bool4 { data: [ self.data[0] <= other.data[0], @@ -210,18 +553,9 @@ impl Float4 { ], } } - } - #[inline(always)] - pub fn gt(&self, other: Float4) -> Bool4 { - #[cfg(feature = "simd_perf")] - { - Bool4 { - data: self.data.gt(other.data), - } - } - #[cfg(not(feature = "simd_perf"))] - { + #[inline(always)] + pub fn gt(&self, other: Float4) -> Bool4 { Bool4 { data: [ self.data[0] > other.data[0], @@ -231,18 +565,9 @@ impl Float4 { ], } } - } - #[inline(always)] - pub fn gte(&self, other: Float4) -> Bool4 { - 
#[cfg(feature = "simd_perf")] - { - Bool4 { - data: self.data.ge(other.data), - } - } - #[cfg(not(feature = "simd_perf"))] - { + #[inline(always)] + pub fn gte(&self, other: Float4) -> Bool4 { Bool4 { data: [ self.data[0] >= other.data[0], @@ -252,110 +577,93 @@ impl Float4 { ], } } - } - /// Set the nth element to the given value. - #[inline(always)] - pub fn set_n(&mut self, n: usize, v: f32) { - assert!( - n <= 3, - "Attempted to set element of Float4 outside of bounds." - ); - #[cfg(feature = "simd_perf")] - { - self.data = self.data.replace(n as u32, v); - } - #[cfg(not(feature = "simd_perf"))] - unsafe { - *self.data.get_unchecked_mut(n) = v; - } - } - - /// Set the 0th element to the given value. - #[inline(always)] - pub fn set_0(&mut self, v: f32) { - self.set_n(0, v); - } - - /// Set the 1th element to the given value. - #[inline(always)] - pub fn set_1(&mut self, v: f32) { - self.set_n(1, v); - } - - /// Set the 2th element to the given value. - #[inline(always)] - pub fn set_2(&mut self, v: f32) { - self.set_n(2, v); - } - - /// Set the 3th element to the given value. - #[inline(always)] - pub fn set_3(&mut self, v: f32) { - self.set_n(3, v); - } - - /// Returns the value of the nth element. - #[inline(always)] - pub fn get_n(&self, n: usize) -> f32 { - assert!( - n <= 3, - "Attempted to access element of Float4 outside of bounds." - ); - #[cfg(feature = "simd_perf")] - { - self.data.extract(n as u32) - } - #[cfg(not(feature = "simd_perf"))] - unsafe { *self.data.get_unchecked(n) } - } - - /// Returns the value of the 0th element. - #[inline(always)] - pub fn get_0(&self) -> f32 { - self.get_n(0) - } - - /// Returns the value of the 1th element. - #[inline(always)] - pub fn get_1(&self) -> f32 { - self.get_n(1) - } - - /// Returns the value of the 2th element. - #[inline(always)] - pub fn get_2(&self) -> f32 { - self.get_n(2) - } - - /// Returns the value of the 3th element. 
- #[inline(always)] - pub fn get_3(&self) -> f32 { - self.get_n(3) - } -} - -impl PartialEq for Float4 { - #[inline] - fn eq(&self, other: &Float4) -> bool { - self.get_0() == other.get_0() && self.get_1() == other.get_1() - && self.get_2() == other.get_2() && self.get_3() == other.get_3() - } -} - -impl Add for Float4 { - type Output = Float4; - - #[inline(always)] - fn add(self, other: Float4) -> Float4 { - #[cfg(feature = "simd_perf")] - { - Float4 { - data: self.data + other.data, + /// Set the nth element to the given value. + #[inline(always)] + pub fn set_n(&mut self, n: usize, v: f32) { + assert!( + n <= 3, + "Attempted to set element of Float4 outside of bounds." + ); + unsafe { + *self.data.get_unchecked_mut(n) = v; } } - #[cfg(not(feature = "simd_perf"))] - { + + /// Set the 0th element to the given value. + #[inline(always)] + pub fn set_0(&mut self, v: f32) { + self.set_n(0, v); + } + + /// Set the 1th element to the given value. + #[inline(always)] + pub fn set_1(&mut self, v: f32) { + self.set_n(1, v); + } + + /// Set the 2th element to the given value. + #[inline(always)] + pub fn set_2(&mut self, v: f32) { + self.set_n(2, v); + } + + /// Set the 3th element to the given value. + #[inline(always)] + pub fn set_3(&mut self, v: f32) { + self.set_n(3, v); + } + + /// Returns the value of the nth element. + #[inline(always)] + pub fn get_n(&self, n: usize) -> f32 { + assert!( + n <= 3, + "Attempted to access element of Float4 outside of bounds." + ); + unsafe { *self.data.get_unchecked(n) } + } + + /// Returns the value of the 0th element. + #[inline(always)] + pub fn get_0(&self) -> f32 { + self.get_n(0) + } + + /// Returns the value of the 1th element. + #[inline(always)] + pub fn get_1(&self) -> f32 { + self.get_n(1) + } + + /// Returns the value of the 2th element. + #[inline(always)] + pub fn get_2(&self) -> f32 { + self.get_n(2) + } + + /// Returns the value of the 3th element. 
+ #[inline(always)] + pub fn get_3(&self) -> f32 { + self.get_n(3) + } + } + + impl PartialEq for Float4 { + #[inline] + fn eq(&self, other: &Float4) -> bool { + self.get_0() == other.get_0() + && self.get_1() == other.get_1() + && self.get_2() == other.get_2() + && self.get_3() == other.get_3() + } + } + + impl Add for Float4 { + type Output = Float4; + + #[inline(always)] + fn add(self, other: Float4) -> Float4 { Float4 { data: [ self.get_0() + other.get_0(), @@ -366,28 +674,19 @@ impl Add for Float4 { } } } -} -impl AddAssign for Float4 { - #[inline(always)] - fn add_assign(&mut self, rhs: Float4) { - *self = *self + rhs; - } -} - -impl Sub for Float4 { - type Output = Float4; - - #[inline(always)] - fn sub(self, other: Float4) -> Float4 { - #[cfg(feature = "simd_perf")] - { - Float4 { - data: self.data - other.data, - } + impl AddAssign for Float4 { + #[inline(always)] + fn add_assign(&mut self, rhs: Float4) { + *self = *self + rhs; } - #[cfg(not(feature = "simd_perf"))] - { + } + + impl Sub for Float4 { + type Output = Float4; + + #[inline(always)] + fn sub(self, other: Float4) -> Float4 { Float4 { data: [ self.get_0() - other.get_0(), @@ -398,28 +697,19 @@ impl Sub for Float4 { } } } -} -impl SubAssign for Float4 { - #[inline(always)] - fn sub_assign(&mut self, rhs: Float4) { - *self = *self - rhs; - } -} - -impl Mul for Float4 { - type Output = Float4; - - #[inline(always)] - fn mul(self, other: Float4) -> Float4 { - #[cfg(feature = "simd_perf")] - { - Float4 { - data: self.data * other.data, - } + impl SubAssign for Float4 { + #[inline(always)] + fn sub_assign(&mut self, rhs: Float4) { + *self = *self - rhs; } - #[cfg(not(feature = "simd_perf"))] - { + } + + impl Mul for Float4 { + type Output = Float4; + + #[inline(always)] + fn mul(self, other: Float4) -> Float4 { Float4 { data: [ self.get_0() * other.get_0(), @@ -430,21 +720,12 @@ impl Mul for Float4 { } } } -} -impl Mul for Float4 { - type Output = Float4; + impl Mul for Float4 { + type Output = Float4; 
- #[inline(always)] - fn mul(self, other: f32) -> Float4 { - #[cfg(feature = "simd_perf")] - { - Float4 { - data: self.data * f32x4::splat(other), - } - } - #[cfg(not(feature = "simd_perf"))] - { + #[inline(always)] + fn mul(self, other: f32) -> Float4 { Float4 { data: [ self.get_0() * other, @@ -455,35 +736,26 @@ impl Mul for Float4 { } } } -} -impl MulAssign for Float4 { - #[inline(always)] - fn mul_assign(&mut self, rhs: Float4) { - *self = *self * rhs; - } -} - -impl MulAssign for Float4 { - #[inline(always)] - fn mul_assign(&mut self, rhs: f32) { - *self = *self * rhs; - } -} - -impl Div for Float4 { - type Output = Float4; - - #[inline(always)] - fn div(self, other: Float4) -> Float4 { - #[cfg(feature = "simd_perf")] - { - Float4 { - data: self.data / other.data, - } + impl MulAssign for Float4 { + #[inline(always)] + fn mul_assign(&mut self, rhs: Float4) { + *self = *self * rhs; } - #[cfg(not(feature = "simd_perf"))] - { + } + + impl MulAssign for Float4 { + #[inline(always)] + fn mul_assign(&mut self, rhs: f32) { + *self = *self * rhs; + } + } + + impl Div for Float4 { + type Output = Float4; + + #[inline(always)] + fn div(self, other: Float4) -> Float4 { Float4 { data: [ self.get_0() / other.get_0(), @@ -494,21 +766,12 @@ impl Div for Float4 { } } } -} -impl Div for Float4 { - type Output = Float4; + impl Div for Float4 { + type Output = Float4; - #[inline(always)] - fn div(self, other: f32) -> Float4 { - #[cfg(feature = "simd_perf")] - { - Float4 { - data: self.data / f32x4::splat(other), - } - } - #[cfg(not(feature = "simd_perf"))] - { + #[inline(always)] + fn div(self, other: f32) -> Float4 { Float4 { data: [ self.get_0() / other, @@ -519,108 +782,94 @@ impl Div for Float4 { } } } -} -impl DivAssign for Float4 { - #[inline(always)] - fn div_assign(&mut self, rhs: Float4) { - *self = *self / rhs; - } -} - -impl DivAssign for Float4 { - #[inline(always)] - fn div_assign(&mut self, rhs: f32) { - *self = *self / rhs; - } -} - -#[inline(always)] -pub fn 
v_min(a: Float4, b: Float4) -> Float4 { - a.v_min(b) -} - -#[inline(always)] -pub fn v_max(a: Float4, b: Float4) -> Float4 { - a.v_max(b) -} - -/// Essentially a tuple of four bools, which will use SIMD operations -/// where possible on a platform. -#[cfg(feature = "simd_perf")] -#[derive(Debug, Copy, Clone)] -pub struct Bool4 { - data: bool32fx4, -} - -#[cfg(not(feature = "simd_perf"))] -#[derive(Debug, Copy, Clone)] -pub struct Bool4 { - data: [bool; 4], -} - -impl Bool4 { - /// Returns the value of the nth element. - #[inline(always)] - pub fn get_n(&self, n: usize) -> bool { - assert!( - n <= 3, - "Attempted to access element of Bool4 outside of bounds." - ); - #[cfg(feature = "simd_perf")] - { - self.data.extract(n as u32) + impl DivAssign for Float4 { + #[inline(always)] + fn div_assign(&mut self, rhs: Float4) { + *self = *self / rhs; } - #[cfg(not(feature = "simd_perf"))] - { + } + + impl DivAssign for Float4 { + #[inline(always)] + fn div_assign(&mut self, rhs: f32) { + *self = *self / rhs; + } + } + + #[inline(always)] + pub fn v_min(a: Float4, b: Float4) -> Float4 { + a.v_min(b) + } + + #[inline(always)] + pub fn v_max(a: Float4, b: Float4) -> Float4 { + a.v_max(b) + } + + /// Essentially a tuple of four bools, which will use SIMD operations + /// where possible on a platform. + #[cfg(feature = "simd_perf")] + #[derive(Debug, Copy, Clone)] + pub struct Bool4 { + data: bool32fx4, + } + + #[cfg(not(feature = "simd_perf"))] + #[derive(Debug, Copy, Clone)] + pub struct Bool4 { + data: [bool; 4], + } + + impl Bool4 { + /// Returns the value of the nth element. + #[inline(always)] + pub fn get_n(&self, n: usize) -> bool { + assert!( + n <= 3, + "Attempted to access element of Bool4 outside of bounds." + ); unsafe { *self.data.get_unchecked(n) } } - } - /// Returns the value of the 0th element. - #[inline(always)] - pub fn get_0(&self) -> bool { - self.get_n(0) - } - - /// Returns the value of the 1th element. 
- #[inline(always)] - pub fn get_1(&self) -> bool { - self.get_n(1) - } - - /// Returns the value of the 2th element. - #[inline(always)] - pub fn get_2(&self) -> bool { - self.get_n(2) - } - - /// Returns the value of the 3th element. - #[inline(always)] - pub fn get_3(&self) -> bool { - self.get_n(3) - } - - #[inline] - pub fn to_bitmask(&self) -> u8 { - (self.get_0() as u8) | ((self.get_1() as u8) << 1) | ((self.get_2() as u8) << 2) - | ((self.get_3() as u8) << 3) - } -} - -impl BitAnd for Bool4 { - type Output = Bool4; - - #[inline(always)] - fn bitand(self, rhs: Bool4) -> Bool4 { - #[cfg(feature = "simd_perf")] - { - Bool4 { - data: self.data & rhs.data, - } + /// Returns the value of the 0th element. + #[inline(always)] + pub fn get_0(&self) -> bool { + self.get_n(0) } - #[cfg(not(feature = "simd_perf"))] - { + + /// Returns the value of the 1th element. + #[inline(always)] + pub fn get_1(&self) -> bool { + self.get_n(1) + } + + /// Returns the value of the 2th element. + #[inline(always)] + pub fn get_2(&self) -> bool { + self.get_n(2) + } + + /// Returns the value of the 3th element. 
+ #[inline(always)] + pub fn get_3(&self) -> bool { + self.get_n(3) + } + + #[inline] + pub fn to_bitmask(&self) -> u8 { + (self.get_0() as u8) + | ((self.get_1() as u8) << 1) + | ((self.get_2() as u8) << 2) + | ((self.get_3() as u8) << 3) + } + } + + impl BitAnd for Bool4 { + type Output = Bool4; + + #[inline(always)] + fn bitand(self, rhs: Bool4) -> Bool4 { Bool4 { data: [ self.data[0] && rhs.data[0], @@ -631,8 +880,34 @@ impl BitAnd for Bool4 { } } } + + impl BitOr for Bool4 { + type Output = Bool4; + + #[inline(always)] + fn bitor(self, rhs: Bool4) -> Bool4 { + Bool4 { + data: [ + self.data[0] || rhs.data[0], + self.data[1] || rhs.data[1], + self.data[2] || rhs.data[2], + self.data[3] || rhs.data[3], + ], + } + } + } } +//=========================================================================== + +#[cfg(all(target_arch = "x86_64", target_feature = "sse"))] +pub use x86_64_sse::{v_max, v_min, Bool4, Float4}; + +#[cfg(not(all(target_arch = "x86_64", target_feature = "sse")))] +pub use fallback::{v_max, v_min, Bool4, Float4}; + +//=========================================================================== + #[cfg(test)] mod tests { use super::*; @@ -778,4 +1053,57 @@ mod tests { assert_eq!(f1 / v, f2); } + + #[test] + fn lt() { + let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); + let f2 = Float4::new(0.5, 2.0, 3.5, 2.0); + + let r = f1.lt(f2); + + assert_eq!(r.get_0(), false); + assert_eq!(r.get_1(), false); + assert_eq!(r.get_2(), true); + assert_eq!(r.get_3(), false); + } + + #[test] + fn gt() { + let f1 = Float4::new(1.0, 2.0, 3.0, 4.0); + let f2 = Float4::new(0.5, 2.0, 3.5, 2.0); + + let r = f1.gt(f2); + + assert_eq!(r.get_0(), true); + assert_eq!(r.get_1(), false); + assert_eq!(r.get_2(), false); + assert_eq!(r.get_3(), true); + } + + #[test] + fn bool4_bitmask_01() { + let f1 = Float4::new(0.0, 0.0, 0.0, 0.0); + let f2 = Float4::new(-1.0, -1.0, 1.0, -1.0); + let r = f1.lt(f2).to_bitmask(); + + assert_eq!(r, 0b00000100); + } + + #[test] + fn bool4_bitmask_02() { + 
let f1 = Float4::new(0.0, 0.0, 0.0, 0.0); + let f2 = Float4::new(1.0, -1.0, 1.0, -1.0); + let r = f1.lt(f2).to_bitmask(); + + assert_eq!(r, 0b00000101); + } + + #[test] + fn bool4_bitmask_03() { + let f1 = Float4::new(0.0, 0.0, 0.0, 0.0); + let f2 = Float4::new(-1.0, 1.0, -1.0, 1.0); + let r = f1.lt(f2).to_bitmask(); + + assert_eq!(r, 0b00001010); + } } diff --git a/sub_crates/math3d/Cargo.toml b/sub_crates/math3d/Cargo.toml index de115e4..53c875f 100644 --- a/sub_crates/math3d/Cargo.toml +++ b/sub_crates/math3d/Cargo.toml @@ -8,9 +8,6 @@ license = "MIT" name = "math3d" path = "src/lib.rs" -[features] -simd_perf = ["float4/simd_perf"] - # Local crate dependencies [dependencies.float4] path = "../float4" \ No newline at end of file