Switch to stable SIMD intrinsics.

Rust 1.27 stabilized a variety of CPU intrinsics, including SIMD
on x86/64 platforms.  This commit moves to using those intrinsics
for the optimized Float4 implementation.  This means Psychopath
now compiles on stable Rust with all optimizations.  Yay!
This commit is contained in:
Nathan Vegdahl 2018-06-24 15:32:09 -07:00
parent d92ae4b2d7
commit 27d1b2286b
5 changed files with 766 additions and 460 deletions

9
Cargo.lock generated
View File

@ -67,9 +67,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "float4" name = "float4"
version = "0.1.0" version = "0.1.0"
dependencies = [
"simd 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "half" name = "half"
@ -198,11 +195,6 @@ name = "scoped_threadpool"
version = "0.1.9" version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "simd"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "sobol" name = "sobol"
version = "0.1.0" version = "0.1.0"
@ -296,7 +288,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda" "checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda"
"checksum safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f" "checksum safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f"
"checksum scoped_threadpool 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8" "checksum scoped_threadpool 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8"
"checksum simd 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3dd0805c7363ab51a829a1511ad24b6ed0349feaa756c4bc2f977f9f496e6673"
"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"
"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096"
"checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693" "checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693"

View File

@ -15,9 +15,6 @@ name = "psychopath"
version = "0.1.0" version = "0.1.0"
authors = ["Nathan Vegdahl <cessen@cessen.com>"] authors = ["Nathan Vegdahl <cessen@cessen.com>"]
[features]
simd_perf = ["float4/simd_perf", "math3d/simd_perf"]
[profile.release] [profile.release]
debug = true debug = true

View File

@ -7,10 +7,3 @@ license = "MIT"
[lib] [lib]
name = "float4" name = "float4"
path = "src/lib.rs" path = "src/lib.rs"
[features]
simd_perf = ["simd"]
[dependencies]
# Crates.io dependencies
simd = { version = "0.2.1", optional = true }

View File

@ -1,54 +1,31 @@
#![allow(dead_code)] #![allow(dead_code)]
#[cfg(feature = "simd_perf")] /// Implementation of Float4 for x86_64 platforms with sse support
extern crate simd; #[cfg(all(target_arch = "x86_64", target_feature = "sse"))]
mod x86_64_sse {
use std::arch::x86_64::__m128;
use std::cmp::PartialEq;
use std::ops::{Add, AddAssign, BitAnd, BitOr, Div, DivAssign, Mul, MulAssign, Sub, SubAssign};
use std::cmp::PartialEq; #[derive(Debug, Copy, Clone)]
use std::ops::{Add, AddAssign, BitAnd, Div, DivAssign, Mul, MulAssign, Sub, SubAssign}; pub struct Float4 {
data: __m128,
}
#[cfg(feature = "simd_perf")] impl Float4 {
use simd::{bool32fx4, f32x4};
/// Essentially a tuple of four floats, which will use SIMD operations
/// where possible on a platform.
#[cfg(feature = "simd_perf")]
#[derive(Debug, Copy, Clone)]
pub struct Float4 {
data: f32x4,
}
#[cfg(not(feature = "simd_perf"))]
#[derive(Debug, Copy, Clone)]
pub struct Float4 {
data: [f32; 4],
}
impl Float4 {
#[inline(always)] #[inline(always)]
pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 { pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 {
#[cfg(feature = "simd_perf")] use std::arch::x86_64::_mm_set_ps;
{
Float4 { Float4 {
data: f32x4::new(a, b, c, d), data: unsafe { _mm_set_ps(d, c, b, a) },
}
}
#[cfg(not(feature = "simd_perf"))]
{
Float4 { data: [a, b, c, d] }
} }
} }
#[inline(always)] #[inline(always)]
pub fn splat(n: f32) -> Float4 { pub fn splat(n: f32) -> Float4 {
#[cfg(feature = "simd_perf")] use std::arch::x86_64::_mm_set1_ps;
{
Float4 { Float4 {
data: f32x4::splat(n), data: unsafe { _mm_set1_ps(n) },
}
}
#[cfg(not(feature = "simd_perf"))]
{
Float4 { data: [n, n, n, n] }
} }
} }
@ -102,14 +79,407 @@ impl Float4 {
#[inline(always)] #[inline(always)]
pub fn v_min(&self, other: Float4) -> Float4 { pub fn v_min(&self, other: Float4) -> Float4 {
#[cfg(feature = "simd_perf")] use std::arch::x86_64::_mm_min_ps;
{
Float4 { Float4 {
data: self.data.min(other.data), data: unsafe { _mm_min_ps(self.data, other.data) },
} }
} }
#[cfg(not(feature = "simd_perf"))]
{ #[inline(always)]
/// Lane-wise maximum of `self` and `other`, computed with `_mm_max_ps`.
pub fn v_max(&self, other: Float4) -> Float4 {
    use std::arch::x86_64::_mm_max_ps;

    // SAFETY: this implementation is only compiled in when SSE is enabled
    // for the target, so the intrinsic is available.
    let maxima = unsafe { _mm_max_ps(self.data, other.data) };
    Float4 { data: maxima }
}
/// Lane-wise `self < other`, returned as a `Bool4` mask.
#[inline(always)]
pub fn lt(&self, other: Float4) -> Bool4 {
    use std::arch::x86_64::_mm_cmplt_ps;

    // SAFETY: this implementation is only compiled in when SSE is enabled
    // for the target, so the intrinsic is available.
    let mask = unsafe { _mm_cmplt_ps(self.data, other.data) };
    Bool4 { data: mask }
}
/// Lane-wise `self <= other`, returned as a `Bool4` mask.
#[inline(always)]
pub fn lte(&self, other: Float4) -> Bool4 {
    use std::arch::x86_64::_mm_cmple_ps;

    // SAFETY: this implementation is only compiled in when SSE is enabled
    // for the target, so the intrinsic is available.
    let mask = unsafe { _mm_cmple_ps(self.data, other.data) };
    Bool4 { data: mask }
}
/// Lane-wise `self > other`, returned as a `Bool4` mask.
#[inline(always)]
pub fn gt(&self, other: Float4) -> Bool4 {
    use std::arch::x86_64::_mm_cmpgt_ps;

    // SAFETY: this implementation is only compiled in when SSE is enabled
    // for the target, so the intrinsic is available.
    let mask = unsafe { _mm_cmpgt_ps(self.data, other.data) };
    Bool4 { data: mask }
}
/// Lane-wise `self >= other`, returned as a `Bool4` mask.
#[inline(always)]
pub fn gte(&self, other: Float4) -> Bool4 {
    use std::arch::x86_64::_mm_cmpge_ps;

    // SAFETY: this implementation is only compiled in when SSE is enabled
    // for the target, so the intrinsic is available.
    let mask = unsafe { _mm_cmpge_ps(self.data, other.data) };
    Bool4 { data: mask }
}
/// Set the element at index `n` to the given value.
///
/// # Panics
///
/// Panics if `n > 3`.
#[inline(always)]
pub fn set_n(&mut self, n: usize, v: f32) {
    assert!(
        n <= 3,
        "Attempted to set element of Float4 outside of bounds."
    );
    // View the vector as four consecutive `f32` lanes. A plain pointer cast
    // does the same job as transmuting the pointer, without `transmute`.
    let lanes = &mut self.data as *mut __m128 as *mut f32;
    // SAFETY: `data` is a 16-byte `__m128` (four `f32` lanes) and the assert
    // above guarantees `n < 4`, so the write stays inside `self.data`.
    unsafe { *lanes.add(n) = v }
}
/// Set the 0th element to the given value.
#[inline(always)]
pub fn set_0(&mut self, v: f32) {
self.set_n(0, v);
}
/// Set the 1th element to the given value.
#[inline(always)]
pub fn set_1(&mut self, v: f32) {
self.set_n(1, v);
}
/// Set the 2th element to the given value.
#[inline(always)]
pub fn set_2(&mut self, v: f32) {
self.set_n(2, v);
}
/// Set the 3th element to the given value.
#[inline(always)]
pub fn set_3(&mut self, v: f32) {
self.set_n(3, v);
}
/// Returns the value of the element at index `n`.
///
/// # Panics
///
/// Panics if `n > 3`.
#[inline(always)]
pub fn get_n(&self, n: usize) -> f32 {
    assert!(
        n <= 3,
        "Attempted to access element of Float4 outside of bounds."
    );
    // View the vector as four consecutive `f32` lanes. A plain pointer cast
    // does the same job as transmuting the pointer, without `transmute`.
    let lanes = &self.data as *const __m128 as *const f32;
    // SAFETY: `data` is a 16-byte `__m128` (four `f32` lanes) and the assert
    // above guarantees `n < 4`, so the read stays inside `self.data`.
    unsafe { *lanes.add(n) }
}
/// Returns the value of the 0th element.
#[inline(always)]
pub fn get_0(&self) -> f32 {
self.get_n(0)
}
/// Returns the value of the 1th element.
#[inline(always)]
pub fn get_1(&self) -> f32 {
self.get_n(1)
}
/// Returns the value of the 2th element.
#[inline(always)]
pub fn get_2(&self) -> f32 {
self.get_n(2)
}
/// Returns the value of the 3th element.
#[inline(always)]
pub fn get_3(&self) -> f32 {
self.get_n(3)
}
}
impl PartialEq for Float4 {
    /// Two vectors are equal when all four elements compare equal with
    /// `f32`'s `==`, checked in index order and stopping at the first
    /// mismatch.
    #[inline]
    fn eq(&self, other: &Float4) -> bool {
        (0..4).all(|lane| self.get_n(lane) == other.get_n(lane))
    }
}
impl Add for Float4 {
    type Output = Float4;

    /// Lane-wise addition via `_mm_add_ps`.
    #[inline(always)]
    fn add(self, other: Float4) -> Float4 {
        use std::arch::x86_64::_mm_add_ps;

        // SAFETY: this implementation is only compiled in when SSE is
        // enabled for the target, so the intrinsic is available.
        let sum = unsafe { _mm_add_ps(self.data, other.data) };
        Float4 { data: sum }
    }
}
impl AddAssign for Float4 {
    /// In-place lane-wise addition; delegates to the `Add` impl.
    #[inline(always)]
    fn add_assign(&mut self, rhs: Float4) {
        let sum = *self + rhs;
        *self = sum;
    }
}
impl Sub for Float4 {
    type Output = Float4;

    /// Lane-wise subtraction via `_mm_sub_ps`.
    #[inline(always)]
    fn sub(self, other: Float4) -> Float4 {
        use std::arch::x86_64::_mm_sub_ps;

        // SAFETY: this implementation is only compiled in when SSE is
        // enabled for the target, so the intrinsic is available.
        let difference = unsafe { _mm_sub_ps(self.data, other.data) };
        Float4 { data: difference }
    }
}
impl SubAssign for Float4 {
    /// In-place lane-wise subtraction; delegates to the `Sub` impl.
    #[inline(always)]
    fn sub_assign(&mut self, rhs: Float4) {
        let difference = *self - rhs;
        *self = difference;
    }
}
impl Mul for Float4 {
    type Output = Float4;

    /// Lane-wise multiplication via `_mm_mul_ps`.
    #[inline(always)]
    fn mul(self, other: Float4) -> Float4 {
        use std::arch::x86_64::_mm_mul_ps;

        // SAFETY: this implementation is only compiled in when SSE is
        // enabled for the target, so the intrinsic is available.
        let product = unsafe { _mm_mul_ps(self.data, other.data) };
        Float4 { data: product }
    }
}
impl Mul<f32> for Float4 {
    type Output = Float4;

    /// Multiplies every element by the scalar `other`.
    #[inline(always)]
    fn mul(self, other: f32) -> Float4 {
        // Broadcast the scalar, then reuse the vector-by-vector multiply.
        let factor = Float4::splat(other);
        self * factor
    }
}
impl MulAssign for Float4 {
    /// In-place lane-wise multiplication; delegates to the `Mul` impl.
    #[inline(always)]
    fn mul_assign(&mut self, rhs: Float4) {
        let product = *self * rhs;
        *self = product;
    }
}
impl MulAssign<f32> for Float4 {
    /// In-place multiplication of every element by the scalar `rhs`;
    /// delegates to the `Mul<f32>` impl.
    #[inline(always)]
    fn mul_assign(&mut self, rhs: f32) {
        let scaled = *self * rhs;
        *self = scaled;
    }
}
impl Div for Float4 {
    type Output = Float4;

    /// Lane-wise division via `_mm_div_ps`.
    #[inline(always)]
    fn div(self, other: Float4) -> Float4 {
        use std::arch::x86_64::_mm_div_ps;

        // SAFETY: this implementation is only compiled in when SSE is
        // enabled for the target, so the intrinsic is available.
        let quotient = unsafe { _mm_div_ps(self.data, other.data) };
        Float4 { data: quotient }
    }
}
impl Div<f32> for Float4 {
    type Output = Float4;

    /// Divides every element by the scalar `other`.
    #[inline(always)]
    fn div(self, other: f32) -> Float4 {
        // Broadcast the scalar, then reuse the vector-by-vector divide.
        let divisor = Float4::splat(other);
        self / divisor
    }
}
impl DivAssign for Float4 {
    /// In-place lane-wise division; delegates to the `Div` impl.
    #[inline(always)]
    fn div_assign(&mut self, rhs: Float4) {
        let quotient = *self / rhs;
        *self = quotient;
    }
}
impl DivAssign<f32> for Float4 {
    /// In-place division of every element by the scalar `rhs`;
    /// delegates to the `Div<f32>` impl.
    #[inline(always)]
    fn div_assign(&mut self, rhs: f32) {
        let scaled = *self / rhs;
        *self = scaled;
    }
}
#[inline(always)]
pub fn v_min(a: Float4, b: Float4) -> Float4 {
a.v_min(b)
}
#[inline(always)]
pub fn v_max(a: Float4, b: Float4) -> Float4 {
a.v_max(b)
}
/// Essentially a tuple of four bools, which will use SIMD operations
/// where possible on a platform.
///
/// Each 32-bit lane of `data` stores one boolean as a mask; `get_n` reads a
/// lane as `true` when any of its bits is set.
#[derive(Debug, Copy, Clone)]
pub struct Bool4 {
    data: __m128,
}

impl Bool4 {
    /// Returns the value of the element at index `n`.
    ///
    /// # Panics
    ///
    /// Panics if `n > 3`.
    #[inline(always)]
    pub fn get_n(&self, n: usize) -> bool {
        assert!(
            n <= 3,
            "Attempted to access element of Bool4 outside of bounds."
        );
        // View the vector as four consecutive 32-bit lanes. A plain pointer
        // cast does the same job as transmuting the pointer.
        let lanes = &self.data as *const __m128 as *const u32;
        // SAFETY: `data` is a 16-byte `__m128` (four 32-bit lanes) and the
        // assert above guarantees `n < 4`, so the read stays inside it.
        0 != unsafe { *lanes.add(n) }
    }

    /// Returns the value of the element at index 0.
    #[inline(always)]
    pub fn get_0(&self) -> bool {
        self.get_n(0)
    }

    /// Returns the value of the element at index 1.
    #[inline(always)]
    pub fn get_1(&self) -> bool {
        self.get_n(1)
    }

    /// Returns the value of the element at index 2.
    #[inline(always)]
    pub fn get_2(&self) -> bool {
        self.get_n(2)
    }

    /// Returns the value of the element at index 3.
    #[inline(always)]
    pub fn get_3(&self) -> bool {
        self.get_n(3)
    }

    /// Packs the four elements into the low four bits of a `u8`:
    /// bit `i` is set when element `i` is true.
    ///
    /// Uses `_mm_movemask_ps`, which collects the top (sign) bit of each
    /// lane. This matches the lanes produced by the SSE comparison
    /// intrinsics and their bitwise AND/OR, which are the only ways this
    /// type's masks are built in the visible code.
    #[inline]
    pub fn to_bitmask(&self) -> u8 {
        use std::arch::x86_64::_mm_movemask_ps;

        // SAFETY: this implementation is only compiled in when SSE is
        // enabled for the target, so the intrinsic is available.
        let bits = unsafe { _mm_movemask_ps(self.data) };
        // Only the low four bits can be set, so the narrowing is lossless.
        bits as u8
    }
}

impl BitAnd for Bool4 {
    type Output = Bool4;

    /// Lane-wise AND of the two masks via `_mm_and_ps`.
    #[inline(always)]
    fn bitand(self, rhs: Bool4) -> Bool4 {
        use std::arch::x86_64::_mm_and_ps;

        // SAFETY: this implementation is only compiled in when SSE is
        // enabled for the target, so the intrinsic is available.
        let both = unsafe { _mm_and_ps(self.data, rhs.data) };
        Bool4 { data: both }
    }
}

impl BitOr for Bool4 {
    type Output = Bool4;

    /// Lane-wise OR of the two masks via `_mm_or_ps`.
    #[inline(always)]
    fn bitor(self, rhs: Bool4) -> Bool4 {
        use std::arch::x86_64::_mm_or_ps;

        // SAFETY: this implementation is only compiled in when SSE is
        // enabled for the target, so the intrinsic is available.
        let either = unsafe { _mm_or_ps(self.data, rhs.data) };
        Bool4 { data: either }
    }
}
}
//===========================================================================
/// Implementation of Float4 for any platform, foregoing any
/// platform-specific optimizations.
mod fallback {
use std::cmp::PartialEq;
use std::ops::{Add, AddAssign, BitAnd, BitOr, Div, DivAssign, Mul, MulAssign, Sub, SubAssign};
/// Four `f32` values stored in a plain array, used when no
/// platform-specific implementation is selected.
#[derive(Debug, Copy, Clone)]
pub struct Float4 {
    // The four elements, in index order 0..=3.
    data: [f32; 4],
}
impl Float4 {
/// Builds a `Float4` whose elements 0..=3 are `a`, `b`, `c`, `d`.
#[inline(always)]
pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 {
    let data = [a, b, c, d];
    Float4 { data }
}
/// Builds a `Float4` with all four elements set to `n`.
#[inline(always)]
pub fn splat(n: f32) -> Float4 {
    Float4 { data: [n; 4] }
}
/// Horizontal sum of the four elements.
///
/// The additions are paired as `(e0 + e1) + (e2 + e3)`; the grouping is
/// kept explicit because float addition is not associative.
#[inline]
pub fn h_sum(&self) -> f32 {
    let low_pair = self.get_0() + self.get_1();
    let high_pair = self.get_2() + self.get_3();
    low_pair + high_pair
}
/// Horizontal product of the four elements.
///
/// The multiplications are paired as `(e0 * e1) * (e2 * e3)`; the grouping
/// is kept explicit because float multiplication is not associative.
#[inline]
pub fn h_product(&self) -> f32 {
    let low_pair = self.get_0() * self.get_1();
    let high_pair = self.get_2() * self.get_3();
    low_pair * high_pair
}
/// Horizontal minimum of the four elements.
///
/// Elements are reduced pairwise (0 with 1, 2 with 3, then the two
/// winners). Each step keeps the left operand only when it is strictly
/// less than the right one.
#[inline]
pub fn h_min(&self) -> f32 {
    let lesser = |left: f32, right: f32| if left < right { left } else { right };

    let low_pair = lesser(self.get_0(), self.get_1());
    let high_pair = lesser(self.get_2(), self.get_3());
    lesser(low_pair, high_pair)
}
/// Horizontal maximum of the four elements.
///
/// Elements are reduced pairwise (0 with 1, 2 with 3, then the two
/// winners). Each step keeps the left operand only when it is strictly
/// greater than the right one.
#[inline]
pub fn h_max(&self) -> f32 {
    let greater = |left: f32, right: f32| if left > right { left } else { right };

    let low_pair = greater(self.get_0(), self.get_1());
    let high_pair = greater(self.get_2(), self.get_3());
    greater(low_pair, high_pair)
}
#[inline(always)]
pub fn v_min(&self, other: Float4) -> Float4 {
Float4::new( Float4::new(
if self.get_0() < other.get_0() { if self.get_0() < other.get_0() {
self.get_0() self.get_0()
@ -133,18 +503,9 @@ impl Float4 {
}, },
) )
} }
}
#[inline(always)] #[inline(always)]
pub fn v_max(&self, other: Float4) -> Float4 { pub fn v_max(&self, other: Float4) -> Float4 {
#[cfg(feature = "simd_perf")]
{
Float4 {
data: self.data.max(other.data),
}
}
#[cfg(not(feature = "simd_perf"))]
{
Float4::new( Float4::new(
if self.get_0() > other.get_0() { if self.get_0() > other.get_0() {
self.get_0() self.get_0()
@ -168,18 +529,9 @@ impl Float4 {
}, },
) )
} }
}
#[inline(always)] #[inline(always)]
pub fn lt(&self, other: Float4) -> Bool4 { pub fn lt(&self, other: Float4) -> Bool4 {
#[cfg(feature = "simd_perf")]
{
Bool4 {
data: self.data.lt(other.data),
}
}
#[cfg(not(feature = "simd_perf"))]
{
Bool4 { Bool4 {
data: [ data: [
self.data[0] < other.data[0], self.data[0] < other.data[0],
@ -189,18 +541,9 @@ impl Float4 {
], ],
} }
} }
}
#[inline(always)] #[inline(always)]
pub fn lte(&self, other: Float4) -> Bool4 { pub fn lte(&self, other: Float4) -> Bool4 {
#[cfg(feature = "simd_perf")]
{
Bool4 {
data: self.data.le(other.data),
}
}
#[cfg(not(feature = "simd_perf"))]
{
Bool4 { Bool4 {
data: [ data: [
self.data[0] <= other.data[0], self.data[0] <= other.data[0],
@ -210,18 +553,9 @@ impl Float4 {
], ],
} }
} }
}
#[inline(always)] #[inline(always)]
pub fn gt(&self, other: Float4) -> Bool4 { pub fn gt(&self, other: Float4) -> Bool4 {
#[cfg(feature = "simd_perf")]
{
Bool4 {
data: self.data.gt(other.data),
}
}
#[cfg(not(feature = "simd_perf"))]
{
Bool4 { Bool4 {
data: [ data: [
self.data[0] > other.data[0], self.data[0] > other.data[0],
@ -231,18 +565,9 @@ impl Float4 {
], ],
} }
} }
}
#[inline(always)] #[inline(always)]
pub fn gte(&self, other: Float4) -> Bool4 { pub fn gte(&self, other: Float4) -> Bool4 {
#[cfg(feature = "simd_perf")]
{
Bool4 {
data: self.data.ge(other.data),
}
}
#[cfg(not(feature = "simd_perf"))]
{
Bool4 { Bool4 {
data: [ data: [
self.data[0] >= other.data[0], self.data[0] >= other.data[0],
@ -252,7 +577,6 @@ impl Float4 {
], ],
} }
} }
}
/// Set the nth element to the given value. /// Set the nth element to the given value.
#[inline(always)] #[inline(always)]
@ -261,11 +585,6 @@ impl Float4 {
n <= 3, n <= 3,
"Attempted to set element of Float4 outside of bounds." "Attempted to set element of Float4 outside of bounds."
); );
#[cfg(feature = "simd_perf")]
{
self.data = self.data.replace(n as u32, v);
}
#[cfg(not(feature = "simd_perf"))]
unsafe { unsafe {
*self.data.get_unchecked_mut(n) = v; *self.data.get_unchecked_mut(n) = v;
} }
@ -302,11 +621,6 @@ impl Float4 {
n <= 3, n <= 3,
"Attempted to access element of Float4 outside of bounds." "Attempted to access element of Float4 outside of bounds."
); );
#[cfg(feature = "simd_perf")]
{
self.data.extract(n as u32)
}
#[cfg(not(feature = "simd_perf"))]
unsafe { *self.data.get_unchecked(n) } unsafe { *self.data.get_unchecked(n) }
} }
@ -333,29 +647,23 @@ impl Float4 {
pub fn get_3(&self) -> f32 { pub fn get_3(&self) -> f32 {
self.get_n(3) self.get_n(3)
} }
} }
impl PartialEq for Float4 { impl PartialEq for Float4 {
#[inline] #[inline]
fn eq(&self, other: &Float4) -> bool { fn eq(&self, other: &Float4) -> bool {
self.get_0() == other.get_0() && self.get_1() == other.get_1() self.get_0() == other.get_0()
&& self.get_2() == other.get_2() && self.get_3() == other.get_3() && self.get_1() == other.get_1()
&& self.get_2() == other.get_2()
&& self.get_3() == other.get_3()
}
} }
}
impl Add for Float4 { impl Add for Float4 {
type Output = Float4; type Output = Float4;
#[inline(always)] #[inline(always)]
fn add(self, other: Float4) -> Float4 { fn add(self, other: Float4) -> Float4 {
#[cfg(feature = "simd_perf")]
{
Float4 {
data: self.data + other.data,
}
}
#[cfg(not(feature = "simd_perf"))]
{
Float4 { Float4 {
data: [ data: [
self.get_0() + other.get_0(), self.get_0() + other.get_0(),
@ -366,28 +674,19 @@ impl Add for Float4 {
} }
} }
} }
}
impl AddAssign for Float4 { impl AddAssign for Float4 {
#[inline(always)] #[inline(always)]
fn add_assign(&mut self, rhs: Float4) { fn add_assign(&mut self, rhs: Float4) {
*self = *self + rhs; *self = *self + rhs;
} }
} }
impl Sub for Float4 { impl Sub for Float4 {
type Output = Float4; type Output = Float4;
#[inline(always)] #[inline(always)]
fn sub(self, other: Float4) -> Float4 { fn sub(self, other: Float4) -> Float4 {
#[cfg(feature = "simd_perf")]
{
Float4 {
data: self.data - other.data,
}
}
#[cfg(not(feature = "simd_perf"))]
{
Float4 { Float4 {
data: [ data: [
self.get_0() - other.get_0(), self.get_0() - other.get_0(),
@ -398,28 +697,19 @@ impl Sub for Float4 {
} }
} }
} }
}
impl SubAssign for Float4 { impl SubAssign for Float4 {
#[inline(always)] #[inline(always)]
fn sub_assign(&mut self, rhs: Float4) { fn sub_assign(&mut self, rhs: Float4) {
*self = *self - rhs; *self = *self - rhs;
} }
} }
impl Mul for Float4 { impl Mul for Float4 {
type Output = Float4; type Output = Float4;
#[inline(always)] #[inline(always)]
fn mul(self, other: Float4) -> Float4 { fn mul(self, other: Float4) -> Float4 {
#[cfg(feature = "simd_perf")]
{
Float4 {
data: self.data * other.data,
}
}
#[cfg(not(feature = "simd_perf"))]
{
Float4 { Float4 {
data: [ data: [
self.get_0() * other.get_0(), self.get_0() * other.get_0(),
@ -430,21 +720,12 @@ impl Mul for Float4 {
} }
} }
} }
}
impl Mul<f32> for Float4 { impl Mul<f32> for Float4 {
type Output = Float4; type Output = Float4;
#[inline(always)] #[inline(always)]
fn mul(self, other: f32) -> Float4 { fn mul(self, other: f32) -> Float4 {
#[cfg(feature = "simd_perf")]
{
Float4 {
data: self.data * f32x4::splat(other),
}
}
#[cfg(not(feature = "simd_perf"))]
{
Float4 { Float4 {
data: [ data: [
self.get_0() * other, self.get_0() * other,
@ -455,35 +736,26 @@ impl Mul<f32> for Float4 {
} }
} }
} }
}
impl MulAssign for Float4 { impl MulAssign for Float4 {
#[inline(always)] #[inline(always)]
fn mul_assign(&mut self, rhs: Float4) { fn mul_assign(&mut self, rhs: Float4) {
*self = *self * rhs; *self = *self * rhs;
} }
} }
impl MulAssign<f32> for Float4 { impl MulAssign<f32> for Float4 {
#[inline(always)] #[inline(always)]
fn mul_assign(&mut self, rhs: f32) { fn mul_assign(&mut self, rhs: f32) {
*self = *self * rhs; *self = *self * rhs;
} }
} }
impl Div for Float4 { impl Div for Float4 {
type Output = Float4; type Output = Float4;
#[inline(always)] #[inline(always)]
fn div(self, other: Float4) -> Float4 { fn div(self, other: Float4) -> Float4 {
#[cfg(feature = "simd_perf")]
{
Float4 {
data: self.data / other.data,
}
}
#[cfg(not(feature = "simd_perf"))]
{
Float4 { Float4 {
data: [ data: [
self.get_0() / other.get_0(), self.get_0() / other.get_0(),
@ -494,21 +766,12 @@ impl Div for Float4 {
} }
} }
} }
}
impl Div<f32> for Float4 { impl Div<f32> for Float4 {
type Output = Float4; type Output = Float4;
#[inline(always)] #[inline(always)]
fn div(self, other: f32) -> Float4 { fn div(self, other: f32) -> Float4 {
#[cfg(feature = "simd_perf")]
{
Float4 {
data: self.data / f32x4::splat(other),
}
}
#[cfg(not(feature = "simd_perf"))]
{
Float4 { Float4 {
data: [ data: [
self.get_0() / other, self.get_0() / other,
@ -519,47 +782,46 @@ impl Div<f32> for Float4 {
} }
} }
} }
}
impl DivAssign for Float4 { impl DivAssign for Float4 {
#[inline(always)] #[inline(always)]
fn div_assign(&mut self, rhs: Float4) { fn div_assign(&mut self, rhs: Float4) {
*self = *self / rhs; *self = *self / rhs;
} }
} }
impl DivAssign<f32> for Float4 { impl DivAssign<f32> for Float4 {
#[inline(always)] #[inline(always)]
fn div_assign(&mut self, rhs: f32) { fn div_assign(&mut self, rhs: f32) {
*self = *self / rhs; *self = *self / rhs;
} }
} }
#[inline(always)] #[inline(always)]
pub fn v_min(a: Float4, b: Float4) -> Float4 { pub fn v_min(a: Float4, b: Float4) -> Float4 {
a.v_min(b) a.v_min(b)
} }
#[inline(always)] #[inline(always)]
pub fn v_max(a: Float4, b: Float4) -> Float4 { pub fn v_max(a: Float4, b: Float4) -> Float4 {
a.v_max(b) a.v_max(b)
} }
/// Essentially a tuple of four bools, which will use SIMD operations /// Essentially a tuple of four bools, which will use SIMD operations
/// where possible on a platform. /// where possible on a platform.
#[cfg(feature = "simd_perf")] #[cfg(feature = "simd_perf")]
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
pub struct Bool4 { pub struct Bool4 {
data: bool32fx4, data: bool32fx4,
} }
#[cfg(not(feature = "simd_perf"))] #[cfg(not(feature = "simd_perf"))]
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
pub struct Bool4 { pub struct Bool4 {
data: [bool; 4], data: [bool; 4],
} }
impl Bool4 { impl Bool4 {
/// Returns the value of the nth element. /// Returns the value of the nth element.
#[inline(always)] #[inline(always)]
pub fn get_n(&self, n: usize) -> bool { pub fn get_n(&self, n: usize) -> bool {
@ -567,15 +829,8 @@ impl Bool4 {
n <= 3, n <= 3,
"Attempted to access element of Bool4 outside of bounds." "Attempted to access element of Bool4 outside of bounds."
); );
#[cfg(feature = "simd_perf")]
{
self.data.extract(n as u32)
}
#[cfg(not(feature = "simd_perf"))]
{
unsafe { *self.data.get_unchecked(n) } unsafe { *self.data.get_unchecked(n) }
} }
}
/// Returns the value of the 0th element. /// Returns the value of the 0th element.
#[inline(always)] #[inline(always)]
@ -603,24 +858,18 @@ impl Bool4 {
#[inline] #[inline]
pub fn to_bitmask(&self) -> u8 { pub fn to_bitmask(&self) -> u8 {
(self.get_0() as u8) | ((self.get_1() as u8) << 1) | ((self.get_2() as u8) << 2) (self.get_0() as u8)
| ((self.get_1() as u8) << 1)
| ((self.get_2() as u8) << 2)
| ((self.get_3() as u8) << 3) | ((self.get_3() as u8) << 3)
} }
} }
impl BitAnd for Bool4 { impl BitAnd for Bool4 {
type Output = Bool4; type Output = Bool4;
#[inline(always)] #[inline(always)]
fn bitand(self, rhs: Bool4) -> Bool4 { fn bitand(self, rhs: Bool4) -> Bool4 {
#[cfg(feature = "simd_perf")]
{
Bool4 {
data: self.data & rhs.data,
}
}
#[cfg(not(feature = "simd_perf"))]
{
Bool4 { Bool4 {
data: [ data: [
self.data[0] && rhs.data[0], self.data[0] && rhs.data[0],
@ -631,8 +880,34 @@ impl BitAnd for Bool4 {
} }
} }
} }
impl BitOr for Bool4 {
    type Output = Bool4;

    /// Element-wise logical OR of the two `Bool4` values.
    #[inline(always)]
    fn bitor(self, rhs: Bool4) -> Bool4 {
        let (a, b) = (self.data, rhs.data);
        Bool4 {
            data: [a[0] || b[0], a[1] || b[1], a[2] || b[2], a[3] || b[3]],
        }
    }
}
} }
//===========================================================================
#[cfg(all(target_arch = "x86_64", target_feature = "sse"))]
pub use x86_64_sse::{v_max, v_min, Bool4, Float4};
#[cfg(not(all(target_arch = "x86_64", target_feature = "sse")))]
pub use fallback::{v_max, v_min, Bool4, Float4};
//===========================================================================
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -778,4 +1053,57 @@ mod tests {
assert_eq!(f1 / v, f2); assert_eq!(f1 / v, f2);
} }
#[test]
fn lt() {
    let lhs = Float4::new(1.0, 2.0, 3.0, 4.0);
    let rhs = Float4::new(0.5, 2.0, 3.5, 2.0);
    let mask = lhs.lt(rhs);

    // Only index 2 (3.0 < 3.5) is strictly less.
    let lanes = [mask.get_0(), mask.get_1(), mask.get_2(), mask.get_3()];
    assert_eq!(lanes, [false, false, true, false]);
}
#[test]
fn gt() {
    let lhs = Float4::new(1.0, 2.0, 3.0, 4.0);
    let rhs = Float4::new(0.5, 2.0, 3.5, 2.0);
    let mask = lhs.gt(rhs);

    // Indices 0 (1.0 > 0.5) and 3 (4.0 > 2.0) are strictly greater.
    let lanes = [mask.get_0(), mask.get_1(), mask.get_2(), mask.get_3()];
    assert_eq!(lanes, [true, false, false, true]);
}
#[test]
fn bool4_bitmask_01() {
    let zeros = Float4::new(0.0, 0.0, 0.0, 0.0);
    let signs = Float4::new(-1.0, -1.0, 1.0, -1.0);

    // Only index 2 is positive, so only bit 2 is expected.
    let bits = zeros.lt(signs).to_bitmask();
    assert_eq!(bits, 0b00000100);
}
#[test]
fn bool4_bitmask_02() {
    let zeros = Float4::new(0.0, 0.0, 0.0, 0.0);
    let signs = Float4::new(1.0, -1.0, 1.0, -1.0);

    // Indices 0 and 2 are positive, so bits 0 and 2 are expected.
    let bits = zeros.lt(signs).to_bitmask();
    assert_eq!(bits, 0b00000101);
}
#[test]
fn bool4_bitmask_03() {
    let zeros = Float4::new(0.0, 0.0, 0.0, 0.0);
    let signs = Float4::new(-1.0, 1.0, -1.0, 1.0);

    // Indices 1 and 3 are positive, so bits 1 and 3 are expected.
    let bits = zeros.lt(signs).to_bitmask();
    assert_eq!(bits, 0b00001010);
}
} }

View File

@ -8,9 +8,6 @@ license = "MIT"
name = "math3d" name = "math3d"
path = "src/lib.rs" path = "src/lib.rs"
[features]
simd_perf = ["float4/simd_perf"]
# Local crate dependencies # Local crate dependencies
[dependencies.float4] [dependencies.float4]
path = "../float4" path = "../float4"