Optional use of SIMD intrinsics, via a Cargo feature.

This is disabled by default for now, because it only builds on
nightly Rust.
This commit is contained in:
Nathan Vegdahl 2016-07-16 19:58:39 -07:00
parent 4db96bc758
commit e4c94d0c58
4 changed files with 147 additions and 1 deletions

6
Cargo.lock generated
View File

@ -10,6 +10,7 @@ dependencies = [
"quickersort 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "quickersort 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"scoped_threadpool 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", "scoped_threadpool 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
"simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]] [[package]]
@ -131,6 +132,11 @@ name = "scoped_threadpool"
version = "0.1.7" version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "simd"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.3.0" version = "0.3.0"

View File

@ -3,6 +3,9 @@ name = "psychopath"
version = "0.1.0" version = "0.1.0"
authors = ["Nathan Vegdahl <cessen@cessen.com>"] authors = ["Nathan Vegdahl <cessen@cessen.com>"]
[features]
simd_perf = ["simd"]
[profile.release] [profile.release]
debug = true debug = true
@ -15,3 +18,5 @@ crossbeam = "0.2"
num_cpus = "0.2" num_cpus = "0.2"
quickersort = "2.0" quickersort = "2.0"
lodepng = "0.8" lodepng = "0.8"
simd = { version = "0.1.1", optional = true }

View File

@ -3,18 +3,42 @@
use std::ops::{Index, IndexMut, Add, Sub, Mul, Div}; use std::ops::{Index, IndexMut, Add, Sub, Mul, Div};
use std::cmp::PartialEq; use std::cmp::PartialEq;
#[cfg(feature = "simd_perf")]
use simd::f32x4;
/// Essentially a tuple of four floats, which will use SIMD operations /// Essentially a tuple of four floats, which will use SIMD operations
/// where possible on a platform. /// where possible on a platform.
#[cfg(feature = "simd_perf")]
#[derive(Debug, Copy, Clone)]
/// This definition is compiled only when the `simd_perf` feature is enabled.
pub struct Float4 {
// The four values, stored in the `simd` crate's `f32x4` vector type.
data: f32x4,
}
#[cfg(not(feature = "simd_perf"))]
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
pub struct Float4 { pub struct Float4 {
data: [f32; 4], data: [f32; 4],
} }
impl Float4 { impl Float4 {
/// Builds a `Float4` from four individual values (SIMD-backed variant).
#[cfg(feature = "simd_perf")]
pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 {
    let lanes = f32x4::new(a, b, c, d);
    Float4 { data: lanes }
}
#[cfg(not(feature = "simd_perf"))]
pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 { pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 {
Float4 { data: [a, b, c, d] } Float4 { data: [a, b, c, d] }
} }
/// Builds a `Float4` from a single value using the vector type's `splat`
/// (SIMD-backed variant).
#[cfg(feature = "simd_perf")]
pub fn splat(n: f32) -> Float4 {
    let lanes = f32x4::splat(n);
    Float4 { data: lanes }
}
/// Builds a `Float4` whose four elements all equal `n` (plain-array variant).
#[cfg(not(feature = "simd_perf"))]
pub fn splat(n: f32) -> Float4 {
    let lanes = [n; 4];
    Float4 { data: lanes }
}
pub fn h_sum(&self) -> f32 { pub fn h_sum(&self) -> f32 {
self.get_0() + self.get_1() + self.get_2() + self.get_3() self.get_0() + self.get_1() + self.get_2() + self.get_3()
} }
@ -59,6 +83,11 @@ impl Float4 {
} }
} }
/// Combines `self` and `other` with the SIMD vector's `min`
/// (SIMD-backed variant).
#[cfg(feature = "simd_perf")]
pub fn v_min(&self, other: Float4) -> Float4 {
    let (lhs, rhs) = (self.data, other.data);
    Float4 { data: lhs.min(rhs) }
}
#[cfg(not(feature = "simd_perf"))]
pub fn v_min(&self, other: Float4) -> Float4 { pub fn v_min(&self, other: Float4) -> Float4 {
Float4::new(if self.get_0() < other.get_0() { Float4::new(if self.get_0() < other.get_0() {
self.get_0() self.get_0()
@ -83,6 +112,11 @@ impl Float4 {
} }
/// Combines `self` and `other` with the SIMD vector's `max`
/// (SIMD-backed variant).
#[cfg(feature = "simd_perf")]
pub fn v_max(&self, other: Float4) -> Float4 {
    let (lhs, rhs) = (self.data, other.data);
    Float4 { data: lhs.max(rhs) }
}
#[cfg(not(feature = "simd_perf"))]
pub fn v_max(&self, other: Float4) -> Float4 { pub fn v_max(&self, other: Float4) -> Float4 {
Float4::new(if self.get_0() > other.get_0() { Float4::new(if self.get_0() > other.get_0() {
self.get_0() self.get_0()
@ -106,42 +140,90 @@ impl Float4 {
}) })
} }
/// Sets element 0 (the first element) to `n`.
#[cfg(feature = "simd_perf")]
pub fn set_0(&mut self, n: f32) {
    let updated = self.data.replace(0, n);
    self.data = updated;
}
#[cfg(not(feature = "simd_perf"))]
pub fn set_0(&mut self, n: f32) { pub fn set_0(&mut self, n: f32) {
unsafe { unsafe {
*self.data.get_unchecked_mut(0) = n; *self.data.get_unchecked_mut(0) = n;
} }
} }
/// Sets element 1 (the second element) to `n`.
#[cfg(feature = "simd_perf")]
pub fn set_1(&mut self, n: f32) {
    let updated = self.data.replace(1, n);
    self.data = updated;
}
#[cfg(not(feature = "simd_perf"))]
pub fn set_1(&mut self, n: f32) { pub fn set_1(&mut self, n: f32) {
unsafe { unsafe {
*self.data.get_unchecked_mut(1) = n; *self.data.get_unchecked_mut(1) = n;
} }
} }
/// Sets element 2 (the third element) to `n`.
#[cfg(feature = "simd_perf")]
pub fn set_2(&mut self, n: f32) {
    let updated = self.data.replace(2, n);
    self.data = updated;
}
#[cfg(not(feature = "simd_perf"))]
pub fn set_2(&mut self, n: f32) { pub fn set_2(&mut self, n: f32) {
unsafe { unsafe {
*self.data.get_unchecked_mut(2) = n; *self.data.get_unchecked_mut(2) = n;
} }
} }
/// Sets element 3 (the fourth element) to `n`.
#[cfg(feature = "simd_perf")]
pub fn set_3(&mut self, n: f32) {
    let updated = self.data.replace(3, n);
    self.data = updated;
}
#[cfg(not(feature = "simd_perf"))]
pub fn set_3(&mut self, n: f32) { pub fn set_3(&mut self, n: f32) {
unsafe { unsafe {
*self.data.get_unchecked_mut(3) = n; *self.data.get_unchecked_mut(3) = n;
} }
} }
/// Reads element 0 (the first element).
#[cfg(feature = "simd_perf")]
pub fn get_0(&self) -> f32 {
    let lanes = self.data;
    lanes.extract(0)
}
#[cfg(not(feature = "simd_perf"))]
pub fn get_0(&self) -> f32 { pub fn get_0(&self) -> f32 {
unsafe { *self.data.get_unchecked(0) } unsafe { *self.data.get_unchecked(0) }
} }
/// Reads element 1 (the second element).
#[cfg(feature = "simd_perf")]
pub fn get_1(&self) -> f32 {
    let lanes = self.data;
    lanes.extract(1)
}
#[cfg(not(feature = "simd_perf"))]
pub fn get_1(&self) -> f32 { pub fn get_1(&self) -> f32 {
unsafe { *self.data.get_unchecked(1) } unsafe { *self.data.get_unchecked(1) }
} }
/// Reads element 2 (the third element).
#[cfg(feature = "simd_perf")]
pub fn get_2(&self) -> f32 {
    let lanes = self.data;
    lanes.extract(2)
}
#[cfg(not(feature = "simd_perf"))]
pub fn get_2(&self) -> f32 { pub fn get_2(&self) -> f32 {
unsafe { *self.data.get_unchecked(2) } unsafe { *self.data.get_unchecked(2) }
} }
/// Reads element 3 (the fourth element).
#[cfg(feature = "simd_perf")]
pub fn get_3(&self) -> f32 {
    let lanes = self.data;
    lanes.extract(3)
}
#[cfg(not(feature = "simd_perf"))]
pub fn get_3(&self) -> f32 { pub fn get_3(&self) -> f32 {
unsafe { *self.data.get_unchecked(3) } unsafe { *self.data.get_unchecked(3) }
} }
@ -151,12 +233,31 @@ impl Float4 {
impl Index<usize> for Float4 { impl Index<usize> for Float4 {
type Output = f32; type Output = f32;
#[cfg(feature = "simd_perf")]
fn index(&self, index: usize) -> &f32 {
    // View the SIMD vector's storage as a four-element array so that a
    // reference to a single element can be handed out. An explicit
    // pointer cast is used instead of `transmute` so the conversion is
    // limited to exactly this reinterpretation.
    //
    // SAFETY (assumption, TODO confirm against the `simd` crate): `f32x4` is
    // presumed to consist of exactly four contiguous `f32` values, with size
    // and alignment at least those of `[f32; 4]`. The resulting reference
    // borrows from `self`, so it cannot outlive the vector.
    let lanes: &[f32; 4] = unsafe { &*(&self.data as *const f32x4 as *const [f32; 4]) };
    // Regular array indexing: an `index` outside 0..4 panics.
    &lanes[index]
}
#[cfg(not(feature = "simd_perf"))]
fn index(&self, index: usize) -> &f32 { fn index(&self, index: usize) -> &f32 {
&self.data[index] &self.data[index]
} }
} }
impl IndexMut<usize> for Float4 { impl IndexMut<usize> for Float4 {
#[cfg(feature = "simd_perf")]
fn index_mut(&mut self, index: usize) -> &mut f32 {
    // View the SIMD vector's storage as a four-element array so that a
    // mutable reference to a single element can be handed out. An explicit
    // pointer cast is used instead of `transmute` so the conversion is
    // limited to exactly this reinterpretation.
    //
    // SAFETY (assumption, TODO confirm against the `simd` crate): `f32x4` is
    // presumed to consist of exactly four contiguous `f32` values, with size
    // and alignment at least those of `[f32; 4]`. The resulting reference
    // exclusively borrows `self`, so no aliasing access can exist while it lives.
    let lanes: &mut [f32; 4] = unsafe { &mut *(&mut self.data as *mut f32x4 as *mut [f32; 4]) };
    // Regular array indexing: an `index` outside 0..4 panics.
    &mut lanes[index]
}
#[cfg(not(feature = "simd_perf"))]
fn index_mut(&mut self, index: usize) -> &mut f32 { fn index_mut(&mut self, index: usize) -> &mut f32 {
&mut self.data[index] &mut self.data[index]
} }
@ -174,6 +275,11 @@ impl PartialEq for Float4 {
impl Add for Float4 { impl Add for Float4 {
type Output = Float4; type Output = Float4;
#[cfg(feature = "simd_perf")]
fn add(self, other: Float4) -> Float4 {
    // Unwrap both operands and let the vector type's `+` do the work.
    let Float4 { data: lhs } = self;
    let Float4 { data: rhs } = other;
    Float4 { data: lhs + rhs }
}
#[cfg(not(feature = "simd_perf"))]
fn add(self, other: Float4) -> Float4 { fn add(self, other: Float4) -> Float4 {
Float4 { Float4 {
data: [self.get_0() + other.get_0(), data: [self.get_0() + other.get_0(),
@ -188,6 +294,11 @@ impl Add for Float4 {
impl Sub for Float4 { impl Sub for Float4 {
type Output = Float4; type Output = Float4;
#[cfg(feature = "simd_perf")]
fn sub(self, other: Float4) -> Float4 {
    // Unwrap both operands and let the vector type's `-` do the work.
    let Float4 { data: lhs } = self;
    let Float4 { data: rhs } = other;
    Float4 { data: lhs - rhs }
}
#[cfg(not(feature = "simd_perf"))]
fn sub(self, other: Float4) -> Float4 { fn sub(self, other: Float4) -> Float4 {
Float4 { Float4 {
data: [self.get_0() - other.get_0(), data: [self.get_0() - other.get_0(),
@ -202,6 +313,11 @@ impl Sub for Float4 {
impl Mul for Float4 { impl Mul for Float4 {
type Output = Float4; type Output = Float4;
#[cfg(feature = "simd_perf")]
fn mul(self, other: Float4) -> Float4 {
    // Unwrap both operands and let the vector type's `*` do the work.
    let Float4 { data: lhs } = self;
    let Float4 { data: rhs } = other;
    Float4 { data: lhs * rhs }
}
#[cfg(not(feature = "simd_perf"))]
fn mul(self, other: Float4) -> Float4 { fn mul(self, other: Float4) -> Float4 {
Float4 { Float4 {
data: [self.get_0() * other.get_0(), data: [self.get_0() * other.get_0(),
@ -215,6 +331,11 @@ impl Mul for Float4 {
impl Mul<f32> for Float4 { impl Mul<f32> for Float4 {
type Output = Float4; type Output = Float4;
#[cfg(feature = "simd_perf")]
fn mul(self, other: f32) -> Float4 {
    // Turn the scalar into a vector with `splat`, then multiply the vectors.
    let factor = f32x4::splat(other);
    let Float4 { data: lhs } = self;
    Float4 { data: lhs * factor }
}
#[cfg(not(feature = "simd_perf"))]
fn mul(self, other: f32) -> Float4 { fn mul(self, other: f32) -> Float4 {
Float4 { Float4 {
data: [self.get_0() * other, data: [self.get_0() * other,
@ -229,6 +350,11 @@ impl Mul<f32> for Float4 {
impl Div for Float4 { impl Div for Float4 {
type Output = Float4; type Output = Float4;
#[cfg(feature = "simd_perf")]
fn div(self, other: Float4) -> Float4 {
    // Unwrap both operands and let the vector type's `/` do the work.
    let Float4 { data: lhs } = self;
    let Float4 { data: rhs } = other;
    Float4 { data: lhs / rhs }
}
#[cfg(not(feature = "simd_perf"))]
fn div(self, other: Float4) -> Float4 { fn div(self, other: Float4) -> Float4 {
Float4 { Float4 {
data: [self.get_0() / other.get_0(), data: [self.get_0() / other.get_0(),
@ -242,6 +368,11 @@ impl Div for Float4 {
impl Div<f32> for Float4 { impl Div<f32> for Float4 {
type Output = Float4; type Output = Float4;
#[cfg(feature = "simd_perf")]
fn div(self, other: f32) -> Float4 {
    // Turn the scalar into a vector with `splat`, then divide the vectors.
    let divisor = f32x4::splat(other);
    let Float4 { data: lhs } = self;
    Float4 { data: lhs / divisor }
}
#[cfg(not(feature = "simd_perf"))]
fn div(self, other: f32) -> Float4 { fn div(self, other: f32) -> Float4 {
Float4 { Float4 {
data: [self.get_0() / other, data: [self.get_0() / other,

View File

@ -5,6 +5,10 @@ extern crate crossbeam;
extern crate num_cpus; extern crate num_cpus;
extern crate quickersort; extern crate quickersort;
extern crate lodepng; extern crate lodepng;
#[cfg(feature = "simd_perf")]
extern crate simd;
#[macro_use] #[macro_use]
extern crate nom; extern crate nom;