Optional use of SIMD intrinsics, via a Cargo feature.
This is disabled by default right now, because it only builds on nightly.
This commit is contained in:
parent
4db96bc758
commit
e4c94d0c58
6
Cargo.lock
generated
6
Cargo.lock
generated
|
@ -10,6 +10,7 @@ dependencies = [
|
||||||
"quickersort 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"quickersort 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"scoped_threadpool 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
"scoped_threadpool 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -131,6 +132,11 @@ name = "scoped_threadpool"
|
||||||
version = "0.1.7"
|
version = "0.1.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "simd"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
|
|
|
@ -3,6 +3,9 @@ name = "psychopath"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
authors = ["Nathan Vegdahl <cessen@cessen.com>"]
|
authors = ["Nathan Vegdahl <cessen@cessen.com>"]
|
||||||
|
|
||||||
|
[features]
|
||||||
|
simd_perf = ["simd"]
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
debug = true
|
debug = true
|
||||||
|
|
||||||
|
@ -15,3 +18,5 @@ crossbeam = "0.2"
|
||||||
num_cpus = "0.2"
|
num_cpus = "0.2"
|
||||||
quickersort = "2.0"
|
quickersort = "2.0"
|
||||||
lodepng = "0.8"
|
lodepng = "0.8"
|
||||||
|
|
||||||
|
simd = { version = "0.1.1", optional = true }
|
131
src/float4.rs
131
src/float4.rs
|
@ -3,18 +3,42 @@
|
||||||
use std::ops::{Index, IndexMut, Add, Sub, Mul, Div};
|
use std::ops::{Index, IndexMut, Add, Sub, Mul, Div};
|
||||||
use std::cmp::PartialEq;
|
use std::cmp::PartialEq;
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
use simd::f32x4;
|
||||||
|
|
||||||
/// Essentially a tuple of four floats, which will use SIMD operations
|
/// Essentially a tuple of four floats, which will use SIMD operations
|
||||||
/// where possible on a platform.
|
/// where possible on a platform.
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
#[derive(Debug, Copy, Clone)]
|
||||||
|
pub struct Float4 {
|
||||||
|
data: f32x4,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
#[derive(Debug, Copy, Clone)]
|
#[derive(Debug, Copy, Clone)]
|
||||||
pub struct Float4 {
|
pub struct Float4 {
|
||||||
data: [f32; 4],
|
data: [f32; 4],
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Float4 {
|
impl Float4 {
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 {
|
||||||
|
Float4 { data: f32x4::new(a, b, c, d) }
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 {
|
pub fn new(a: f32, b: f32, c: f32, d: f32) -> Float4 {
|
||||||
Float4 { data: [a, b, c, d] }
|
Float4 { data: [a, b, c, d] }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn splat(n: f32) -> Float4 {
|
||||||
|
Float4 { data: f32x4::splat(n) }
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
|
pub fn splat(n: f32) -> Float4 {
|
||||||
|
Float4 { data: [n, n, n, n] }
|
||||||
|
}
|
||||||
|
|
||||||
pub fn h_sum(&self) -> f32 {
|
pub fn h_sum(&self) -> f32 {
|
||||||
self.get_0() + self.get_1() + self.get_2() + self.get_3()
|
self.get_0() + self.get_1() + self.get_2() + self.get_3()
|
||||||
}
|
}
|
||||||
|
@ -59,6 +83,11 @@ impl Float4 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn v_min(&self, other: Float4) -> Float4 {
|
||||||
|
Float4 { data: self.data.min(other.data) }
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn v_min(&self, other: Float4) -> Float4 {
|
pub fn v_min(&self, other: Float4) -> Float4 {
|
||||||
Float4::new(if self.get_0() < other.get_0() {
|
Float4::new(if self.get_0() < other.get_0() {
|
||||||
self.get_0()
|
self.get_0()
|
||||||
|
@ -83,6 +112,11 @@ impl Float4 {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn v_max(&self, other: Float4) -> Float4 {
|
||||||
|
Float4 { data: self.data.max(other.data) }
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn v_max(&self, other: Float4) -> Float4 {
|
pub fn v_max(&self, other: Float4) -> Float4 {
|
||||||
Float4::new(if self.get_0() > other.get_0() {
|
Float4::new(if self.get_0() > other.get_0() {
|
||||||
self.get_0()
|
self.get_0()
|
||||||
|
@ -106,42 +140,90 @@ impl Float4 {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the 0th element to the given value.
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn set_0(&mut self, n: f32) {
|
||||||
|
self.data = self.data.replace(0, n);
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn set_0(&mut self, n: f32) {
|
pub fn set_0(&mut self, n: f32) {
|
||||||
unsafe {
|
unsafe {
|
||||||
*self.data.get_unchecked_mut(0) = n;
|
*self.data.get_unchecked_mut(0) = n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the element at index 1 to the given value.
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn set_1(&mut self, n: f32) {
|
||||||
|
self.data = self.data.replace(1, n);
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn set_1(&mut self, n: f32) {
|
pub fn set_1(&mut self, n: f32) {
|
||||||
unsafe {
|
unsafe {
|
||||||
*self.data.get_unchecked_mut(1) = n;
|
*self.data.get_unchecked_mut(1) = n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the element at index 2 to the given value.
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn set_2(&mut self, n: f32) {
|
||||||
|
self.data = self.data.replace(2, n);
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn set_2(&mut self, n: f32) {
|
pub fn set_2(&mut self, n: f32) {
|
||||||
unsafe {
|
unsafe {
|
||||||
*self.data.get_unchecked_mut(2) = n;
|
*self.data.get_unchecked_mut(2) = n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the element at index 3 to the given value.
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn set_3(&mut self, n: f32) {
|
||||||
|
self.data = self.data.replace(3, n);
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn set_3(&mut self, n: f32) {
|
pub fn set_3(&mut self, n: f32) {
|
||||||
unsafe {
|
unsafe {
|
||||||
*self.data.get_unchecked_mut(3) = n;
|
*self.data.get_unchecked_mut(3) = n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the value of the 0th element.
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn get_0(&self) -> f32 {
|
||||||
|
self.data.extract(0)
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn get_0(&self) -> f32 {
|
pub fn get_0(&self) -> f32 {
|
||||||
unsafe { *self.data.get_unchecked(0) }
|
unsafe { *self.data.get_unchecked(0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the value of the element at index 1.
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn get_1(&self) -> f32 {
|
||||||
|
self.data.extract(1)
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn get_1(&self) -> f32 {
|
pub fn get_1(&self) -> f32 {
|
||||||
unsafe { *self.data.get_unchecked(1) }
|
unsafe { *self.data.get_unchecked(1) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the value of the element at index 2.
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn get_2(&self) -> f32 {
|
||||||
|
self.data.extract(2)
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn get_2(&self) -> f32 {
|
pub fn get_2(&self) -> f32 {
|
||||||
unsafe { *self.data.get_unchecked(2) }
|
unsafe { *self.data.get_unchecked(2) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the value of the element at index 3.
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
pub fn get_3(&self) -> f32 {
|
||||||
|
self.data.extract(3)
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
pub fn get_3(&self) -> f32 {
|
pub fn get_3(&self) -> f32 {
|
||||||
unsafe { *self.data.get_unchecked(3) }
|
unsafe { *self.data.get_unchecked(3) }
|
||||||
}
|
}
|
||||||
|
@ -151,12 +233,31 @@ impl Float4 {
|
||||||
impl Index<usize> for Float4 {
|
impl Index<usize> for Float4 {
|
||||||
type Output = f32;
|
type Output = f32;
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
fn index(&self, index: usize) -> &f32 {
|
||||||
|
// TODO: this might not be correct! It works, but need to make sure
|
||||||
|
// to do this in a way with properly defined behavior.
|
||||||
|
use std::mem::transmute;
|
||||||
|
let vs: &[f32; 4] = unsafe { transmute(&self.data) };
|
||||||
|
&vs[index]
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
fn index(&self, index: usize) -> &f32 {
|
fn index(&self, index: usize) -> &f32 {
|
||||||
&self.data[index]
|
&self.data[index]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
impl IndexMut<usize> for Float4 {
|
impl IndexMut<usize> for Float4 {
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
fn index_mut(&mut self, index: usize) -> &mut f32 {
|
||||||
|
// TODO: this might not be correct! It works, but need to make sure
|
||||||
|
// to do this in a way with properly defined behavior.
|
||||||
|
use std::mem::transmute;
|
||||||
|
let vs: &mut [f32; 4] = unsafe { transmute(&mut self.data) };
|
||||||
|
&mut vs[index]
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
fn index_mut(&mut self, index: usize) -> &mut f32 {
|
fn index_mut(&mut self, index: usize) -> &mut f32 {
|
||||||
&mut self.data[index]
|
&mut self.data[index]
|
||||||
}
|
}
|
||||||
|
@ -174,6 +275,11 @@ impl PartialEq for Float4 {
|
||||||
impl Add for Float4 {
|
impl Add for Float4 {
|
||||||
type Output = Float4;
|
type Output = Float4;
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
fn add(self, other: Float4) -> Float4 {
|
||||||
|
Float4 { data: self.data + other.data }
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
fn add(self, other: Float4) -> Float4 {
|
fn add(self, other: Float4) -> Float4 {
|
||||||
Float4 {
|
Float4 {
|
||||||
data: [self.get_0() + other.get_0(),
|
data: [self.get_0() + other.get_0(),
|
||||||
|
@ -188,6 +294,11 @@ impl Add for Float4 {
|
||||||
impl Sub for Float4 {
|
impl Sub for Float4 {
|
||||||
type Output = Float4;
|
type Output = Float4;
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
fn sub(self, other: Float4) -> Float4 {
|
||||||
|
Float4 { data: self.data - other.data }
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
fn sub(self, other: Float4) -> Float4 {
|
fn sub(self, other: Float4) -> Float4 {
|
||||||
Float4 {
|
Float4 {
|
||||||
data: [self.get_0() - other.get_0(),
|
data: [self.get_0() - other.get_0(),
|
||||||
|
@ -202,6 +313,11 @@ impl Sub for Float4 {
|
||||||
impl Mul for Float4 {
|
impl Mul for Float4 {
|
||||||
type Output = Float4;
|
type Output = Float4;
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
fn mul(self, other: Float4) -> Float4 {
|
||||||
|
Float4 { data: self.data * other.data }
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
fn mul(self, other: Float4) -> Float4 {
|
fn mul(self, other: Float4) -> Float4 {
|
||||||
Float4 {
|
Float4 {
|
||||||
data: [self.get_0() * other.get_0(),
|
data: [self.get_0() * other.get_0(),
|
||||||
|
@ -215,6 +331,11 @@ impl Mul for Float4 {
|
||||||
impl Mul<f32> for Float4 {
|
impl Mul<f32> for Float4 {
|
||||||
type Output = Float4;
|
type Output = Float4;
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
fn mul(self, other: f32) -> Float4 {
|
||||||
|
Float4 { data: self.data * f32x4::splat(other) }
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
fn mul(self, other: f32) -> Float4 {
|
fn mul(self, other: f32) -> Float4 {
|
||||||
Float4 {
|
Float4 {
|
||||||
data: [self.get_0() * other,
|
data: [self.get_0() * other,
|
||||||
|
@ -229,6 +350,11 @@ impl Mul<f32> for Float4 {
|
||||||
impl Div for Float4 {
|
impl Div for Float4 {
|
||||||
type Output = Float4;
|
type Output = Float4;
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
fn div(self, other: Float4) -> Float4 {
|
||||||
|
Float4 { data: self.data / other.data }
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
fn div(self, other: Float4) -> Float4 {
|
fn div(self, other: Float4) -> Float4 {
|
||||||
Float4 {
|
Float4 {
|
||||||
data: [self.get_0() / other.get_0(),
|
data: [self.get_0() / other.get_0(),
|
||||||
|
@ -242,6 +368,11 @@ impl Div for Float4 {
|
||||||
impl Div<f32> for Float4 {
|
impl Div<f32> for Float4 {
|
||||||
type Output = Float4;
|
type Output = Float4;
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
fn div(self, other: f32) -> Float4 {
|
||||||
|
Float4 { data: self.data / f32x4::splat(other) }
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "simd_perf"))]
|
||||||
fn div(self, other: f32) -> Float4 {
|
fn div(self, other: f32) -> Float4 {
|
||||||
Float4 {
|
Float4 {
|
||||||
data: [self.get_0() / other,
|
data: [self.get_0() / other,
|
||||||
|
|
|
@ -5,6 +5,10 @@ extern crate crossbeam;
|
||||||
extern crate num_cpus;
|
extern crate num_cpus;
|
||||||
extern crate quickersort;
|
extern crate quickersort;
|
||||||
extern crate lodepng;
|
extern crate lodepng;
|
||||||
|
|
||||||
|
#[cfg(feature = "simd_perf")]
|
||||||
|
extern crate simd;
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate nom;
|
extern crate nom;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user