From b3cc5c070a50952f6364e5b7465e24e2cf99fc24 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Wed, 19 Jun 2019 17:45:04 +0900 Subject: [PATCH] Added fused multiply-add method to Float4. --- sub_crates/float4/src/lib.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sub_crates/float4/src/lib.rs b/sub_crates/float4/src/lib.rs index 8173883..4f5d934 100644 --- a/sub_crates/float4/src/lib.rs +++ b/sub_crates/float4/src/lib.rs @@ -232,6 +232,24 @@ mod x86_64_sse { data: unsafe { _mm_sqrt_ps(self.data) }, } } + + /// Performs a fused multiply add. + /// + /// i.e. self * b + c + #[inline(always)] + pub fn fmadd(&self, b: Float4, c: Float4) -> Float4 { + #[cfg(target_feature = "fma")] + { + use std::arch::x86_64::_mm_fmadd_ps; + Float4 { + data: unsafe { _mm_fmadd_ps(self.data, b.data, c.data) }, + } + } + #[cfg(not(target_feature = "fma"))] + { + (*self * b) + c + } + } } impl PartialEq for Float4 { @@ -886,6 +904,14 @@ mod fallback { self.get_3().sqrt(), ) } + + /// Performs a fused multiply add. + /// + /// i.e. self * b + c + #[inline(always)] + pub fn fmadd(&self, b: Float4, c: Float4) -> Float4 { + (*self * b) + c + } } impl PartialEq for Float4 {