diff --git a/src/light/rectangle_light.rs b/src/light/rectangle_light.rs index ab183c1..231e687 100644 --- a/src/light/rectangle_light.rs +++ b/src/light/rectangle_light.rs @@ -81,7 +81,7 @@ impl<'a> RectangleLight<'a> { let area_2 = spherical_triangle_solid_angle(sp4, sp1, sp3); // World-space surface normal - let normal = Normal::new(0.0, 0.0, 1.0).xform(space); + let normal = Normal::new(0.0, 0.0, 1.0).xform_fast(space); // PDF if (area_1 + area_2) < SIMPLE_SAMPLING_THRESHOLD { @@ -156,7 +156,7 @@ impl<'a> SurfaceLight for RectangleLight<'a> { let area_2 = spherical_triangle_solid_angle(sp4, sp1, sp3); // Calculate world-space surface normal - let normal = Normal::new(0.0, 0.0, 1.0).xform(space); + let normal = Normal::new(0.0, 0.0, 1.0).xform_fast(space); if (area_1 + area_2) < SIMPLE_SAMPLING_THRESHOLD { // Simple sampling for more distant lights diff --git a/src/light/sphere_light.rs b/src/light/sphere_light.rs index a7ef6ab..9ff1664 100644 --- a/src/light/sphere_light.rs +++ b/src/light/sphere_light.rs @@ -157,7 +157,7 @@ impl<'a> SurfaceLight for SphereLight<'a> { let point = normal * radius as f32; ( point.into_point().xform(space), - normal.into_normal().xform(space), + normal.into_normal().xform_fast(space), ) }; let pdf = uniform_sample_cone_pdf(cos_theta_max); @@ -175,7 +175,7 @@ impl<'a> SurfaceLight for SphereLight<'a> { let point = normal * radius as f32; ( point.into_point().xform(space), - normal.into_normal().xform(space), + normal.into_normal().xform_fast(space), ) }; let pdf = 1.0 / (4.0 * PI_64); @@ -296,7 +296,7 @@ impl<'a> Surface for SphereLight<'a> { // TODO: proper error bounds. let pos_err = 0.001; - let normal = unit_pos.into_normal().xform(&xform); + let normal = unit_pos.into_normal().xform_fast(&xform); let intersection_data = SurfaceIntersectionData { incoming: rays.dir(ray_idx), diff --git a/src/math.rs b/src/math.rs index b29f6b1..2fd40da 100644 --- a/src/math.rs +++ b/src/math.rs @@ -3,7 +3,8 @@ use std::f32; pub use rmath::{ - cross, dot, wide4::Float4, CrossProduct, DotProduct, Normal, Point, Vector, Xform, XformFull, + cross, cross_fast, dot, dot_fast, wide4::Float4, CrossProduct, DotProduct, Normal, Point, + Vector, Xform, XformFull, }; /// Clamps a value between a min and max. diff --git a/src/sampling/monte_carlo.rs b/src/sampling/monte_carlo.rs index a17072b..11125c3 100644 --- a/src/sampling/monte_carlo.rs +++ b/src/sampling/monte_carlo.rs @@ -2,7 +2,7 @@ use std::{f32::consts::FRAC_PI_4 as QPI_32, f32::consts::PI as PI_32, f64::consts::PI as PI_64}; -use crate::math::{cross, dot, Point, Vector}; +use crate::math::{cross_fast, dot_fast, Point, Vector}; /// Maps the unit square to the unit circle. /// NOTE: x and y should be distributed within [-1, 1], @@ -90,7 +90,7 @@ pub fn uniform_sample_triangle(va: Vector, vb: Vector, vc: Vector, i: f32, j: f3 /// Calculates the surface area of a triangle. pub fn triangle_surface_area(p0: Point, p1: Point, p2: Point) -> f32 { - 0.5 * cross(p1 - p0, p2 - p0).length() + 0.5 * cross_fast(p1 - p0, p2 - p0).length() } /// Calculates the projected solid angle of a spherical triangle. @@ -98,9 +98,9 @@ pub fn triangle_surface_area(p0: Point, p1: Point, p2: Point) -> f32 { /// A, B, and C are the points of the triangle on a unit sphere. pub fn spherical_triangle_solid_angle(va: Vector, vb: Vector, vc: Vector) -> f32 { // Calculate sines and cosines of the spherical triangle's edge lengths - let cos_a: f64 = dot(vb, vc).max(-1.0).min(1.0) as f64; - let cos_b: f64 = dot(vc, va).max(-1.0).min(1.0) as f64; - let cos_c: f64 = dot(va, vb).max(-1.0).min(1.0) as f64; + let cos_a: f64 = dot_fast(vb, vc).max(-1.0).min(1.0) as f64; + let cos_b: f64 = dot_fast(vc, va).max(-1.0).min(1.0) as f64; + let cos_c: f64 = dot_fast(va, vb).max(-1.0).min(1.0) as f64; let sin_a: f64 = (1.0 - (cos_a * cos_a)).sqrt(); let sin_b: f64 = (1.0 - (cos_b * cos_b)).sqrt(); let sin_c: f64 = (1.0 - (cos_c * cos_c)).sqrt(); @@ -141,9 +141,9 @@ pub fn uniform_sample_spherical_triangle( j: f32, ) -> Vector { // Calculate sines and cosines of the spherical triangle's edge lengths - let cos_a: f64 = dot(vb, vc).max(-1.0).min(1.0) as f64; - let cos_b: f64 = dot(vc, va).max(-1.0).min(1.0) as f64; - let cos_c: f64 = dot(va, vb).max(-1.0).min(1.0) as f64; + let cos_a: f64 = dot_fast(vb, vc).max(-1.0).min(1.0) as f64; + let cos_b: f64 = dot_fast(vc, va).max(-1.0).min(1.0) as f64; + let cos_c: f64 = dot_fast(va, vb).max(-1.0).min(1.0) as f64; let sin_a: f64 = (1.0 - (cos_a * cos_a)).sqrt(); let sin_b: f64 = (1.0 - (cos_b * cos_b)).sqrt(); let sin_c: f64 = (1.0 - (cos_c * cos_c)).sqrt(); @@ -191,10 +191,10 @@ pub fn uniform_sample_spherical_triangle( let q_bottom = ((v * s) + (u * t)) * sin_va; let q = q_top / q_bottom; - let vc_2 = - (va * q as f32) + ((vc - (va * dot(vc, va))).normalized() * (1.0 - (q * q)).sqrt() as f32); + let vc_2 = (va * q as f32) + + ((vc - (va * dot_fast(vc, va))).normalized() * (1.0 - (q * q)).sqrt() as f32); - let z = 1.0 - (j * (1.0 - dot(vc_2, vb))); + let z = 1.0 - (j * (1.0 - dot_fast(vc_2, vb))); - (vb * z) + ((vc_2 - (vb * dot(vc_2, vb))).normalized() * (1.0 - (z * z)).sqrt()) + (vb * z) + ((vc_2 - (vb * dot_fast(vc_2, vb))).normalized() * (1.0 - (z * z)).sqrt()) } diff --git a/src/scene/assembly.rs b/src/scene/assembly.rs index 90a296d..2f47cfe 100644 --- a/src/scene/assembly.rs +++ b/src/scene/assembly.rs @@ -70,8 +70,8 @@ impl<'a> Assembly<'a> { if let Some((light_i, sel_pdf, whittled_n)) = self.light_accel.select( idata.incoming.xform_inv(&sel_xform), idata.pos.xform_inv(&sel_xform), - idata.nor.xform_inv(&sel_xform), - idata.nor_g.xform_inv(&sel_xform), + idata.nor.xform_inv_fast(&sel_xform), + idata.nor_g.xform_inv_fast(&sel_xform), &closure, time, n, diff --git a/src/shading/surface_closure.rs b/src/shading/surface_closure.rs index 6e0aa16..620eb51 100644 --- a/src/shading/surface_closure.rs +++ b/src/shading/surface_closure.rs @@ -5,7 +5,7 @@ use std::f32::consts::PI as PI_32; use crate::{ color::{Color, SpectralSample}, lerp::{lerp, Lerp}, - math::{clamp, dot, zup_to_vec, Float4, Normal, Vector}, + math::{clamp, dot_fast, zup_to_vec, Float4, Normal, Vector}, sampling::cosine_sample_hemisphere, }; @@ -287,7 +287,7 @@ mod lambert_closure { uv: (f32, f32), wavelength: f32, ) -> (Vector, SpectralSample, f32) { - let (nn, flipped_nor_g) = if dot(nor_g.into_vector(), inc) <= 0.0 { + let (nn, flipped_nor_g) = if dot_fast(nor_g.into_vector(), inc) <= 0.0 { (nor.normalized().into_vector(), nor_g.into_vector()) } else { (-nor.normalized().into_vector(), -nor_g.into_vector()) @@ -300,7 +300,7 @@ mod lambert_closure { let out = zup_to_vec(dir, nn); // Make sure it's not on the wrong side of the geometric normal. - if dot(flipped_nor_g, out) >= 0.0 { + if dot_fast(flipped_nor_g, out) >= 0.0 { (out, color.to_spectral_sample(wavelength) * pdf, pdf) } else { (out, SpectralSample::new(0.0), 0.0) @@ -315,14 +315,14 @@ mod lambert_closure { nor_g: Normal, wavelength: f32, ) -> (SpectralSample, f32) { - let (nn, flipped_nor_g) = if dot(nor_g.into_vector(), inc) <= 0.0 { + let (nn, flipped_nor_g) = if dot_fast(nor_g.into_vector(), inc) <= 0.0 { (nor.normalized().into_vector(), nor_g.into_vector()) } else { (-nor.normalized().into_vector(), -nor_g.into_vector()) }; - if dot(flipped_nor_g, out) >= 0.0 { - let fac = dot(nn, out.normalized()).max(0.0) * INV_PI; + if dot_fast(flipped_nor_g, out) >= 0.0 { + let fac = dot_fast(nn, out.normalized()).max(0.0) * INV_PI; (color.to_spectral_sample(wavelength) * fac, fac) } else { (SpectralSample::new(0.0), 0.0) @@ -381,14 +381,14 @@ mod lambert_closure { let cos_theta_max = (1.0 - sin_theta_max2).sqrt(); let v = to_light_center.normalized(); - let nn = if dot(nor_g.into_vector(), inc) <= 0.0 { + let nn = if dot_fast(nor_g.into_vector(), inc) <= 0.0 { nor.normalized() } else { -nor.normalized() } .into_vector(); - let cos_nv = dot(nn, v).max(-1.0).min(1.0); + let cos_nv = dot_fast(nn, v).max(-1.0).min(1.0); // Alt implementation from the SPI paper. // Worse sampling, but here for reference. @@ -426,7 +426,7 @@ mod ggx_closure { wavelength: f32, ) -> (Vector, SpectralSample, f32) { // Get normalized surface normal - let (nn, flipped_nor_g) = if dot(nor_g.into_vector(), inc) <= 0.0 { + let (nn, flipped_nor_g) = if dot_fast(nor_g.into_vector(), inc) <= 0.0 { (nor.normalized().into_vector(), nor_g.into_vector()) } else { (-nor.normalized().into_vector(), -nor_g.into_vector()) @@ -440,10 +440,10 @@ mod ggx_closure { let mut half_dir = Vector::new(angle.cos() * theta_sin, angle.sin() * theta_sin, theta_cos); half_dir = zup_to_vec(half_dir, nn).normalized(); - let out = inc - (half_dir * 2.0 * dot(inc, half_dir)); + let out = inc - (half_dir * 2.0 * dot_fast(inc, half_dir)); // Make sure it's not on the wrong side of the geometric normal. - if dot(flipped_nor_g, out) >= 0.0 { + if dot_fast(flipped_nor_g, out) >= 0.0 { let (filter, pdf) = evaluate(col, roughness, fresnel, inc, out, nor, nor_g, wavelength); (out, filter, pdf) } else { @@ -467,23 +467,23 @@ mod ggx_closure { let hh = (aa + bb).normalized(); // Half-way between aa and bb // Surface normal - let (nn, flipped_nor_g) = if dot(nor_g.into_vector(), inc) <= 0.0 { + let (nn, flipped_nor_g) = if dot_fast(nor_g.into_vector(), inc) <= 0.0 { (nor.normalized().into_vector(), nor_g.into_vector()) } else { (-nor.normalized().into_vector(), -nor_g.into_vector()) }; // Make sure everything's on the correct side of the surface - if dot(nn, aa) < 0.0 || dot(nn, bb) < 0.0 || dot(flipped_nor_g, bb) < 0.0 { + if dot_fast(nn, aa) < 0.0 || dot_fast(nn, bb) < 0.0 || dot_fast(flipped_nor_g, bb) < 0.0 { return (SpectralSample::new(0.0), 0.0); } // Calculate needed dot products - let na = clamp(dot(nn, aa), -1.0, 1.0); - let nb = clamp(dot(nn, bb), -1.0, 1.0); - let ha = clamp(dot(hh, aa), -1.0, 1.0); - let hb = clamp(dot(hh, bb), -1.0, 1.0); - let nh = clamp(dot(nn, hh), -1.0, 1.0); + let na = clamp(dot_fast(nn, aa), -1.0, 1.0); + let nb = clamp(dot_fast(nn, bb), -1.0, 1.0); + let ha = clamp(dot_fast(hh, aa), -1.0, 1.0); + let hb = clamp(dot_fast(hh, bb), -1.0, 1.0); + let nh = clamp(dot_fast(nn, hh), -1.0, 1.0); // Calculate F - Fresnel let col_f = { @@ -554,7 +554,7 @@ mod ggx_closure { assert!(cos_theta_max <= 1.0); // Surface normal - let nn = if dot(nor.into_vector(), inc) < 0.0 { + let nn = if dot_fast(nor.into_vector(), inc) < 0.0 { nor.normalized() } else { -nor.normalized() // If back-facing, flip normal @@ -572,9 +572,9 @@ mod ggx_closure { // let vv = Halton::sample(1, i); // let mut samp = uniform_sample_cone(uu, vv, cos_theta_max); // samp = zup_to_vec(samp, bb).normalized(); - // if dot(nn, samp) > 0.0 { + // if dot_fast(nn, samp) > 0.0 { // let hh = (aa+samp).normalized(); - // fac += ggx_d(dot(nn, hh), roughness); + // fac += ggx_d(dot_fast(nn, hh), roughness); // } //} //fac /= N * N; @@ -582,7 +582,7 @@ mod ggx_closure { // Approximate method let theta = cos_theta_max.acos(); let hh = (aa + bb).normalized(); - let nh = clamp(dot(nn, hh), -1.0, 1.0); + let nh = clamp(dot_fast(nn, hh), -1.0, 1.0); let fac = ggx_d(nh, (1.0f32).min(roughness.sqrt() + (2.0 * theta / PI_32))); fac * (1.0f32).min(1.0 - cos_theta_max) * INV_PI diff --git a/src/surface/micropoly_batch.rs b/src/surface/micropoly_batch.rs index 54de7a4..9a95e31 100644 --- a/src/surface/micropoly_batch.rs +++ b/src/surface/micropoly_batch.rs @@ -319,7 +319,7 @@ impl<'a> MicropolyBatch<'a> { let n1 = lerp_slice(n1_slice, ray_time).normalized(); let n2 = lerp_slice(n2_slice, ray_time).normalized(); - let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)).xform(&mat_space); + let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)).xform_fast(&mat_space); if dot(s_nor, geo_normal) >= 0.0 { s_nor } else { diff --git a/src/surface/triangle_mesh.rs b/src/surface/triangle_mesh.rs index ae82dac..f41ca6f 100644 --- a/src/surface/triangle_mesh.rs +++ b/src/surface/triangle_mesh.rs @@ -297,7 +297,7 @@ impl<'a> Surface for TriangleMesh<'a> { let n1 = lerp_slice(n1_slice, ray_time).normalized(); let n2 = lerp_slice(n2_slice, ray_time).normalized(); - let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)).xform(&mat_space); + let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)).xform_fast(&mat_space); if dot(s_nor, geo_normal) >= 0.0 { s_nor } else { diff --git a/sub_crates/rmath/src/wide4/sse.rs b/sub_crates/rmath/src/wide4/sse.rs index 9835ce5..f7eaee5 100644 --- a/sub_crates/rmath/src/wide4/sse.rs +++ b/sub_crates/rmath/src/wide4/sse.rs @@ -34,7 +34,7 @@ impl Float4 { /// `(self * a) + b` with only one rounding error. #[inline(always)] pub fn mul_add(self, a: Self, b: Self) -> Self { - if cfg!(feature = "fma") { + if is_x86_feature_detected!("fma") { Self(unsafe { _mm_fmadd_ps(self.0, a.0, b.0) }) } else { Self::new( diff --git a/sub_crates/spectral_upsampling/src/jakob.rs b/sub_crates/spectral_upsampling/src/jakob.rs index 9288dd0..8abce47 100644 --- a/sub_crates/spectral_upsampling/src/jakob.rs +++ b/sub_crates/spectral_upsampling/src/jakob.rs @@ -8,6 +8,8 @@ /// the coefficents for even more efficient evaluation later on. use rmath::wide4::Float4; +pub const EQUAL_ENERGY_REFLECTANCE: f32 = 1.0; + /// How many polynomial coefficients? const RGB2SPEC_N_COEFFS: usize = 3; @@ -134,7 +136,7 @@ fn small_rgb_to_spectrum_p4( #[inline(always)] fn rgb2spec_fma_4(a: Float4, b: Float4, c: Float4) -> Float4 { - (a * b) + c + a.mul_add(b, c) } fn rgb2spec_eval_4(coeff: [f32; RGB2SPEC_N_COEFFS], lambda: Float4) -> Float4 {