diff --git a/src/hilbert.rs b/src/hilbert.rs index c8b1bcd..4ea1817 100644 --- a/src/hilbert.rs +++ b/src/hilbert.rs @@ -20,7 +20,7 @@ fn hil_rot(n: u32, rx: u32, ry: u32, x: &mut u32, y: &mut u32) { /// y: The y coordinate. Must be a positive integer no greater than 2^16-1. /// /// Returns the hilbert curve index corresponding to the (x,y) coordinates given. -pub fn xy2d(x: u32, y: u32) -> u32 { +pub fn xy2i(x: u32, y: u32) -> u32 { assert!(x < N); assert!(y < N); @@ -44,7 +44,7 @@ pub fn xy2d(x: u32, y: u32) -> u32 { /// d: The hilbert curve index. /// /// Returns the (x, y) coords at the given index. -pub fn d2xy(d: u32) -> (u32, u32) { +pub fn i2xy(d: u32) -> (u32, u32) { let (mut x, mut y) = (0, 0); let mut s = 1; let mut t = d; @@ -69,8 +69,8 @@ mod tests { #[test] fn reversible() { let d = 54; - let (x, y) = d2xy(d); - let d2 = xy2d(x, y); + let (x, y) = i2xy(d); + let d2 = xy2i(x, y); assert_eq!(d, d2); } diff --git a/src/lerp.rs b/src/lerp.rs index 8ffea7e..13052a9 100644 --- a/src/lerp.rs +++ b/src/lerp.rs @@ -213,20 +213,18 @@ mod tests { #[test] fn lerp_matrix() { - let a = Transform::new_from_values( - 0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, - ); - let b = Transform::new_from_values( + let a = Xform::new(0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0); + let b = Xform::new( -1.0, 1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, ); - let c1 = Transform::new_from_values( + let c1 = Xform::new( -0.25, 1.75, 2.25, 3.25, 4.25, 5.25, 6.25, 7.25, 8.25, 9.25, 10.25, 11.25, ); - let c2 = Transform::new_from_values( + let c2 = Xform::new( -0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, ); - let c3 = Transform::new_from_values( + let c3 = Xform::new( -0.75, 1.25, 2.75, 3.75, 4.75, 5.75, 6.75, 7.75, 8.75, 9.75, 10.75, 11.75, ); diff --git a/src/main.rs b/src/main.rs index f802c77..ebf8468 100644 --- a/src/main.rs +++ b/src/main.rs @@ -29,11 +29,13 @@ mod lerp; mod light; mod math; mod mis; +mod morton; mod parse; mod ray; mod renderer; mod sampling; mod scene; +mod scramble; mod shading; mod surface; mod timer; diff --git a/src/morton.rs b/src/morton.rs new file mode 100644 index 0000000..9882db1 --- /dev/null +++ b/src/morton.rs @@ -0,0 +1,81 @@ +#![allow(dead_code)] + +const N: u32 = 1 << 16; + +#[inline(always)] +fn part_1_by_1(mut x: u32) -> u32 { + x &= 0x0000ffff; + x = (x ^ (x << 8)) & 0x00ff00ff; + x = (x ^ (x << 4)) & 0x0f0f0f0f; + x = (x ^ (x << 2)) & 0x33333333; + x = (x ^ (x << 1)) & 0x55555555; + x +} + +#[inline(always)] +fn part_1_by_2(mut x: u32) -> u32 { + x &= 0x000003ff; + x = (x ^ (x << 16)) & 0xff0000ff; + x = (x ^ (x << 8)) & 0x0300f00f; + x = (x ^ (x << 4)) & 0x030c30c3; + x = (x ^ (x << 2)) & 0x09249249; + x +} + +#[inline(always)] +fn compact_1_by_1(mut x: u32) -> u32 { + x &= 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 + x = (x ^ (x >> 1)) & 0x33333333; + x = (x ^ (x >> 2)) & 0x0f0f0f0f; + x = (x ^ (x >> 4)) & 0x00ff00ff; + x = (x ^ (x >> 8)) & 0x0000ffff; + x +} + +#[inline(always)] +fn compact_1_by_2(mut x: u32) -> u32 { + x &= 0x09249249; + x = (x ^ (x >> 2)) & 0x030c30c3; + x = (x ^ (x >> 4)) & 0x0300f00f; + x = (x ^ (x >> 8)) & 0xff0000ff; + x = (x ^ (x >> 16)) & 0x000003ff; + x +} + +/// Convert (x,y) to morton curve index. +/// +/// x: The x coordinate. Must be a positive integer no greater than 2^16-1. +/// y: The y coordinate. Must be a positive integer no greater than 2^16-1. +/// +/// Returns the morton curve index corresponding to the (x,y) coordinates given. +pub fn xy2i(x: u32, y: u32) -> u32 { + debug_assert!(x < N); + debug_assert!(y < N); + + part_1_by_1(x) | (part_1_by_1(y) << 1) +} + +/// Convert morton curve index to (x,y). +/// +/// i: The morton curve index. +/// +/// Returns the (x, y) coords at the given index. +pub fn i2xy(i: u32) -> (u32, u32) { + (compact_1_by_1(i), compact_1_by_1(i >> 1)) +} + +// TODO: 3D morton curves. + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn reversible() { + let d = 54; + let (x, y) = i2xy(d); + let d2 = xy2i(x, y); + + assert_eq!(d, d2); + } +} diff --git a/src/renderer.rs b/src/renderer.rs index f4a7f93..0d22b87 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -13,7 +13,6 @@ use crate::{ accel::ACCEL_NODE_RAY_TESTS, color::{map_0_1_to_wavelength, SpectralSample, XYZ}, fp_utils::robust_ray_origin, - hash::hash_u32, hilbert, image::Image, math::{probit, upper_power_of_two, Float4}, @@ -153,7 +152,7 @@ impl<'a> Renderer<'a> { pow2 * pow2 }; for hilbert_d in 0..bucket_n { - let (bx, by) = hilbert::d2xy(hilbert_d); + let (bx, by) = hilbert::i2xy(hilbert_d); let x = bx as usize * bucket_w; let y = by as usize * bucket_h; @@ -238,13 +237,24 @@ impl<'a> Renderer<'a> { // Generate light paths and initial rays for y in bucket.y..(bucket.y + bucket.h) { for x in bucket.x..(bucket.x + bucket.w) { + // `si_offset` is for screen-space blue-noise sampling in the + // spirit of the paper "Screen-Space Blue-Noise Diffusion of Monte + // Carlo Sampling Error via Hierarchical Ordering of Pixels" by + // Ahmed et al. + // + // This works with any sobol or sobol-like sampler. But also happens + // to work well with golden-ratio sampling. Since we only use sobol + // and golden ratio sampling, we do this up-front here rather than in + // the samplers themselves. + let si_offset = crate::scramble::owen(hilbert::xy2i(x, y), self.seed) + .wrapping_mul(self.spp as u32); for si in 0..self.spp { + let si = (si as u32).wrapping_add(si_offset); + // Raw sample numbers. - let d0 = golden_ratio_sample(si as u32, (x, y), self.seed, self.spp as u32); - let (d1, d2, d3, d4) = - get_sample_4d(si as u32, 0, (x, y), self.seed, self.spp as u32); - let (d5, _, _, _) = - get_sample_4d(si as u32, 1, (x, y), self.seed, self.spp as u32); + let d0 = golden_ratio_sample(si); + let (d1, d2, d3, d4) = get_sample_4d(si, 0, self.seed); + let (d5, _, _, _) = get_sample_4d(si, 1, self.seed); // Calculate image plane x and y coordinates let (img_x, img_y) = { @@ -265,7 +275,6 @@ impl<'a> Renderer<'a> { d1, map_0_1_to_wavelength(d0), si as u32, - self.spp as u32, ); paths.push(path); rays.push(ray, false); @@ -384,8 +393,6 @@ pub struct LightPath { light_attenuation: Float4, pending_color_addition: Float4, color: Float4, - - max_samples: u32, } #[allow(clippy::new_ret_no_self)] @@ -399,7 +406,6 @@ impl LightPath { time: f32, wavelength: f32, sample_number: u32, - max_samples: u32, ) -> (LightPath, Ray) { ( LightPath { @@ -420,8 +426,6 @@ impl LightPath { light_attenuation: Float4::splat(1.0), pending_color_addition: Float4::splat(0.0), color: Float4::splat(0.0), - - max_samples: max_samples, }, scene.camera.generate_ray( image_plane_co.0, @@ -442,13 +446,7 @@ impl LightPath { fn next_lds_samp(&mut self) -> (f32, f32, f32, f32) { let dimension = self.dim_offset; self.dim_offset += 1; - get_sample_4d( - self.sample_number, - dimension, - self.pixel_co, - self.sampling_seed, - self.max_samples, - ) + get_sample_4d(self.sample_number, dimension, self.sampling_seed) } fn next( @@ -700,27 +698,12 @@ impl LightPath { /// and switching to random samples at higher dimensions where /// LDS samples aren't available. #[inline(always)] -fn get_sample_4d( - i: u32, - dimension_set: u32, - pixel_co: (u32, u32), - seed: u32, - max_samples: u32, -) -> (f32, f32, f32, f32) { +fn get_sample_4d(i: u32, dimension_set: u32, seed: u32) -> (f32, f32, f32, f32) { match dimension_set { ds if ds < sobol_burley::NUM_DIMENSION_SETS_4D as u32 => { // Sobol sampling. // - // The `offset` is for screen-space blue noise distribution in the - // spirit of the paper "Screen-Space Blue-Noise Diffusion of Monte - // Carlo Sampling Error via Hierarchical Ordering of Pixels" by - // Ahmed et al. - // `sample_4d()` already does sample shuffling internally in a way - // consistent with the paper, so we only have to arrange the pixels - // according to a space filling curve to achieve the results from - // the paper. - let offset = hilbert::xy2d(pixel_co.0, pixel_co.1) * max_samples; - let n4 = sobol_burley::sample_4d(offset + i, ds, seed); + let n4 = sobol_burley::sample_4d(i, ds, seed); (n4[0], n4[1], n4[2], n4[3]) } ds => { @@ -742,25 +725,9 @@ fn get_sample_4d( // due to the nature of hero wavelength sampling this ends up // being crazily more efficient than pretty much any other sampler, // and reduces variance by a huge amount. -fn golden_ratio_sample(i: u32, pixel_co: (u32, u32), seed: u32, max_samples: u32) -> f32 { - use sobol_burley::parts::{owen_scramble_rev, u32_to_f32_norm}; - - // Turns out the screen-space blue-noise diffusion trick mentioned - // in `get_sample_4d()` works reasonably well on golden ratio - // sampling as well! But we actually have to do the shuffling - // ourselves here. - let offset = owen_scramble_rev( - hilbert::xy2d(pixel_co.0, pixel_co.1).reverse_bits(), - hash_u32(seed, 6543266), - ) - .reverse_bits() - * max_samples; - let i = i.wrapping_add(offset); - - // The actual golden ratio sampling computation. - let n = i.wrapping_mul(2654435769); - - u32_to_f32_norm(n) +fn golden_ratio_sample(i: u32) -> f32 { + use sobol_burley::parts::u32_to_f32_norm; + u32_to_f32_norm(i.wrapping_mul(2654435769)) } #[derive(Debug)] diff --git a/src/scramble.rs b/src/scramble.rs new file mode 100644 index 0000000..f5ac53f --- /dev/null +++ b/src/scramble.rs @@ -0,0 +1,14 @@ +use crate::hash::hash_u32; + +/// Performs a base-2 Owen scramble on an integer. +pub fn owen(n: u32, seed: u32) -> u32 { + let mut result = n; + + for b in 0..31 { + let mask = (!0) << (b + 1); + result ^= hash_u32(n & mask, seed) & (1 << b); + } + result ^= hash_u32(0, seed) & (1 << 31); // Handle highest bit. + + result +}