diff --git a/Cargo.lock b/Cargo.lock index bf5fd4d..2ea780a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,6 +25,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "blue_noise_mask" version = "0.1.0" +dependencies = [ + "rayon 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "c_vec" @@ -59,6 +62,11 @@ name = "crossbeam" version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "deque" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "float4" version = "0.1.0" @@ -176,6 +184,34 @@ dependencies = [ "time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rand" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rayon" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rayon-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rayon-core" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "deque 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)", + "num_cpus 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "redox_syscall" version = "0.1.17" @@ -264,6 +300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum clap 2.23.2 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf1114886d7cde2d6448517161d7db8d681a9a1c09f7d210f0b0864e48195f6" "checksum cmake 0.1.22 (registry+https://github.com/rust-lang/crates.io-index)" = "d18d68987ed4c516dcc3e7913659bfa4076f5182eea4a7e0038bb060953e76ac" "checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97" +"checksum deque 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a694dae478589798d752c7125542f8a5ae8b6e59476172baf2eed67357bdfa27" "checksum gcc 0.3.45 (registry+https://github.com/rust-lang/crates.io-index)" = "40899336fb50db0c78710f53e87afc54d8c7266fb76262fecc78ca1a7f09deae" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "3b37545ab726dd833ec6420aaba8231c5b320814b9029ad585555d2a03e94fbf" @@ -274,6 +311,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum openexr 0.1.0 (git+https://github.com/cessen/openexr-rs?rev=612fc6c81c031970ffddcab15509236711613de8)" = "" "checksum openexr-sys 0.1.0 (git+https://github.com/cessen/openexr-rs?rev=612fc6c81c031970ffddcab15509236711613de8)" = "" "checksum pkg-config 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3a8b4c6b8165cd1a1cd4b9b120978131389f64bdaf456435caa41e630edba903" +"checksum rand 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "022e0636ec2519ddae48154b028864bdce4eaf7d35226ab8e65c611be97b189d" +"checksum rayon 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8c83adcb08e5b922e804fe1918142b422602ef11f2fd670b0b52218cb5984a20" +"checksum rayon-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "767d91bacddf07d442fe39257bf04fd95897d1c47c545d009f6beb03efd038f8" "checksum redox_syscall 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)" = "29dbdfd4b9df8ab31dec47c6087b7b13cbf4a776f335e4de8efba8288dda075b" "checksum rgb 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)" = "4db5350eea2dbb4f4972c4fb4d980b70c3f0ed3983eb2f66d174a43457514a14" "checksum rustc-serialize 0.3.23 (registry+https://github.com/rust-lang/crates.io-index)" = "684ce48436d6465300c9ea783b6b14c4361d6b8dcbb1375b486a69cc19e2dfb0" diff --git a/src/hash.rs b/src/hash.rs index b9c5e0a..16db9aa 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -1,3 +1,5 @@ +use std; + pub fn hash_u32(n: u32, seed: u32) -> u32 { let mut hash = n; @@ -21,3 +23,19 @@ pub fn hash_u64(n: u64, seed: u64) -> u64 { return hash; } + +/// Returns a random float in [0, 1] based on 'n' and a seed. +/// Generally use n for getting a bunch of different random +/// numbers, and use seed to vary between runs. +pub fn hash_u32_to_f32(n: u32, seed: u32) -> f32 { + let mut hash = n; + + for _ in 0..3 { + hash = hash.wrapping_mul(1936502639); + hash ^= hash.wrapping_shr(16); + hash = hash.wrapping_add(seed); + } + + const INV_MAX: f32 = 1.0 / std::u32::MAX as f32; + return hash as f32 * INV_MAX; +} diff --git a/src/renderer.rs b/src/renderer.rs index e3dfa93..48c0834 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -8,6 +8,8 @@ use std::sync::{RwLock, Mutex}; use crossbeam::sync::MsQueue; use scoped_threadpool::Pool; +use blue_noise_mask; +use blue_noise_mask::{MASKS, NUM_MASKS_WRAP_BITMASK}; use halton; use algorithm::partition_pair; @@ -135,16 +137,19 @@ impl<'a> Renderer<'a> { // Generate light paths and initial rays for y in bucket.y..(bucket.y + bucket.h) { for x in bucket.x..(bucket.x + bucket.w) { - let offset = hash_u32(((x as u32) << 16) ^ (y as u32), self.seed); + let x = x as u32; + let y = y as u32; + + let mask_i = blue_noise_mask::get_index_to_point(x, y) + ((self.seed as usize * 53) & NUM_MASKS_WRAP_BITMASK as usize); + for si in 0..self.spp { + let si = si as u32; // Calculate image plane x and y coordinates let (img_x, img_y) = { - let filter_x = - fast_logit(halton::sample(4, offset + si as u32), 1.5) + - 0.5; + let filter_x = + fast_logit(rot_f32(halton::sample(0, si), MASKS[(mask_i + 4) & (MASKS.len() - 1)]), 1.5) + 0.5; let filter_y = - fast_logit(halton::sample(5, offset + si as u32), 1.5) + - 0.5; + fast_logit(rot_f32(halton::sample(1, si), MASKS[(mask_i + 5) & (MASKS.len() - 1)]), 1.5) + 0.5; let samp_x = (filter_x + x as f32) * cmpx; let samp_y = (filter_y + y as f32) * cmpy; ((samp_x - 0.5) * x_extent, (0.5 - samp_y) * y_extent) @@ -155,15 +160,12 @@ impl<'a> Renderer<'a> { LightPath::new(&self.scene, (x, y), (img_x, img_y), - (halton::sample(0, offset + si as u32), - halton::sample(1, offset + si as u32)), - halton::sample(2, offset + si as u32), - map_0_1_to_wavelength(halton::sample(3, - - offset + - si as - u32)), - offset + si as u32); + (rot_f32(halton::sample(2, si), MASKS[(mask_i + 0) & (MASKS.len() - 1)]), + rot_f32(halton::sample(3, si), MASKS[(mask_i + 1) & (MASKS.len() - 1)])), + rot_f32(halton::sample(4, si), MASKS[(mask_i + 2) & (MASKS.len() - 1)]), + map_0_1_to_wavelength(rot_f32(halton::sample(5, si), MASKS[(mask_i + 3) & (MASKS.len() - 1)])), + si, + mask_i as u32); paths.push(path); rays.push(ray); } @@ -310,6 +312,7 @@ pub struct LightPath { pixel_co: (u32, u32), lds_offset: u32, dim_offset: Cell, + mask_offset: Cell, time: f32, wavelength: f32, @@ -328,7 +331,8 @@ impl LightPath { lens_uv: (f32, f32), time: f32, wavelength: f32, - lds_offset: u32) + lds_offset: u32, + mask_offset: u32) -> (LightPath, Ray) { (LightPath { event: LightPathEvent::CameraRay, @@ -337,6 +341,7 @@ impl LightPath { pixel_co: pixel_co, lds_offset: lds_offset, dim_offset: Cell::new(6), + mask_offset: Cell::new(mask_offset + 6), time: time, wavelength: wavelength, @@ -356,10 +361,13 @@ impl LightPath { } fn next_lds_samp(&self) -> f32 { - let s = halton::sample(self.dim_offset.get(), self.lds_offset); - let inc = self.dim_offset.get() + 1; - self.dim_offset.set(inc); - s + let dim = self.dim_offset.get(); + let mask_i = self.mask_offset.get(); + self.dim_offset.set(dim + 1); + self.mask_offset.set((mask_i + 1) & (MASKS.len() - 1) as u32); + + let samp = halton::sample(dim, self.lds_offset); + rot_f32(samp, unsafe { *MASKS.get_unchecked(mask_i as usize) }) } fn next(&mut self, @@ -506,3 +514,15 @@ struct BucketJob { w: u32, h: u32, } + +#[inline(always)] +fn rot_f32(a: f32, b: f32) -> f32 { + //assert!(a >= 0.0); + //assert!(b >= 0.0); + let mut c = a + b; + while c >= 1.0 { + c -= 1.0; + } + + c +} diff --git a/sub_crates/blue_noise_mask/Cargo.toml b/sub_crates/blue_noise_mask/Cargo.toml index 555623e..e6b91f9 100644 --- a/sub_crates/blue_noise_mask/Cargo.toml +++ b/sub_crates/blue_noise_mask/Cargo.toml @@ -8,3 +8,6 @@ build = "build.rs" [lib] name = "blue_noise_mask" path = "src/lib.rs" + +[build-dependencies] +rayon = "0.7" \ No newline at end of file diff --git a/sub_crates/blue_noise_mask/build.rs b/sub_crates/blue_noise_mask/build.rs index a61776c..ccab5bc 100644 --- a/sub_crates/blue_noise_mask/build.rs +++ b/sub_crates/blue_noise_mask/build.rs @@ -1,5 +1,7 @@ // Generate Blue Noise Mask tables. +extern crate rayon; + use std::cmp::Ordering; use std::env; use std::fs::File; @@ -7,22 +9,31 @@ use std::io::Write; use std::ops::{Index, IndexMut}; use std::path::Path; +use rayon::prelude::*; -const WINDOW_RADIUS: isize = 6; -const FILTER_WIDTH: f32 = 1.0; +const WINDOW_RADIUS: isize = 63; +const FILTER_WIDTH: f32 = 1.2; +const FILTER_PASSES: usize = 1; // These are specified in powers of two (2^N) for fast wrapping // in the generated Rust code. const NUM_MASKS_POW: usize = 7; // 128 const MASK_SIZE_POW: usize = 7; // 128 +const MASK_SIZE: usize = 1 << MASK_SIZE_POW; +const MASK_SIZE_BITMASK: usize = (1 << MASK_SIZE_POW) - 1; + fn main() { let out_dir = env::var("OUT_DIR").unwrap(); let dest_path = Path::new(&out_dir).join("blue_noise_masks.rs"); let mut f = File::create(&dest_path).unwrap(); // Generate masks - let masks = (0..(1 << NUM_MASKS_POW) as u32).map(|i| blue_noise_mask(1 << MASK_SIZE_POW, i)).collect::>(); + let masks = (0..(1 << NUM_MASKS_POW)) + .collect::>() + .par_iter() + .map(|i| blue_noise_mask(*i)) + .collect::>(); // Write the beginning bits of the file f.write_all(format!(r#" @@ -32,34 +43,44 @@ fn main() { pub const NUM_MASKS: u32 = {}; pub const MASK_SIZE: u32 = {}; -const NUM_MASKS_WRAP_BITMASK: u32 = {}; +pub const NUM_MASKS_WRAP_BITMASK: u32 = {}; const MASK_SIZE_WRAP_BITMASK: u32 = {}; const MASK_POINTS: u32 = {}; -pub fn get_point(mask_i: u32, x: u32, y: u32) -> f32 {{ - let mask_i = mask_i & NUM_MASKS_WRAP_BITMASK; +#[inline] +pub fn get_point(x: u32, y: u32) -> &'static [f32] {{ + let i = get_index_to_point(x, y); + + &MASKS[i..(i + NUM_MASKS as usize)] +}} + +#[inline] +pub fn get_index_to_point(x: u32, y: u32) -> usize {{ let x = x & MASK_SIZE_WRAP_BITMASK; let y = y & MASK_SIZE_WRAP_BITMASK; - unsafe {{ *MASKS.get_unchecked(((mask_i * MASK_POINTS) + (y * MASK_SIZE) + x) as usize) }} + ((y * MASK_SIZE * NUM_MASKS) + (x * NUM_MASKS)) as usize }} "#, 1 << NUM_MASKS_POW, - 1 << MASK_SIZE_POW, + MASK_SIZE, (1 << NUM_MASKS_POW) - 1, - (1 << MASK_SIZE_POW) - 1, - (1 << MASK_SIZE_POW) * (1 << MASK_SIZE_POW), + MASK_SIZE_BITMASK, + MASK_SIZE * MASK_SIZE, ).as_bytes()).unwrap(); // Write the mask data f.write_all(format!(r#" -const MASKS: [f32; {}] = [ - "#, (1 << MASK_SIZE_POW) * (1 << MASK_SIZE_POW) * (1 << NUM_MASKS_POW)).as_bytes()).unwrap(); +pub static MASKS: [f32; {}] = [ + "#, MASK_SIZE * MASK_SIZE * (1 << NUM_MASKS_POW)).as_bytes()).unwrap(); - for mask in masks.iter() { - for v in mask.data.iter() { - f.write_all(format!(r#" {:.8}, + for y in 0..MASK_SIZE { + for x in 0..MASK_SIZE { + for mask in masks.iter() { + let v = mask[(x as isize, y as isize)]; + f.write_all(format!(r#" {:.8}, "#, v).as_bytes()).unwrap(); + } } } @@ -71,32 +92,32 @@ const MASKS: [f32; {}] = [ /// Creates a blue noise mask -fn blue_noise_mask(tile_size: usize, seed: u32) -> Image { - let mut image = Image::new(tile_size, tile_size); - - for (i, v) in image.data.iter_mut().enumerate() { +fn blue_noise_mask(seed: u32) -> Mask { + // Generate white noise mask + let mut mask = Mask::new(); + for (i, v) in mask.data.iter_mut().enumerate() { *v = hash_u32_to_f32(i as u32, seed); } // High pass and remap - for _ in 0..2 { - high_pass_filter(&mut image, WINDOW_RADIUS, FILTER_WIDTH); - remap_values(&mut image); + for _ in 0..FILTER_PASSES { + high_pass_filter(&mut mask, WINDOW_RADIUS, FILTER_WIDTH); + remap_values(&mut mask); } - image + mask } -/// High pass filter for an Image -fn high_pass_filter(image: &mut Image, window_radius: isize, filter_width: f32) { +/// Performs a high pass filter on a Mask +fn high_pass_filter(mask: &mut Mask, window_radius: isize, filter_width: f32) { // Precompute filter convolution matrix let conv = { - let mut conv = Image::new(window_radius as usize * 2 + 1, window_radius as usize * 2 + 1); - for j in (-window_radius)..window_radius { - for i in (-window_radius)..window_radius { - let n = (((j*j) + (i*i)) as f32).sqrt(); + let mut conv = Mask::new(); + for j in (-window_radius)..(window_radius + 1) { + for i in (-window_radius)..(window_radius + 1) { + let n = (((j * j) + (i * i)) as f32).sqrt(); //let n = (j.abs() + i.abs()) as f32; - conv.set_wrapped(gauss(n, filter_width), i,j); + conv[(i + window_radius, j + window_radius)] = sinc(n, filter_width); } } @@ -108,47 +129,45 @@ fn high_pass_filter(image: &mut Image, window_radius: isize, filter_width: f32) conv }; - // Compute the low-pass image - let mut low_pass_img = Image::new(image.width, image.height); - for y in 0..image.height { - for x in 0..image.width { - for j in (-window_radius as isize)..window_radius { - for i in (-window_radius as isize)..window_radius { + // Compute the low-pass mask + let mut low_pass_mask = Mask::new(); + for y in 0..MASK_SIZE { + for x in 0..MASK_SIZE { + for j in (-window_radius as isize)..(window_radius + 1) { + for i in (-window_radius as isize)..(window_radius + 1) { let b = y as isize + j; let a = x as isize + i; - let alpha = conv.get_wrapped(i, j); - low_pass_img[(x as isize, y as isize)] += image.get_wrapped(a, b) * alpha; + let alpha = conv[(i + window_radius, j + window_radius)]; + low_pass_mask[(x as isize, y as isize)] += mask.get_wrapped(a, b) * alpha; } } } } // Subtract low pass from original - for i in 0..image.data.len() { - image.data[i] -= low_pass_img.data[i] - 0.5; + for i in 0..mask.data.len() { + mask.data[i] -= low_pass_mask.data[i]; } } -/// Remaps the values in an Image to be linearly distributed within [0, 1] -fn remap_values(image: &mut Image) { - let mut vals = Vec::with_capacity(image.width * image.height); - for y in 0..image.height { - for x in 0..image.width { - vals.push((image[(x as isize, y as isize)], x, y)); +/// Remaps the values in a Mask to be linearly distributed within [0, 1] +fn remap_values(mask: &mut Mask) { + let mut vals = Vec::with_capacity(MASK_SIZE * MASK_SIZE); + for y in 0..MASK_SIZE { + for x in 0..MASK_SIZE { + vals.push((mask[(x as isize, y as isize)], x, y)); } } - vals.sort_by(|a, b| { - if a < b { - Ordering::Less - } else { - Ordering::Greater - } + vals.sort_by(|a, b| if a < b { + Ordering::Less + } else { + Ordering::Greater }); - let inc = 1.0 / (image.data.len() - 1) as f32; - let mut nor_v = 0.0; + let inc = 1.0 / (vals.len() - 1) as f32; + let mut n = 0.0; for v in vals.iter() { - image[(v.1 as isize, v.2 as isize)] = nor_v; - nor_v += inc; + mask[(v.1 as isize, v.2 as isize)] = n; + n += inc; } } @@ -159,6 +178,16 @@ fn gauss(x: f32, sd: f32) -> f32 { norm * dist } +// Sinc filter function +fn sinc(x: f32, w: f32) -> f32 { + if x == 0.0 { + 1.0 + } else { + let x = x * std::f32::consts::PI / w; + x.sin() / x + } +} + /// Returns a random float in [0, 1] based on 'n' and a seed. /// Generally use n for getting a bunch of different random /// numbers, and use seed to vary between runs. @@ -175,59 +204,34 @@ pub fn hash_u32_to_f32(n: u32, seed: u32) -> f32 { return hash as f32 * INV_MAX; } -struct Image { + +// Holds data for a 2d mask +struct Mask { data: Vec, - width: usize, - height: usize, } -impl Image { - fn new(width: usize, height: usize) -> Image { - Image { - data: vec![0.0; width * height], - width: width, - height: height, - } +impl Mask { + fn new() -> Mask { + Mask { data: vec![0.0; MASK_SIZE * MASK_SIZE] } } - fn get_wrapped(&self, mut ix: isize, mut iy: isize) -> f32 { - while ix < 0 { - ix += self.width as isize; - } - while iy < 1 { - iy += self.height as isize; - } + fn get_wrapped(&self, ix: isize, iy: isize) -> f32 { + let x = (ix + MASK_SIZE as isize) as usize & MASK_SIZE_BITMASK; + let y = (iy + MASK_SIZE as isize) as usize & MASK_SIZE_BITMASK; - let x = ix as usize % self.width; - let y = iy as usize % self.height; - - self.data[y * self.width + x] - } - - fn set_wrapped(&mut self, v: f32, mut ix: isize, mut iy: isize){ - while ix < 0 { - ix += self.width as isize; - } - while iy < 1 { - iy += self.height as isize; - } - - let x = ix as usize % self.width; - let y = iy as usize % self.height; - - self.data[y * self.width + x] = v + self.data[(y << MASK_SIZE_POW) + x] } } -impl Index<(isize, isize)> for Image { +impl Index<(isize, isize)> for Mask { type Output = f32; fn index(&self, index: (isize, isize)) -> &f32 { - &self.data[index.1 as usize * self.width + index.0 as usize] + &self.data[index.1 as usize * MASK_SIZE + index.0 as usize] } } -impl IndexMut<(isize, isize)> for Image { +impl IndexMut<(isize, isize)> for Mask { fn index_mut(&mut self, index: (isize, isize)) -> &mut f32 { - &mut self.data[index.1 as usize * self.width + index.0 as usize] + &mut self.data[index.1 as usize * MASK_SIZE + index.0 as usize] } -} \ No newline at end of file +}