From 7d7ab1acf1a7fa429abf10b5c47991df82da88f5 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Thu, 25 Aug 2022 15:53:29 -0700 Subject: [PATCH] Significant speed up to the hash. It can now hash data at 9-10 GiB/s. --- sub_crates/backend/src/hash.rs | 112 ++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 51 deletions(-) diff --git a/sub_crates/backend/src/hash.rs b/sub_crates/backend/src/hash.rs index 7aeb49e..9607455 100644 --- a/sub_crates/backend/src/hash.rs +++ b/sub_crates/backend/src/hash.rs @@ -57,11 +57,16 @@ impl LedHash256 { let mut data = data; while !data.is_empty() { - if self.buf_length >= BLOCK_SIZE { - // Process the filled buffer - self.add_buffer_to_state(); - self.mix_state(UPDATE_MIX_ROUNDS); + if self.buf_length == BLOCK_SIZE { + // Process the filled buffer. + add_buffer_to_state(&mut self.state, &self.buf); + mix_state(&mut self.state, UPDATE_MIX_ROUNDS); self.buf_length = 0; + } else if self.buf_length == 0 && data.len() >= BLOCK_SIZE { + // Process data directly, skipping the buffer. + add_buffer_to_state(&mut self.state, data); + mix_state(&mut self.state, UPDATE_MIX_ROUNDS); + data = &data[BLOCK_SIZE..]; } else { // Fill the buffer. let n = (BLOCK_SIZE - self.buf_length).min(data.len()); @@ -77,14 +82,14 @@ impl LedHash256 { // Hash the remaining bytes if there are any. if self.buf_length > 0 { (&mut self.buf[self.buf_length..]).fill(0); - self.add_buffer_to_state(); - self.mix_state(UPDATE_MIX_ROUNDS); + add_buffer_to_state(&mut self.state, &self.buf); + mix_state(&mut self.state, UPDATE_MIX_ROUNDS); } // Incorporate the message length (in bits) and do the // final mixing. self.state[0] ^= self.message_length * 8; - self.mix_state(FINISH_MIX_ROUNDS); + mix_state(&mut self.state, FINISH_MIX_ROUNDS); // Get the digest as a byte array and return it. 
let mut digest = [0u8; BLOCK_SIZE]; @@ -94,55 +99,60 @@ impl LedHash256 { digest[24..32].copy_from_slice(&self.state[3].to_le_bytes()); return digest; } +} - /// Adds the current contents of the buffer to the hash state. - fn add_buffer_to_state(&mut self) { - // Convert the buffer to native endian u64's and xor into the - // hash state. - let (a, b, c) = unsafe { self.buf.align_to::<u64>() }; - debug_assert!(a.is_empty()); - debug_assert!(c.is_empty()); - self.state[0] ^= u64::from_le(b[0]); - self.state[1] ^= u64::from_le(b[1]); - self.state[2] ^= u64::from_le(b[2]); - self.state[3] ^= u64::from_le(b[3]); - } +/// Adds the contents of a buffer to the hash state. +/// +/// The buffer must be at least 32 bytes long. Only the first 32 bytes +/// are added. +#[inline(always)] +fn add_buffer_to_state(state: &mut [u64; 4], buffer: &[u8]) { + use std::convert::TryInto; - /// The main mix function. Mixes the hash state. - /// - /// Inspired by Skein 1.3, and using its MIX function. - /// - /// The mix rotation constants are: - /// - 40 50 - /// - 27 21 - /// - /// They were selected by an exhaustive search of the four-constant - /// space, selecting for the best single-bit diffusion at a small - /// number of rounds. - /// - /// The permute table is: - /// - Indices: 0 1 2 3 - /// - Become: 0 1 3 2 - fn mix_state(&mut self, rounds: usize) { - for _ in 0..rounds { - // Skein MIX function. - self.state[0] = self.state[0].wrapping_add(self.state[2]); - self.state[1] = self.state[1].wrapping_add(self.state[3]); - self.state[2] = self.state[2].rotate_left(40) ^ self.state[0]; - self.state[3] = self.state[3].rotate_left(50) ^ self.state[1]; + // Convert the buffer to native endian u64's and xor into the + // hash state. 
+ assert!(buffer.len() >= BLOCK_SIZE); + state[0] ^= u64::from_le_bytes((&buffer[0..8]).try_into().unwrap()); + state[1] ^= u64::from_le_bytes((&buffer[8..16]).try_into().unwrap()); + state[2] ^= u64::from_le_bytes((&buffer[16..24]).try_into().unwrap()); + state[3] ^= u64::from_le_bytes((&buffer[24..32]).try_into().unwrap()); +} - // Permute. - self.state.swap(2, 3); +/// The main mix function. Mixes the passed hash state. +/// +/// Inspired by Skein 1.3, and using its MIX function. +/// +/// The mix rotation constants are: +/// - 40 50 +/// - 27 21 +/// +/// They were selected by an exhaustive search of the four-constant +/// space, selecting for the best single-bit diffusion at a small +/// number of rounds. +/// +/// The permute table is: +/// - Indices: 0 1 2 3 +/// - Become: 0 1 3 2 +#[inline(always)] +fn mix_state(state: &mut [u64; 4], rounds: usize) { + for _ in 0..rounds { + // MIX function. + state[0] = state[0].wrapping_add(state[2]); + state[2] = state[2].rotate_left(40) ^ state[0]; + state[1] = state[1].wrapping_add(state[3]); + state[3] = state[3].rotate_left(50) ^ state[1]; - // Skein MIX function. - self.state[0] = self.state[0].wrapping_add(self.state[2]); - self.state[1] = self.state[1].wrapping_add(self.state[3]); - self.state[2] = self.state[2].rotate_left(27) ^ self.state[0]; - self.state[3] = self.state[3].rotate_left(21) ^ self.state[1]; + // Permute. + state.swap(2, 3); - // Permute. - self.state.swap(2, 3); - } + // MIX function. + state[0] = state[0].wrapping_add(state[2]); + state[2] = state[2].rotate_left(27) ^ state[0]; + state[1] = state[1].wrapping_add(state[3]); + state[3] = state[3].rotate_left(21) ^ state[1]; + + // Permute. + state.swap(2, 3); } }