From 03e78299c6aaf7534eb78b2a0db0a92422aa1008 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Thu, 25 Aug 2022 23:46:42 -0700 Subject: [PATCH] Use hash perf leeway from prior commits to make the hash higher quality. When incorporating a data block into the hash, it now does enough mixing rounds to flip on average about 110 bits for any bit flipped by the data block. This reduces performance again, but not all the way to what it was before. It's still reasonably fast, hashing at around 6-7 GB/s. --- sub_crates/backend/src/hash.rs | 114 ++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 51 deletions(-) diff --git a/sub_crates/backend/src/hash.rs b/sub_crates/backend/src/hash.rs index c11fadf..e6e45cc 100644 --- a/sub_crates/backend/src/hash.rs +++ b/sub_crates/backend/src/hash.rs @@ -2,30 +2,24 @@ //! //! This is intended to be used as a fast, high-quality checksum for //! non-adversarial data identification. It is not intended to stand -//! up to attacks of any kind. +//! up to attacks of any kind. (It does use the MIX function and +//! constants from Skein v1.3, but is not otherwise related.) //! -//! This uses the MIX function and permutation patterns from Skein v1.3, -//! but is otherwise unrelated. For example, it is not tweakable and -//! uses far fewer rounds per data chunk. -//! -//! This implementation assumes support for 64-bit unsigned integers. +//! This hash does *not* reliably have a full 256 bits worth of power to +//! distinguish different data. Rather, that number is somewhere +//! between 128 and 192 bits (much closer to the latter). The 256 bits +//! of output *are*, however, fully diffused. So you can truncate them +//! to whatever size you like without harm. //! //! This implementation should work on platforms of any endianness, //! but has only been tested on little endian platforms. Running the //! unit tests on a big-endian platform can verify. const BLOCK_SIZE: usize = 256 / 8; // Block size of the hash, in bytes. 
-const UPDATE_MIX_ROUNDS: usize = 2; -const FINISH_MIX_ROUNDS: usize = 6; +const UPDATE_MIX_ROUNDS: usize = 3; // Number of mix rounds after each block of data is added. +const FINISH_MIX_ROUNDS: usize = 6; // Number of mix rounds used to finalize the hash. -/// Convenience function to generate a hash for a block of data. -pub fn hash(data: &[u8]) -> [u8; BLOCK_SIZE] { - let mut h = LedHash256::new(); - h.update(data); - h.finish() -} - -/// A hash builder. Consumes bytes and generates a 256-bit hash. +/// A hasher. Consumes bytes and generates a 256-bit hash. #[derive(Debug, Copy, Clone)] #[repr(C)] #[repr(align(32))] @@ -40,10 +34,11 @@ impl LedHash256 { pub fn new() -> LedHash256 { LedHash256 { state: [ - 0x302f7bfd333d0b0d, - 0xb98cf5312d92a4d5, - 0xb1c885da4c257a6e, - 0xedb85f57b82c7e10, + // Initial Chaining Values from Skein-256-256, v1.3 + 0xfc9da860d048b449, + 0x2fca66479fa7d833, + 0xb33bc3896656840f, + 0x6a54e920fde8da69, ], buf: [0; BLOCK_SIZE], buf_length: 0, @@ -59,12 +54,12 @@ impl LedHash256 { while !data.is_empty() { if self.buf_length == BLOCK_SIZE { // Process the filled buffer. - add_buffer_to_state(&mut self.state, &self.buf); + add_data_to_state(&mut self.state, &self.buf); mix_state(&mut self.state, UPDATE_MIX_ROUNDS); self.buf_length = 0; } else if self.buf_length == 0 && data.len() >= BLOCK_SIZE { // Process data directly, skipping the buffer. - add_buffer_to_state(&mut self.state, data); + add_data_to_state(&mut self.state, data); mix_state(&mut self.state, UPDATE_MIX_ROUNDS); data = &data[BLOCK_SIZE..]; } else { @@ -82,7 +77,7 @@ impl LedHash256 { // Hash the remaining bytes if there are any. if self.buf_length > 0 { (&mut self.buf[self.buf_length..]).fill(0); - add_buffer_to_state(&mut self.state, &self.buf); + add_data_to_state(&mut self.state, &self.buf); mix_state(&mut self.state, UPDATE_MIX_ROUNDS); } @@ -101,34 +96,45 @@ impl LedHash256 { } } -/// Adds the contents of a buffer to the hash state. 
+/// Adds message data to the hash state. /// -/// The buffer must be at least 32 bytes long. Only the first 32 bytes +/// The data must be at least 32 bytes long. Only the first 32 bytes /// are added. #[inline(always)] -fn add_buffer_to_state(state: &mut [u64; 4], buffer: &[u8]) { +fn add_data_to_state(state: &mut [u64; 4], data: &[u8]) { use std::convert::TryInto; - // Convert the buffer to native endian u64's and xor into the + // Convert the data to native endian u64's and xor into the // hash state. - assert!(buffer.len() >= BLOCK_SIZE); - state[0] ^= u64::from_le_bytes((&buffer[0..8]).try_into().unwrap()); - state[1] ^= u64::from_le_bytes((&buffer[8..16]).try_into().unwrap()); - state[2] ^= u64::from_le_bytes((&buffer[16..24]).try_into().unwrap()); - state[3] ^= u64::from_le_bytes((&buffer[24..32]).try_into().unwrap()); + assert!(data.len() >= BLOCK_SIZE); + state[0] ^= u64::from_le_bytes((&data[0..8]).try_into().unwrap()); + state[1] ^= u64::from_le_bytes((&data[8..16]).try_into().unwrap()); + state[2] ^= u64::from_le_bytes((&data[16..24]).try_into().unwrap()); + state[3] ^= u64::from_le_bytes((&data[24..32]).try_into().unwrap()); } -/// The main mix function. Mixes the passed hash state. +/// Mixes the passed hash state. /// -/// Inspired by Skein 1.3, and using its MIX function. +/// Inspired by Skein 1.3, and using its MIX function and rotation +/// constants. +/// +/// Each round actually applies the MIX function twice, with a different +/// word permutation such that each word affects every other word once +/// per round. +/// +/// 3 rounds is enough for each bit to have a reasonable chance of +/// affecting most other bits: one input bit flip on average causes about +/// 110 output bit flips. 5 rounds is enough for full diffusion. #[inline(always)] fn mix_state(state: &mut [u64; 4], rounds: usize) { + // Rotation constants from Skein-256-256 v1.3. 
const ROTATIONS: &[[u32; 4]] = &[ - [40, 50, 27, 21], - [40, 50, 27, 21], - [40, 50, 27, 21], - [40, 50, 27, 21], + [14, 16, 52, 57], + [23, 40, 5, 37], + [25, 33, 46, 12], + [58, 22, 32, 32], ]; + for round in 0..rounds { let rot = ROTATIONS[round % ROTATIONS.len()]; @@ -138,8 +144,8 @@ fn mix_state(state: &mut [u64; 4], rounds: usize) { state[1] = state[1].wrapping_add(state[3]); state[3] = state[3].rotate_left(rot[1]) ^ state[1]; - // We flip the indices we use below, as if we did - // a [0 1 2 3] -> [0 1 3 2] permutation. + // We change the indices we use below, as if we + // did a [0 1 2 3] -> [0 1 3 2] permutation. // MIX function. state[0] = state[0].wrapping_add(state[3]); @@ -153,6 +159,12 @@ fn mix_state(state: &mut [u64; 4], rounds: usize) { mod test { use super::*; + fn hash(data: &[u8]) -> [u8; BLOCK_SIZE] { + let mut h = LedHash256::new(); + h.update(data); + h.finish() + } + fn digest_to_string(digest: [u8; 32]) -> String { fn low_bits_to_char(n: u8) -> char { match n { @@ -186,47 +198,47 @@ mod test { #[test] fn hash_empty() { - let correct_digest = "3c26b703d4d7316c7bc6fc8e72893433cb10e1044fb5100ac89d2a67c85ff1a1"; + let correct_digest = "fcdfdd47e35abc0d7ebd5c24aaa81b896c07f2cb0f2dc6395fdda6fc8fb12991"; assert_eq!(digest_to_string(hash(&[])), correct_digest); } #[test] fn hash_zero() { - let correct_digest = "65670a6fd5d2e919f3d5ec856dd18d7c66397683e47f059ac19f2533b1607708"; + let correct_digest = "0f11e90ef9373089f0a337cd1af6c923a2e5d679e92782d3e51da364a34d33e9"; assert_eq!(digest_to_string(hash(&[0u8])), correct_digest); } #[test] fn hash_one() { - let correct_digest = "6666ad08b0d0164f1972f36be7541c04b6e22947710d52b2733dcd3007908b82"; + let correct_digest = "36bf32dc5bcce36f6e1cc268ab40a5d7c1e2ed8dddec59c51a9e79a8a230802f"; assert_eq!(digest_to_string(hash(&[1u8])), correct_digest); } #[test] fn hash_string_01() { let s = "abc"; - let correct_digest = "891862c9ad6003a588194f7b59c6760d48083faa5eda5ade03b5eb0551c538e6"; + let correct_digest = 
"e06e17ff841570a558f48991172d522b37f86966f19bc45ee7bde2537b212246"; assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest); } #[test] fn hash_string_02() { let s = "The quick brown fox jumps over the lazy dog."; - let correct_digest = "347cce28ceb133b8ab30cca93e52460c40d30156ddf9122b3e13a6239c1e3f17"; + let correct_digest = "e82d9acc9ed0e629115585a253baa4ad607225dcba88cbdb4f1f10979e5b1bfc"; assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest); } #[test] fn hash_string_03() { let s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; - let correct_digest = "99b7eeae84e40ef9371b398af09e4cad00e800417b74b45f5af6042b49d03e1e"; + let correct_digest = "3e85ef4e523b431b4039bc0b67a8ed80e91be1dc7d650ce6c8a498ba97663cb0"; assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest); } #[test] fn hash_string_04() { let s = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."; - let correct_digest = "8d84fe8c5bc2a41ba0f0f57013bef7f038acf1d2d6a77ebc4d1b0fa14c10629c"; + let correct_digest = "793360cfe767f993a3ed4d91238a5042c2b6746c9767cdd77f1b3dbd5f632894"; assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest); } @@ -239,7 +251,7 @@ mod test { let test_string4 = "cup"; let test_string5 = "idatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."; - let correct_digest = "8d84fe8c5bc2a41ba0f0f57013bef7f038acf1d2d6a77ebc4d1b0fa14c10629c"; + let correct_digest = "793360cfe767f993a3ed4d91238a5042c2b6746c9767cdd77f1b3dbd5f632894"; let mut hasher = LedHash256::new(); hasher.update(test_string1.as_bytes()); @@ -264,15 +276,15 @@ mod test { assert_eq!( digest_to_string(hash(len_0)), - "3c26b703d4d7316c7bc6fc8e72893433cb10e1044fb5100ac89d2a67c85ff1a1", + "fcdfdd47e35abc0d7ebd5c24aaa81b896c07f2cb0f2dc6395fdda6fc8fb12991", ); assert_eq!( digest_to_string(hash(len_1)), - "65670a6fd5d2e919f3d5ec856dd18d7c66397683e47f059ac19f2533b1607708", + "0f11e90ef9373089f0a337cd1af6c923a2e5d679e92782d3e51da364a34d33e9", ); assert_eq!( digest_to_string(hash(len_2)), - "95bba56b21bdaf4a6c8c3a231c4966c0992845757a73fa6bbd48389cf8b7b452", + "70f4c3fdc580f268bd8a81e2163cafa9109ea193f1a062c12ef72996b396043f", ); } }