Use hash perf leeway from prior commits to make the hash higher quality.
When incorporating a data block into the hash, it now does enough mixing rounds to flip on average about 110 bits for any bit flipped by the data block. This reduces performance again, but not all the way back to what it was before. It's still reasonably fast, hashing at around 6-7 GB/s.
This commit is contained in:
parent
e25e28d88b
commit
03e78299c6
|
@ -2,30 +2,24 @@
|
||||||
//!
|
//!
|
||||||
//! This is intended to be used as a fast, high-quality checksum for
|
//! This is intended to be used as a fast, high-quality checksum for
|
||||||
//! non-adversarial data identification. It is not intended to stand
|
//! non-adversarial data identification. It is not intended to stand
|
||||||
//! up to attacks of any kind.
|
//! up to attacks of any kind. (It does use the MIX function and
|
||||||
|
//! constants from Skein v1.3, but is not otherwise related.)
|
||||||
//!
|
//!
|
||||||
//! This uses the MIX function and permutation patterns from Skein v1.3,
|
//! This hash does *not* reliably have a full 256 bits worth of power to
|
||||||
//! but is otherwise unrelated. For example, it is not tweakable and
|
//! distinguish different data. Rather, that number is somewhere
|
||||||
//! uses far fewer rounds per data chunk.
|
//! between 128 and 192 bits (much closer to the latter). The 256 bits
|
||||||
//!
|
//! of output *are*, however, fully diffused. So you can truncate them
|
||||||
//! This implementation assumes support for 64-bit unsigned integers.
|
//! to whatever size you like without harm.
|
||||||
//!
|
//!
|
||||||
//! This implementation should work on platforms of any endianness,
|
//! This implementation should work on platforms of any endianness,
|
||||||
//! but has only been tested on little endian platforms. Running the
|
//! but has only been tested on little endian platforms. Running the
|
||||||
//! unit tests on a big-endian platform can verify.
|
//! unit tests on a big-endian platform can verify.
|
||||||
|
|
||||||
const BLOCK_SIZE: usize = 256 / 8; // Block size of the hash, in bytes.
|
const BLOCK_SIZE: usize = 256 / 8; // Block size of the hash, in bytes.
|
||||||
const UPDATE_MIX_ROUNDS: usize = 2;
|
const UPDATE_MIX_ROUNDS: usize = 3; // Number of mix rounds after each block of data is added.
|
||||||
const FINISH_MIX_ROUNDS: usize = 6;
|
const FINISH_MIX_ROUNDS: usize = 6; // Number of mix rounds used to finalize the hash.
|
||||||
|
|
||||||
/// Convenience function to generate a hash for a block of data.
|
/// A hasher. Consumes bytes and generates a 256-bit hash.
|
||||||
pub fn hash(data: &[u8]) -> [u8; BLOCK_SIZE] {
|
|
||||||
let mut h = LedHash256::new();
|
|
||||||
h.update(data);
|
|
||||||
h.finish()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A hash builder. Consumes bytes and generates a 256-bit hash.
|
|
||||||
#[derive(Debug, Copy, Clone)]
|
#[derive(Debug, Copy, Clone)]
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
#[repr(align(32))]
|
#[repr(align(32))]
|
||||||
|
@ -40,10 +34,11 @@ impl LedHash256 {
|
||||||
pub fn new() -> LedHash256 {
|
pub fn new() -> LedHash256 {
|
||||||
LedHash256 {
|
LedHash256 {
|
||||||
state: [
|
state: [
|
||||||
0x302f7bfd333d0b0d,
|
// Initial Chaining Values from Skein-256-256, v1.3
|
||||||
0xb98cf5312d92a4d5,
|
0xfc9da860d048b449,
|
||||||
0xb1c885da4c257a6e,
|
0x2fca66479fa7d833,
|
||||||
0xedb85f57b82c7e10,
|
0xb33bc3896656840f,
|
||||||
|
0x6a54e920fde8da69,
|
||||||
],
|
],
|
||||||
buf: [0; BLOCK_SIZE],
|
buf: [0; BLOCK_SIZE],
|
||||||
buf_length: 0,
|
buf_length: 0,
|
||||||
|
@ -59,12 +54,12 @@ impl LedHash256 {
|
||||||
while !data.is_empty() {
|
while !data.is_empty() {
|
||||||
if self.buf_length == BLOCK_SIZE {
|
if self.buf_length == BLOCK_SIZE {
|
||||||
// Process the filled buffer.
|
// Process the filled buffer.
|
||||||
add_buffer_to_state(&mut self.state, &self.buf);
|
add_data_to_state(&mut self.state, &self.buf);
|
||||||
mix_state(&mut self.state, UPDATE_MIX_ROUNDS);
|
mix_state(&mut self.state, UPDATE_MIX_ROUNDS);
|
||||||
self.buf_length = 0;
|
self.buf_length = 0;
|
||||||
} else if self.buf_length == 0 && data.len() >= BLOCK_SIZE {
|
} else if self.buf_length == 0 && data.len() >= BLOCK_SIZE {
|
||||||
// Process data directly, skipping the buffer.
|
// Process data directly, skipping the buffer.
|
||||||
add_buffer_to_state(&mut self.state, data);
|
add_data_to_state(&mut self.state, data);
|
||||||
mix_state(&mut self.state, UPDATE_MIX_ROUNDS);
|
mix_state(&mut self.state, UPDATE_MIX_ROUNDS);
|
||||||
data = &data[BLOCK_SIZE..];
|
data = &data[BLOCK_SIZE..];
|
||||||
} else {
|
} else {
|
||||||
|
@ -82,7 +77,7 @@ impl LedHash256 {
|
||||||
// Hash the remaining bytes if there are any.
|
// Hash the remaining bytes if there are any.
|
||||||
if self.buf_length > 0 {
|
if self.buf_length > 0 {
|
||||||
(&mut self.buf[self.buf_length..]).fill(0);
|
(&mut self.buf[self.buf_length..]).fill(0);
|
||||||
add_buffer_to_state(&mut self.state, &self.buf);
|
add_data_to_state(&mut self.state, &self.buf);
|
||||||
mix_state(&mut self.state, UPDATE_MIX_ROUNDS);
|
mix_state(&mut self.state, UPDATE_MIX_ROUNDS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,34 +96,45 @@ impl LedHash256 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Adds the contents of a buffer to the hash state.
|
/// Adds message data to the hash state.
|
||||||
///
|
///
|
||||||
/// The buffer must be at least 32 bytes long. Only the first 32 bytes
|
/// The data must be at least 32 bytes long. Only the first 32 bytes
|
||||||
/// are added.
|
/// are added.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn add_buffer_to_state(state: &mut [u64; 4], buffer: &[u8]) {
|
fn add_data_to_state(state: &mut [u64; 4], data: &[u8]) {
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
|
||||||
// Convert the buffer to native endian u64's and xor into the
|
// Convert the data to native endian u64's and xor into the
|
||||||
// hash state.
|
// hash state.
|
||||||
assert!(buffer.len() >= BLOCK_SIZE);
|
assert!(data.len() >= BLOCK_SIZE);
|
||||||
state[0] ^= u64::from_le_bytes((&buffer[0..8]).try_into().unwrap());
|
state[0] ^= u64::from_le_bytes((&data[0..8]).try_into().unwrap());
|
||||||
state[1] ^= u64::from_le_bytes((&buffer[8..16]).try_into().unwrap());
|
state[1] ^= u64::from_le_bytes((&data[8..16]).try_into().unwrap());
|
||||||
state[2] ^= u64::from_le_bytes((&buffer[16..24]).try_into().unwrap());
|
state[2] ^= u64::from_le_bytes((&data[16..24]).try_into().unwrap());
|
||||||
state[3] ^= u64::from_le_bytes((&buffer[24..32]).try_into().unwrap());
|
state[3] ^= u64::from_le_bytes((&data[24..32]).try_into().unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The main mix function. Mixes the passed hash state.
|
/// Mixes the passed hash state.
|
||||||
///
|
///
|
||||||
/// Inspired by Skein 1.3, and using its MIX function.
|
/// Inspired by Skein 1.3, and using its MIX function and rotation
|
||||||
|
/// constants.
|
||||||
|
///
|
||||||
|
/// Each round actually applies the MIX function twice, with a different
|
||||||
|
/// word permutation such that each word affects every other word once
|
||||||
|
/// per round.
|
||||||
|
///
|
||||||
|
/// 3 rounds is enough for each bit to have a reasonable chance of
|
||||||
|
/// affecting most other bits: one input bit flip on average causes about
|
||||||
|
/// 110 output bit flips. 5 rounds is enough for full diffusion.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn mix_state(state: &mut [u64; 4], rounds: usize) {
|
fn mix_state(state: &mut [u64; 4], rounds: usize) {
|
||||||
|
// Rotation constants from Skein-256-256 v1.3.
|
||||||
const ROTATIONS: &[[u32; 4]] = &[
|
const ROTATIONS: &[[u32; 4]] = &[
|
||||||
[40, 50, 27, 21],
|
[14, 16, 52, 57],
|
||||||
[40, 50, 27, 21],
|
[23, 40, 5, 37],
|
||||||
[40, 50, 27, 21],
|
[25, 33, 46, 12],
|
||||||
[40, 50, 27, 21],
|
[58, 22, 32, 32],
|
||||||
];
|
];
|
||||||
|
|
||||||
for round in 0..rounds {
|
for round in 0..rounds {
|
||||||
let rot = ROTATIONS[round % ROTATIONS.len()];
|
let rot = ROTATIONS[round % ROTATIONS.len()];
|
||||||
|
|
||||||
|
@ -138,8 +144,8 @@ fn mix_state(state: &mut [u64; 4], rounds: usize) {
|
||||||
state[1] = state[1].wrapping_add(state[3]);
|
state[1] = state[1].wrapping_add(state[3]);
|
||||||
state[3] = state[3].rotate_left(rot[1]) ^ state[1];
|
state[3] = state[3].rotate_left(rot[1]) ^ state[1];
|
||||||
|
|
||||||
// We flip the indices we use below, as if we did
|
// We change the indices we use below, as if we
|
||||||
// a [0 1 2 3] -> [0 1 3 2] permutation.
|
// did a [0 1 2 3] -> [0 1 3 2] permutation.
|
||||||
|
|
||||||
// MIX function.
|
// MIX function.
|
||||||
state[0] = state[0].wrapping_add(state[3]);
|
state[0] = state[0].wrapping_add(state[3]);
|
||||||
|
@ -153,6 +159,12 @@ fn mix_state(state: &mut [u64; 4], rounds: usize) {
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
fn hash(data: &[u8]) -> [u8; BLOCK_SIZE] {
|
||||||
|
let mut h = LedHash256::new();
|
||||||
|
h.update(data);
|
||||||
|
h.finish()
|
||||||
|
}
|
||||||
|
|
||||||
fn digest_to_string(digest: [u8; 32]) -> String {
|
fn digest_to_string(digest: [u8; 32]) -> String {
|
||||||
fn low_bits_to_char(n: u8) -> char {
|
fn low_bits_to_char(n: u8) -> char {
|
||||||
match n {
|
match n {
|
||||||
|
@ -186,47 +198,47 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn hash_empty() {
|
fn hash_empty() {
|
||||||
let correct_digest = "3c26b703d4d7316c7bc6fc8e72893433cb10e1044fb5100ac89d2a67c85ff1a1";
|
let correct_digest = "fcdfdd47e35abc0d7ebd5c24aaa81b896c07f2cb0f2dc6395fdda6fc8fb12991";
|
||||||
assert_eq!(digest_to_string(hash(&[])), correct_digest);
|
assert_eq!(digest_to_string(hash(&[])), correct_digest);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn hash_zero() {
|
fn hash_zero() {
|
||||||
let correct_digest = "65670a6fd5d2e919f3d5ec856dd18d7c66397683e47f059ac19f2533b1607708";
|
let correct_digest = "0f11e90ef9373089f0a337cd1af6c923a2e5d679e92782d3e51da364a34d33e9";
|
||||||
assert_eq!(digest_to_string(hash(&[0u8])), correct_digest);
|
assert_eq!(digest_to_string(hash(&[0u8])), correct_digest);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn hash_one() {
|
fn hash_one() {
|
||||||
let correct_digest = "6666ad08b0d0164f1972f36be7541c04b6e22947710d52b2733dcd3007908b82";
|
let correct_digest = "36bf32dc5bcce36f6e1cc268ab40a5d7c1e2ed8dddec59c51a9e79a8a230802f";
|
||||||
assert_eq!(digest_to_string(hash(&[1u8])), correct_digest);
|
assert_eq!(digest_to_string(hash(&[1u8])), correct_digest);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn hash_string_01() {
|
fn hash_string_01() {
|
||||||
let s = "abc";
|
let s = "abc";
|
||||||
let correct_digest = "891862c9ad6003a588194f7b59c6760d48083faa5eda5ade03b5eb0551c538e6";
|
let correct_digest = "e06e17ff841570a558f48991172d522b37f86966f19bc45ee7bde2537b212246";
|
||||||
assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest);
|
assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn hash_string_02() {
|
fn hash_string_02() {
|
||||||
let s = "The quick brown fox jumps over the lazy dog.";
|
let s = "The quick brown fox jumps over the lazy dog.";
|
||||||
let correct_digest = "347cce28ceb133b8ab30cca93e52460c40d30156ddf9122b3e13a6239c1e3f17";
|
let correct_digest = "e82d9acc9ed0e629115585a253baa4ad607225dcba88cbdb4f1f10979e5b1bfc";
|
||||||
assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest);
|
assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn hash_string_03() {
|
fn hash_string_03() {
|
||||||
let s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
|
let s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
|
||||||
let correct_digest = "99b7eeae84e40ef9371b398af09e4cad00e800417b74b45f5af6042b49d03e1e";
|
let correct_digest = "3e85ef4e523b431b4039bc0b67a8ed80e91be1dc7d650ce6c8a498ba97663cb0";
|
||||||
assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest);
|
assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn hash_string_04() {
|
fn hash_string_04() {
|
||||||
let s = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
|
let s = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
|
||||||
let correct_digest = "8d84fe8c5bc2a41ba0f0f57013bef7f038acf1d2d6a77ebc4d1b0fa14c10629c";
|
let correct_digest = "793360cfe767f993a3ed4d91238a5042c2b6746c9767cdd77f1b3dbd5f632894";
|
||||||
assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest);
|
assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -239,7 +251,7 @@ mod test {
|
||||||
let test_string4 = "cup";
|
let test_string4 = "cup";
|
||||||
let test_string5 =
|
let test_string5 =
|
||||||
"idatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
|
"idatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
|
||||||
let correct_digest = "8d84fe8c5bc2a41ba0f0f57013bef7f038acf1d2d6a77ebc4d1b0fa14c10629c";
|
let correct_digest = "793360cfe767f993a3ed4d91238a5042c2b6746c9767cdd77f1b3dbd5f632894";
|
||||||
|
|
||||||
let mut hasher = LedHash256::new();
|
let mut hasher = LedHash256::new();
|
||||||
hasher.update(test_string1.as_bytes());
|
hasher.update(test_string1.as_bytes());
|
||||||
|
@ -264,15 +276,15 @@ mod test {
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
digest_to_string(hash(len_0)),
|
digest_to_string(hash(len_0)),
|
||||||
"3c26b703d4d7316c7bc6fc8e72893433cb10e1044fb5100ac89d2a67c85ff1a1",
|
"fcdfdd47e35abc0d7ebd5c24aaa81b896c07f2cb0f2dc6395fdda6fc8fb12991",
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
digest_to_string(hash(len_1)),
|
digest_to_string(hash(len_1)),
|
||||||
"65670a6fd5d2e919f3d5ec856dd18d7c66397683e47f059ac19f2533b1607708",
|
"0f11e90ef9373089f0a337cd1af6c923a2e5d679e92782d3e51da364a34d33e9",
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
digest_to_string(hash(len_2)),
|
digest_to_string(hash(len_2)),
|
||||||
"95bba56b21bdaf4a6c8c3a231c4966c0992845757a73fa6bbd48389cf8b7b452",
|
"70f4c3fdc580f268bd8a81e2163cafa9109ea193f1a062c12ef72996b396043f",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user