Cleaned up the hash implementation and made it endian-clean.

This commit is contained in:
Nathan Vegdahl 2020-02-24 10:09:13 +09:00
parent e1d91ff43d
commit 905ff2c301

View File

@ -1,13 +1,16 @@
/// A 256-bit non-cryptographic hash function for data identification. /// A 256-bit non-cryptographic hash function for data identification.
/// ///
/// This is essentially "Skein Lite". It pulls many ideas from /// This uses the MIX function, constants, and permutation patterns
/// Skein v1.3, and uses its constants, but strips out everything /// from Skein v1.3, but is otherwise largely unrelated--in particular
/// related to security, and changes a few things for performance. /// it does not use sub-keys, tweak values, or UBI from Skein.
/// ///
/// This implementation assumes little endian byte order and support /// This implementation assumes support for 64-bit unsigned integers.
/// for 64-bit unsigned integers. ///
/// This implementation should work on platforms of any endianness,
/// but has only been tested on little-endian platforms. Running the
/// unit tests on a big-endian platform would verify this.
const BLOCK_SIZE: usize = 32; // Block size of the hash, in bytes const BLOCK_SIZE: usize = 256 / 8; // Block size of the hash, in bytes
/// Convenience function to generate a hash for a block of data. /// Convenience function to generate a hash for a block of data.
pub fn hash(data: &[u8]) -> [u8; BLOCK_SIZE] { pub fn hash(data: &[u8]) -> [u8; BLOCK_SIZE] {
@ -71,9 +74,12 @@ impl LedHash256 {
pub fn finish(mut self) -> [u8; BLOCK_SIZE] { pub fn finish(mut self) -> [u8; BLOCK_SIZE] {
// Hash the remaining bytes if there are any. // Hash the remaining bytes if there are any.
if self.buf_length > 0 { if self.buf_length > 0 {
// Pad with zero.
for i in (&mut self.buf[self.buf_length..]).iter_mut() { for i in (&mut self.buf[self.buf_length..]).iter_mut() {
*i = 0; *i = 0;
} }
// Process.
let (a, b, c) = unsafe { self.buf.align_to::<u64>() }; let (a, b, c) = unsafe { self.buf.align_to::<u64>() };
debug_assert!(a.is_empty()); debug_assert!(a.is_empty());
debug_assert!(c.is_empty()); debug_assert!(c.is_empty());
@ -84,6 +90,12 @@ impl LedHash256 {
// Hash the message length, in bits. // Hash the message length, in bits.
mix(&mut self.state[..], &[self.message_length * 8, 0, 0, 0]); mix(&mut self.state[..], &[self.message_length * 8, 0, 0, 0]);
// Convert to little endian.
self.state[0] = self.state[0].to_le();
self.state[1] = self.state[1].to_le();
self.state[2] = self.state[2].to_le();
self.state[3] = self.state[3].to_le();
// Return the result. // Return the result.
unsafe { std::mem::transmute(self.state) } unsafe { std::mem::transmute(self.state) }
} }
@ -94,8 +106,6 @@ impl LedHash256 {
/// Inspired by Skein 1.3, and using the constants from its 256-bit /// Inspired by Skein 1.3, and using the constants from its 256-bit
/// variant. It does 9 rounds of mixing, as that produces full /// variant. It does 9 rounds of mixing, as that produces full
/// diffusion for 256-bit keys according to the Skein 1.3 paper. /// diffusion for 256-bit keys according to the Skein 1.3 paper.
/// There are, of course, many meaningful differences from Skein, this
/// being far, far simpler and not tweakable at all.
/// ///
/// The mix rotation constants, as taken from Skein 1.3 256-bit variant: /// The mix rotation constants, as taken from Skein 1.3 256-bit variant:
/// 14 16 /// 14 16
@ -112,110 +122,39 @@ impl LedHash256 {
/// Indices: 0 1 2 3 /// Indices: 0 1 2 3
/// Become: 0 3 2 1 /// Become: 0 3 2 1
fn mix(state: &mut [u64], block: &[u64]) {
    // Both `state` and `block` must hold at least four words; only the
    // first four words of each are read or written.
    //
    // `block` is interpreted as little-endian words, so callers can pass
    // raw message bytes reinterpreted as `u64`s on any platform.

    /// The MIX function from Skein, applied to the first two words of
    /// `pair`: add the second word into the first, then rotate the
    /// second word left by `r` bits and xor it with the new first word.
    fn umix(pair: &mut [u64], r: u32) {
        pair[0] = pair[0].wrapping_add(pair[1]);
        pair[1] = pair[1].rotate_left(r) ^ pair[0];
    }

    // Number of mixing rounds to perform.
    const ROUNDS: usize = 9;

    // Per-round rotation amounts for the (first, second) word pairs.
    // The table has eight entries and is cycled, so the ninth round
    // reuses the first entry.
    const ROTATION_TABLE: [(u32, u32); 8] = [
        (14, 16),
        (52, 57),
        (23, 40),
        (5, 37),
        (25, 33),
        (46, 12),
        (58, 22),
        (32, 32),
    ];

    // Check the length precondition once, up front, so that a short
    // slice fails before any of the state has been modified rather than
    // part-way through the mixing.
    assert!(
        state.len() >= 4 && block.len() >= 4,
        "mix requires at least four words of state and block"
    );

    // Convert the block to native endianness and xor it into the state.
    for (word, input) in state.iter_mut().zip(block).take(4) {
        *word ^= u64::from_le(*input);
    }

    // Each round mixes the pairs (0, 1) and (2, 3) and then swaps words
    // 1 and 3, so that the following round effectively mixes the pairs
    // (0, 3) and (2, 1) instead.
    for &(rot_1, rot_2) in ROTATION_TABLE.iter().cycle().take(ROUNDS) {
        umix(&mut state[..2], rot_1);
        umix(&mut state[2..4], rot_2);
        state.swap(1, 3);
    }

    // ROUNDS is odd, so the loop leaves words 1 and 3 exchanged; swap
    // them once more to restore the original word order.
    state.swap(1, 3);
}
#[cfg(test)] #[cfg(test)]
@ -337,6 +276,25 @@ mod test {
assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest); assert_eq!(digest_to_string(hash(s.as_bytes())), correct_digest);
} }
#[test]
fn hash_length() {
    // Make sure the length of the data properly affects the hash. The
    // last block of data is padded with zeros if it is shorter than the
    // block size, so every input here produces an all-zero final block
    // and the inputs differ only in their length.
    let len_0: &[u8] = &[];
    let len_1: &[u8] = &[0u8];
    let len_2: &[u8] = &[0u8, 0];

    let len_0_hash = digest_to_string(hash(len_0));
    let len_1_hash = digest_to_string(hash(len_1));
    let len_2_hash = digest_to_string(hash(len_2));

    // `assert_ne!` prints both digests on failure, which a bare
    // `assert!(a != b)` does not.
    assert_ne!(len_0_hash, len_1_hash);
    assert_ne!(len_0_hash, len_2_hash);
    assert_ne!(len_1_hash, len_2_hash);
}
#[test] #[test]
fn hash_multi_part_processing() { fn hash_multi_part_processing() {
let test_string1 = let test_string1 =