From 60cbb193b247d1f923928766045dfbc5df3fce5b Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Tue, 21 Aug 2018 22:18:22 -0700 Subject: [PATCH] Added unit tests for utf16 encoding/decoding. --- sub_crates/text_encoding/src/lib.rs | 4 +- sub_crates/text_encoding/src/utf16_be.rs | 220 ++++++++++++++++++++++- sub_crates/text_encoding/src/utf16_le.rs | 220 ++++++++++++++++++++++- 3 files changed, 436 insertions(+), 8 deletions(-) diff --git a/sub_crates/text_encoding/src/lib.rs b/sub_crates/text_encoding/src/lib.rs index 1561e13..4eda39f 100644 --- a/sub_crates/text_encoding/src/lib.rs +++ b/sub_crates/text_encoding/src/lib.rs @@ -85,7 +85,7 @@ pub type DecodeResult<'a> = Result<(usize, &'a str), DecodeError>; /// /// It is guaranteed that all input leading up to the problem character has /// already been encoded and written to the output buffer. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq)] pub struct EncodeError { pub character: char, pub error_range: (usize, usize), @@ -104,7 +104,7 @@ pub struct EncodeError { /// /// It is guaranteed that all input leading up to the invalid data has /// already been encoded and written to the output buffer. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq)] pub struct DecodeError { pub error_range: (usize, usize), pub output_bytes_written: usize, diff --git a/sub_crates/text_encoding/src/utf16_be.rs b/sub_crates/text_encoding/src/utf16_be.rs index 3b23e3f..faab6a8 100644 --- a/sub_crates/text_encoding/src/utf16_be.rs +++ b/sub_crates/text_encoding/src/utf16_be.rs @@ -21,7 +21,7 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<' output[output_i] = val[0]; output[output_i + 1] = val[1]; output_i += 2; - input_i = offset; + input_i = offset + 1; } else { break; } @@ -35,14 +35,13 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<' output[output_i + 2] = second[0]; output[output_i + 3] = second[1]; output_i += 4; - input_i = offset; + input_i = offset + 1; } else { break; } } // Calculate how much of the input was consumed. - input_i += 1; if input_i > input.len() { input_i = input.len(); } else { @@ -119,3 +118,218 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a core::str::from_utf8_unchecked(&output[..output_i]) })) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn encode_01() { + let text = "こんにちは!"; + let mut buf = [0u8; 1]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 0); + assert_eq!(encoded, &[]); + } + + #[test] + fn encode_02() { + let text = "こんにちは!"; + let mut buf = [0u8; 2]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 3); + assert_eq!(encoded, &[0x30, 0x53]); + } + + #[test] + fn encode_03() { + let text = "こんにちは!"; + let mut buf = [0u8; 3]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 3); + assert_eq!(encoded, &[0x30, 0x53]); + } + + #[test] + fn encode_04() { + let text = "😺😼"; + let mut buf = [0u8; 3]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 0); + assert_eq!(encoded, &[]); + } + + #[test] + fn encode_05() { + let text = "😺😼"; + let mut buf = [0u8; 4]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(encoded, &[0xD8, 0x3D, 0xDE, 0x3A]); + } + + #[test] + fn encode_06() { + let text = "😺😼"; + let mut buf = [0u8; 7]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(encoded, &[0xD8, 0x3D, 0xDE, 0x3A]); + } + + #[test] + fn decode_01() { + let data = [ + 0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01, + ]; // "こんにちは!" + let mut buf = [0u8; 2]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 0); + assert_eq!(decoded, ""); + } + + #[test] + fn decode_02() { + let data = [ + 0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01, + ]; // "こんにちは!" + let mut buf = [0u8; 3]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 2); + assert_eq!(decoded, "こ"); + } + + #[test] + fn decode_03() { + let data = [ + 0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01, + ]; // "こんにちは!" + let mut buf = [0u8; 5]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 2); + assert_eq!(decoded, "こ"); + } + + #[test] + fn decode_04() { + let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE, 0x3C]; // "😺😼" + let mut buf = [0u8; 3]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 0); + assert_eq!(decoded, ""); + } + + #[test] + fn decode_05() { + let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE, 0x3C]; // "😺😼" + let mut buf = [0u8; 4]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(decoded, "😺"); + } + + #[test] + fn decode_06() { + let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE, 0x3C]; // "😺😼" + let mut buf = [0u8; 7]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(decoded, "😺"); + } + + #[test] + fn decode_error_01() { + let data = [ + 0xDE, 0x3A, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01, + ]; // "こんにちは!" with an error on the first char (end surrogate) + let mut buf = [0u8; 2]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (0, 2), + output_bytes_written: 0, + }) + ); + } + + #[test] + fn decode_error_02() { + let data = [ + 0x30, 0x53, 0xDE, 0x3A, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01, + ]; // "こんにちは!" with an error on the second char (end surrogate) + let mut buf = [0u8; 3]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (2, 4), + output_bytes_written: 3, + }) + ); + } + + #[test] + fn decode_error_03() { + let data = [ + 0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0xDE, 0x3A, 0x30, 0x6F, 0xFF, 0x01, + ]; // "こんにちは!" with an error on the fourth char (end surrogate) + let mut buf = [0u8; 64]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (6, 8), + output_bytes_written: 9, + }) + ); + } + + #[test] + fn decode_error_04() { + let data = [ + 0xD8, 0x3D, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01, + ]; // "こんにちは!" with an error on the first char (start surrogate) + let mut buf = [0u8; 2]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (0, 2), + output_bytes_written: 0, + }) + ); + } + + #[test] + fn decode_error_05() { + let data = [ + 0x30, 0x53, 0xD8, 0x3D, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01, + ]; // "こんにちは!" with an error on the second char (start surrogate) + let mut buf = [0u8; 3]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (2, 4), + output_bytes_written: 3, + }) + ); + } + + #[test] + fn decode_error_06() { + let data = [ + 0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0xD8, 0x3D, 0x30, 0x6F, 0xFF, 0x01, + ]; // "こんにちは!" with an error on the fourth char (start surrogate) + let mut buf = [0u8; 64]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (6, 8), + output_bytes_written: 9, + }) + ); + } +} diff --git a/sub_crates/text_encoding/src/utf16_le.rs b/sub_crates/text_encoding/src/utf16_le.rs index 6781e45..720a189 100644 --- a/sub_crates/text_encoding/src/utf16_le.rs +++ b/sub_crates/text_encoding/src/utf16_le.rs @@ -21,7 +21,7 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<' output[output_i] = val[0]; output[output_i + 1] = val[1]; output_i += 2; - input_i = offset; + input_i = offset + 1; } else { break; } @@ -35,14 +35,13 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<' output[output_i + 2] = second[0]; output[output_i + 3] = second[1]; output_i += 4; - input_i = offset; + input_i = offset + 1; } else { break; } } // Calculate how much of the input was consumed. - input_i += 1; if input_i > input.len() { input_i = input.len(); } else { @@ -119,3 +118,218 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a core::str::from_utf8_unchecked(&output[..output_i]) })) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn encode_01() { + let text = "こんにちは!"; + let mut buf = [0u8; 1]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 0); + assert_eq!(encoded, &[]); + } + + #[test] + fn encode_02() { + let text = "こんにちは!"; + let mut buf = [0u8; 2]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 3); + assert_eq!(encoded, &[0x53, 0x30]); + } + + #[test] + fn encode_03() { + let text = "こんにちは!"; + let mut buf = [0u8; 3]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 3); + assert_eq!(encoded, &[0x53, 0x30]); + } + + #[test] + fn encode_04() { + let text = "😺😼"; + let mut buf = [0u8; 3]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 0); + assert_eq!(encoded, &[]); + } + + #[test] + fn encode_05() { + let text = "😺😼"; + let mut buf = [0u8; 4]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(encoded, &[0x3D, 0xD8, 0x3A, 0xDE]); + } + + #[test] + fn encode_06() { + let text = "😺😼"; + let mut buf = [0u8; 7]; + let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(encoded, &[0x3D, 0xD8, 0x3A, 0xDE]); + } + + #[test] + fn decode_01() { + let data = [ + 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF, + ]; // "こんにちは!" + let mut buf = [0u8; 2]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 0); + assert_eq!(decoded, ""); + } + + #[test] + fn decode_02() { + let data = [ + 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF, + ]; // "こんにちは!" + let mut buf = [0u8; 3]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 2); + assert_eq!(decoded, "こ"); + } + + #[test] + fn decode_03() { + let data = [ + 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF, + ]; // "こんにちは!" + let mut buf = [0u8; 5]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 2); + assert_eq!(decoded, "こ"); + } + + #[test] + fn decode_04() { + let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼" + let mut buf = [0u8; 3]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 0); + assert_eq!(decoded, ""); + } + + #[test] + fn decode_05() { + let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼" + let mut buf = [0u8; 4]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(decoded, "😺"); + } + + #[test] + fn decode_06() { + let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼" + let mut buf = [0u8; 7]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(decoded, "😺"); + } + + #[test] + fn decode_error_01() { + let data = [ + 0x3A, 0xDE, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF, + ]; // "こんにちは!" with an error on the first char (end surrogate) + let mut buf = [0u8; 2]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (0, 2), + output_bytes_written: 0, + }) + ); + } + + #[test] + fn decode_error_02() { + let data = [ + 0x53, 0x30, 0x3A, 0xDE, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF, + ]; // "こんにちは!" with an error on the second char (end surrogate) + let mut buf = [0u8; 3]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (2, 4), + output_bytes_written: 3, + }) + ); + } + + #[test] + fn decode_error_03() { + let data = [ + 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x3A, 0xDE, 0x6F, 0x30, 0x01, 0xFF, + ]; // "こんにちは!" with an error on the fourth char (end surrogate) + let mut buf = [0u8; 64]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (6, 8), + output_bytes_written: 9, + }) + ); + } + + #[test] + fn decode_error_04() { + let data = [ + 0x3D, 0xD8, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF, + ]; // "こんにちは!" with an error on the first char (start surrogate) + let mut buf = [0u8; 2]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (0, 2), + output_bytes_written: 0, + }) + ); + } + + #[test] + fn decode_error_05() { + let data = [ + 0x53, 0x30, 0x3D, 0xD8, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF, + ]; // "こんにちは!" with an error on the second char (start surrogate) + let mut buf = [0u8; 3]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (2, 4), + output_bytes_written: 3, + }) + ); + } + + #[test] + fn decode_error_06() { + let data = [ + 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x3D, 0xD8, 0x6F, 0x30, 0x01, 0xFF, + ]; // "こんにちは!" with an error on the fourth char (start surrogate) + let mut buf = [0u8; 64]; + let error = decode_to_utf8(&data, &mut buf); + assert_eq!( + error, + Err(DecodeError { + error_range: (6, 8), + output_bytes_written: 9, + }) + ); + } +}