diff --git a/sub_crates/text_encoding/src/utf16_be.rs b/sub_crates/text_encoding/src/utf16_be.rs index faab6a8..355c675 100644 --- a/sub_crates/text_encoding/src/utf16_be.rs +++ b/sub_crates/text_encoding/src/utf16_be.rs @@ -80,7 +80,7 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a // Two code units. // Get the second code unit, if possible. - if !(input_i + 3) < input.len() { + if (input_i + 3) >= input.len() { break; } let bytes_2 = itr.next().unwrap(); @@ -237,6 +237,33 @@ mod tests { assert_eq!(decoded, "😺"); } + #[test] + fn decode_07() { + let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D]; // "😺😼" with last code unit chopped off. + let mut buf = [0u8; 64]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(decoded, "😺"); + } + + #[test] + fn decode_08() { + let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE]; // "😺😼" with last byte chopped off. + let mut buf = [0u8; 64]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(decoded, "😺"); + } + + #[test] + fn decode_09() { + let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8]; // "😺😼" with last 3 bytes chopped off. + let mut buf = [0u8; 64]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(decoded, "😺"); + } + #[test] fn decode_error_01() { let data = [ diff --git a/sub_crates/text_encoding/src/utf16_le.rs b/sub_crates/text_encoding/src/utf16_le.rs index 720a189..0325cfc 100644 --- a/sub_crates/text_encoding/src/utf16_le.rs +++ b/sub_crates/text_encoding/src/utf16_le.rs @@ -80,7 +80,7 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a // Two code units. // Get the second code unit, if possible. 
- if !(input_i + 3) < input.len() { + if (input_i + 3) >= input.len() { break; } let bytes_2 = itr.next().unwrap(); @@ -237,6 +237,33 @@ mod tests { assert_eq!(decoded, "😺"); } + #[test] + fn decode_07() { + let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8]; // "😺😼" with last code unit chopped off. + let mut buf = [0u8; 64]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(decoded, "😺"); + } + + #[test] + fn decode_08() { + let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C]; // "😺😼" with last byte chopped off. + let mut buf = [0u8; 64]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(decoded, "😺"); + } + + #[test] + fn decode_09() { + let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D]; // "😺😼" with last 3 bytes chopped off. + let mut buf = [0u8; 64]; + let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap(); + assert_eq!(consumed_count, 4); + assert_eq!(decoded, "😺"); + } + #[test] fn decode_error_01() { let data = [