From 60cbb193b247d1f923928766045dfbc5df3fce5b Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl <cessen@cessen.com>
Date: Tue, 21 Aug 2018 22:18:22 -0700
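Subject: [PATCH] Added unit tests for utf16 encoding/decoding.

Also derives PartialEq on EncodeError and DecodeError, and moves the
`+ 1` advance of `input_i` in the UTF-16 encoders into the branches
that actually write a character, instead of applying it unconditionally
after the loop.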

---
 sub_crates/text_encoding/src/lib.rs      |   4 +-
 sub_crates/text_encoding/src/utf16_be.rs | 220 ++++++++++++++++++++++-
 sub_crates/text_encoding/src/utf16_le.rs | 220 ++++++++++++++++++++++-
 3 files changed, 436 insertions(+), 8 deletions(-)

diff --git a/sub_crates/text_encoding/src/lib.rs b/sub_crates/text_encoding/src/lib.rs
index 1561e13..4eda39f 100644
--- a/sub_crates/text_encoding/src/lib.rs
+++ b/sub_crates/text_encoding/src/lib.rs
@@ -85,7 +85,7 @@ pub type DecodeResult<'a> = Result<(usize, &'a str), DecodeError>;
 ///
 /// It is guaranteed that all input leading up to the problem character has
 /// already been encoded and written to the output buffer.
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, PartialEq)]
 pub struct EncodeError {
     pub character: char,
     pub error_range: (usize, usize),
@@ -104,7 +104,7 @@ pub struct EncodeError {
 ///
 /// It is guaranteed that all input leading up to the invalid data has
 /// already been encoded and written to the output buffer.
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, PartialEq)]
 pub struct DecodeError {
     pub error_range: (usize, usize),
     pub output_bytes_written: usize,
diff --git a/sub_crates/text_encoding/src/utf16_be.rs b/sub_crates/text_encoding/src/utf16_be.rs
index 3b23e3f..faab6a8 100644
--- a/sub_crates/text_encoding/src/utf16_be.rs
+++ b/sub_crates/text_encoding/src/utf16_be.rs
@@ -21,7 +21,7 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
                 output[output_i] = val[0];
                 output[output_i + 1] = val[1];
                 output_i += 2;
-                input_i = offset;
+                input_i = offset + 1;
             } else {
                 break;
             }
@@ -35,14 +35,13 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
             output[output_i + 2] = second[0];
             output[output_i + 3] = second[1];
             output_i += 4;
-            input_i = offset;
+            input_i = offset + 1;
         } else {
             break;
         }
     }
 
     // Calculate how much of the input was consumed.
-    input_i += 1;
     if input_i > input.len() {
         input_i = input.len();
     } else {
@@ -119,3 +118,218 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
         core::str::from_utf8_unchecked(&output[..output_i])
     }))
 }
+
+#[cfg(test)]
+mod tests { // big-endian UTF-16: partially-filled output buffers and unpaired-surrogate errors
+    use super::*;
+
+    #[test]
+    fn encode_01() {
+        let text = "こんにちは！";
+        let mut buf = [0u8; 1]; // too small for even one 2-byte UTF-16 code unit
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 0);
+        assert_eq!(encoded, &[]);
+    }
+
+    #[test]
+    fn encode_02() {
+        let text = "こんにちは！";
+        let mut buf = [0u8; 2]; // room for exactly one 2-byte code unit
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 3); // "こ" is 3 bytes of UTF-8 input
+        assert_eq!(encoded, &[0x30, 0x53]);
+    }
+
+    #[test]
+    fn encode_03() {
+        let text = "こんにちは！";
+        let mut buf = [0u8; 3]; // holds one code unit; the spare byte cannot fit another
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 3);
+        assert_eq!(encoded, &[0x30, 0x53]);
+    }
+
+    #[test]
+    fn encode_04() {
+        let text = "😺😼";
+        let mut buf = [0u8; 3]; // a surrogate pair needs 4 bytes, so nothing fits
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 0);
+        assert_eq!(encoded, &[]);
+    }
+
+    #[test]
+    fn encode_05() {
+        let text = "😺😼";
+        let mut buf = [0u8; 4]; // room for exactly one surrogate pair
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 4); // "😺" is 4 bytes of UTF-8 input
+        assert_eq!(encoded, &[0xD8, 0x3D, 0xDE, 0x3A]);
+    }
+
+    #[test]
+    fn encode_06() {
+        let text = "😺😼";
+        let mut buf = [0u8; 7]; // holds one surrogate pair; the spare 3 bytes cannot fit another
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 4);
+        assert_eq!(encoded, &[0xD8, 0x3D, 0xDE, 0x3A]);
+    }
+
+    #[test]
+    fn decode_01() {
+        let data = [
+            0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
+        ]; // "こんにちは！"
+        let mut buf = [0u8; 2]; // "こ" needs 3 bytes of UTF-8, so nothing fits
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 0);
+        assert_eq!(decoded, "");
+    }
+
+    #[test]
+    fn decode_02() {
+        let data = [
+            0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
+        ]; // "こんにちは！"
+        let mut buf = [0u8; 3]; // room for exactly one 3-byte UTF-8 char
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 2); // one 2-byte code unit
+        assert_eq!(decoded, "こ");
+    }
+
+    #[test]
+    fn decode_03() {
+        let data = [
+            0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
+        ]; // "こんにちは！"
+        let mut buf = [0u8; 5]; // holds one 3-byte char; the spare 2 bytes cannot fit another
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 2);
+        assert_eq!(decoded, "こ");
+    }
+
+    #[test]
+    fn decode_04() {
+        let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE, 0x3C]; // "😺😼"
+        let mut buf = [0u8; 3]; // "😺" needs 4 bytes of UTF-8, so nothing fits
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 0);
+        assert_eq!(decoded, "");
+    }
+
+    #[test]
+    fn decode_05() {
+        let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE, 0x3C]; // "😺😼"
+        let mut buf = [0u8; 4]; // room for exactly one 4-byte UTF-8 char
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 4); // one surrogate pair (2 code units)
+        assert_eq!(decoded, "😺");
+    }
+
+    #[test]
+    fn decode_06() {
+        let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE, 0x3C]; // "😺😼"
+        let mut buf = [0u8; 7]; // holds one 4-byte char; the spare 3 bytes cannot fit another
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 4);
+        assert_eq!(decoded, "😺");
+    }
+
+    #[test]
+    fn decode_error_01() {
+        let data = [
+            0xDE, 0x3A, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
+        ]; // "こんにちは！" with an error on the first char (end surrogate)
+        let mut buf = [0u8; 2];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (0, 2), // input bytes of the unpaired end surrogate
+                output_bytes_written: 0, // nothing decoded before the error
+            })
+        );
+    }
+
+    #[test]
+    fn decode_error_02() {
+        let data = [
+            0x30, 0x53, 0xDE, 0x3A, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
+        ]; // "こんにちは！" with an error on the second char (end surrogate)
+        let mut buf = [0u8; 3];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (2, 4), // input bytes of the unpaired end surrogate
+                output_bytes_written: 3, // "こ" (3 UTF-8 bytes) decoded before the error
+            })
+        );
+    }
+
+    #[test]
+    fn decode_error_03() {
+        let data = [
+            0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0xDE, 0x3A, 0x30, 0x6F, 0xFF, 0x01,
+        ]; // "こんにちは！" with an error on the fourth char (end surrogate)
+        let mut buf = [0u8; 64];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (6, 8), // input bytes of the unpaired end surrogate
+                output_bytes_written: 9, // "こんに" (3 chars x 3 bytes) decoded before the error
+            })
+        );
+    }
+
+    #[test]
+    fn decode_error_04() {
+        let data = [
+            0xD8, 0x3D, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
+        ]; // "こんにちは！" with an error on the first char (start surrogate)
+        let mut buf = [0u8; 2];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (0, 2), // input bytes of the unpaired start surrogate
+                output_bytes_written: 0, // nothing decoded before the error
+            })
+        );
+    }
+
+    #[test]
+    fn decode_error_05() {
+        let data = [
+            0x30, 0x53, 0xD8, 0x3D, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
+        ]; // "こんにちは！" with an error on the second char (start surrogate)
+        let mut buf = [0u8; 3];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (2, 4), // input bytes of the unpaired start surrogate
+                output_bytes_written: 3, // "こ" (3 UTF-8 bytes) decoded before the error
+            })
+        );
+    }
+
+    #[test]
+    fn decode_error_06() {
+        let data = [
+            0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0xD8, 0x3D, 0x30, 0x6F, 0xFF, 0x01,
+        ]; // "こんにちは！" with an error on the fourth char (start surrogate)
+        let mut buf = [0u8; 64];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (6, 8), // input bytes of the unpaired start surrogate
+                output_bytes_written: 9, // "こんに" (3 chars x 3 bytes) decoded before the error
+            })
+        );
+    }
+}
diff --git a/sub_crates/text_encoding/src/utf16_le.rs b/sub_crates/text_encoding/src/utf16_le.rs
index 6781e45..720a189 100644
--- a/sub_crates/text_encoding/src/utf16_le.rs
+++ b/sub_crates/text_encoding/src/utf16_le.rs
@@ -21,7 +21,7 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
                 output[output_i] = val[0];
                 output[output_i + 1] = val[1];
                 output_i += 2;
-                input_i = offset;
+                input_i = offset + 1;
             } else {
                 break;
             }
@@ -35,14 +35,13 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
             output[output_i + 2] = second[0];
             output[output_i + 3] = second[1];
             output_i += 4;
-            input_i = offset;
+            input_i = offset + 1;
         } else {
             break;
         }
     }
 
     // Calculate how much of the input was consumed.
-    input_i += 1;
     if input_i > input.len() {
         input_i = input.len();
     } else {
@@ -119,3 +118,218 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
         core::str::from_utf8_unchecked(&output[..output_i])
     }))
 }
+
+#[cfg(test)]
+mod tests { // little-endian UTF-16: partially-filled output buffers and unpaired-surrogate errors
+    use super::*;
+
+    #[test]
+    fn encode_01() {
+        let text = "こんにちは！";
+        let mut buf = [0u8; 1]; // too small for even one 2-byte UTF-16 code unit
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 0);
+        assert_eq!(encoded, &[]);
+    }
+
+    #[test]
+    fn encode_02() {
+        let text = "こんにちは！";
+        let mut buf = [0u8; 2]; // room for exactly one 2-byte code unit
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 3); // "こ" is 3 bytes of UTF-8 input
+        assert_eq!(encoded, &[0x53, 0x30]);
+    }
+
+    #[test]
+    fn encode_03() {
+        let text = "こんにちは！";
+        let mut buf = [0u8; 3]; // holds one code unit; the spare byte cannot fit another
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 3);
+        assert_eq!(encoded, &[0x53, 0x30]);
+    }
+
+    #[test]
+    fn encode_04() {
+        let text = "😺😼";
+        let mut buf = [0u8; 3]; // a surrogate pair needs 4 bytes, so nothing fits
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 0);
+        assert_eq!(encoded, &[]);
+    }
+
+    #[test]
+    fn encode_05() {
+        let text = "😺😼";
+        let mut buf = [0u8; 4]; // room for exactly one surrogate pair
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 4); // "😺" is 4 bytes of UTF-8 input
+        assert_eq!(encoded, &[0x3D, 0xD8, 0x3A, 0xDE]);
+    }
+
+    #[test]
+    fn encode_06() {
+        let text = "😺😼";
+        let mut buf = [0u8; 7]; // holds one surrogate pair; the spare 3 bytes cannot fit another
+        let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
+        assert_eq!(consumed_count, 4);
+        assert_eq!(encoded, &[0x3D, 0xD8, 0x3A, 0xDE]);
+    }
+
+    #[test]
+    fn decode_01() {
+        let data = [
+            0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
+        ]; // "こんにちは！"
+        let mut buf = [0u8; 2]; // "こ" needs 3 bytes of UTF-8, so nothing fits
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 0);
+        assert_eq!(decoded, "");
+    }
+
+    #[test]
+    fn decode_02() {
+        let data = [
+            0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
+        ]; // "こんにちは！"
+        let mut buf = [0u8; 3]; // room for exactly one 3-byte UTF-8 char
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 2); // one 2-byte code unit
+        assert_eq!(decoded, "こ");
+    }
+
+    #[test]
+    fn decode_03() {
+        let data = [
+            0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
+        ]; // "こんにちは！"
+        let mut buf = [0u8; 5]; // holds one 3-byte char; the spare 2 bytes cannot fit another
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 2);
+        assert_eq!(decoded, "こ");
+    }
+
+    #[test]
+    fn decode_04() {
+        let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼"
+        let mut buf = [0u8; 3]; // "😺" needs 4 bytes of UTF-8, so nothing fits
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 0);
+        assert_eq!(decoded, "");
+    }
+
+    #[test]
+    fn decode_05() {
+        let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼"
+        let mut buf = [0u8; 4]; // room for exactly one 4-byte UTF-8 char
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 4); // one surrogate pair (2 code units)
+        assert_eq!(decoded, "😺");
+    }
+
+    #[test]
+    fn decode_06() {
+        let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼"
+        let mut buf = [0u8; 7]; // holds one 4-byte char; the spare 3 bytes cannot fit another
+        let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
+        assert_eq!(consumed_count, 4);
+        assert_eq!(decoded, "😺");
+    }
+
+    #[test]
+    fn decode_error_01() {
+        let data = [
+            0x3A, 0xDE, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
+        ]; // "こんにちは！" with an error on the first char (end surrogate)
+        let mut buf = [0u8; 2];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (0, 2), // input bytes of the unpaired end surrogate
+                output_bytes_written: 0, // nothing decoded before the error
+            })
+        );
+    }
+
+    #[test]
+    fn decode_error_02() {
+        let data = [
+            0x53, 0x30, 0x3A, 0xDE, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
+        ]; // "こんにちは！" with an error on the second char (end surrogate)
+        let mut buf = [0u8; 3];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (2, 4), // input bytes of the unpaired end surrogate
+                output_bytes_written: 3, // "こ" (3 UTF-8 bytes) decoded before the error
+            })
+        );
+    }
+
+    #[test]
+    fn decode_error_03() {
+        let data = [
+            0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x3A, 0xDE, 0x6F, 0x30, 0x01, 0xFF,
+        ]; // "こんにちは！" with an error on the fourth char (end surrogate)
+        let mut buf = [0u8; 64];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (6, 8), // input bytes of the unpaired end surrogate
+                output_bytes_written: 9, // "こんに" (3 chars x 3 bytes) decoded before the error
+            })
+        );
+    }
+
+    #[test]
+    fn decode_error_04() {
+        let data = [
+            0x3D, 0xD8, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
+        ]; // "こんにちは！" with an error on the first char (start surrogate)
+        let mut buf = [0u8; 2];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (0, 2), // input bytes of the unpaired start surrogate
+                output_bytes_written: 0, // nothing decoded before the error
+            })
+        );
+    }
+
+    #[test]
+    fn decode_error_05() {
+        let data = [
+            0x53, 0x30, 0x3D, 0xD8, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
+        ]; // "こんにちは！" with an error on the second char (start surrogate)
+        let mut buf = [0u8; 3];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (2, 4), // input bytes of the unpaired start surrogate
+                output_bytes_written: 3, // "こ" (3 UTF-8 bytes) decoded before the error
+            })
+        );
+    }
+
+    #[test]
+    fn decode_error_06() {
+        let data = [
+            0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x3D, 0xD8, 0x6F, 0x30, 0x01, 0xFF,
+        ]; // "こんにちは！" with an error on the fourth char (start surrogate)
+        let mut buf = [0u8; 64];
+        let error = decode_to_utf8(&data, &mut buf);
+        assert_eq!(
+            error,
+            Err(DecodeError {
+                error_range: (6, 8), // input bytes of the unpaired start surrogate
+                output_bytes_written: 9, // "こんに" (3 chars x 3 bytes) decoded before the error
+            })
+        );
+    }
+}