Added unit tests for utf16 encoding/decoding.
This commit is contained in:
parent
fc07ee3444
commit
60cbb193b2
|
@ -85,7 +85,7 @@ pub type DecodeResult<'a> = Result<(usize, &'a str), DecodeError>;
|
|||
///
|
||||
/// It is guaranteed that all input leading up to the problem character has
|
||||
/// already been encoded and written to the output buffer.
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub struct EncodeError {
|
||||
pub character: char,
|
||||
pub error_range: (usize, usize),
|
||||
|
@ -104,7 +104,7 @@ pub struct EncodeError {
|
|||
///
|
||||
/// It is guaranteed that all input leading up to the invalid data has
|
||||
/// already been encoded and written to the output buffer.
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub struct DecodeError {
|
||||
pub error_range: (usize, usize),
|
||||
pub output_bytes_written: usize,
|
||||
|
|
|
@ -21,7 +21,7 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
|
|||
output[output_i] = val[0];
|
||||
output[output_i + 1] = val[1];
|
||||
output_i += 2;
|
||||
input_i = offset;
|
||||
input_i = offset + 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
@ -35,14 +35,13 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
|
|||
output[output_i + 2] = second[0];
|
||||
output[output_i + 3] = second[1];
|
||||
output_i += 4;
|
||||
input_i = offset;
|
||||
input_i = offset + 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate how much of the input was consumed.
|
||||
input_i += 1;
|
||||
if input_i > input.len() {
|
||||
input_i = input.len();
|
||||
} else {
|
||||
|
@ -119,3 +118,218 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
|||
core::str::from_utf8_unchecked(&output[..output_i])
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn encode_01() {
|
||||
let text = "こんにちは!";
|
||||
let mut buf = [0u8; 1];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 0);
|
||||
assert_eq!(encoded, &[]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_02() {
|
||||
let text = "こんにちは!";
|
||||
let mut buf = [0u8; 2];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 3);
|
||||
assert_eq!(encoded, &[0x30, 0x53]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_03() {
|
||||
let text = "こんにちは!";
|
||||
let mut buf = [0u8; 3];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 3);
|
||||
assert_eq!(encoded, &[0x30, 0x53]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_04() {
|
||||
let text = "😺😼";
|
||||
let mut buf = [0u8; 3];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 0);
|
||||
assert_eq!(encoded, &[]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_05() {
|
||||
let text = "😺😼";
|
||||
let mut buf = [0u8; 4];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 4);
|
||||
assert_eq!(encoded, &[0xD8, 0x3D, 0xDE, 0x3A]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_06() {
|
||||
let text = "😺😼";
|
||||
let mut buf = [0u8; 7];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 4);
|
||||
assert_eq!(encoded, &[0xD8, 0x3D, 0xDE, 0x3A]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_01() {
|
||||
let data = [
|
||||
0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
|
||||
]; // "こんにちは!"
|
||||
let mut buf = [0u8; 2];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 0);
|
||||
assert_eq!(decoded, "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_02() {
|
||||
let data = [
|
||||
0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
|
||||
]; // "こんにちは!"
|
||||
let mut buf = [0u8; 3];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 2);
|
||||
assert_eq!(decoded, "こ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_03() {
|
||||
let data = [
|
||||
0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
|
||||
]; // "こんにちは!"
|
||||
let mut buf = [0u8; 5];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 2);
|
||||
assert_eq!(decoded, "こ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_04() {
|
||||
let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE, 0x3C]; // "😺😼"
|
||||
let mut buf = [0u8; 3];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 0);
|
||||
assert_eq!(decoded, "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_05() {
|
||||
let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE, 0x3C]; // "😺😼"
|
||||
let mut buf = [0u8; 4];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 4);
|
||||
assert_eq!(decoded, "😺");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_06() {
|
||||
let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE, 0x3C]; // "😺😼"
|
||||
let mut buf = [0u8; 7];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 4);
|
||||
assert_eq!(decoded, "😺");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_01() {
|
||||
let data = [
|
||||
0xDE, 0x3A, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
|
||||
]; // "こんにちは!" with an error on the first char (end surrogate)
|
||||
let mut buf = [0u8; 2];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (0, 2),
|
||||
output_bytes_written: 0,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_02() {
|
||||
let data = [
|
||||
0x30, 0x53, 0xDE, 0x3A, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
|
||||
]; // "こんにちは!" with an error on the second char (end surrogate)
|
||||
let mut buf = [0u8; 3];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (2, 4),
|
||||
output_bytes_written: 3,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_03() {
|
||||
let data = [
|
||||
0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0xDE, 0x3A, 0x30, 0x6F, 0xFF, 0x01,
|
||||
]; // "こんにちは!" with an error on the fourth char (end surrogate)
|
||||
let mut buf = [0u8; 64];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (6, 8),
|
||||
output_bytes_written: 9,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_04() {
|
||||
let data = [
|
||||
0xD8, 0x3D, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
|
||||
]; // "こんにちは!" with an error on the first char (start surrogate)
|
||||
let mut buf = [0u8; 2];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (0, 2),
|
||||
output_bytes_written: 0,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_05() {
|
||||
let data = [
|
||||
0x30, 0x53, 0xD8, 0x3D, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
|
||||
]; // "こんにちは!" with an error on the second char (start surrogate)
|
||||
let mut buf = [0u8; 3];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (2, 4),
|
||||
output_bytes_written: 3,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_06() {
|
||||
let data = [
|
||||
0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0xD8, 0x3D, 0x30, 0x6F, 0xFF, 0x01,
|
||||
]; // "こんにちは!" with an error on the fourth char (start surrogate)
|
||||
let mut buf = [0u8; 64];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (6, 8),
|
||||
output_bytes_written: 9,
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
|
|||
output[output_i] = val[0];
|
||||
output[output_i + 1] = val[1];
|
||||
output_i += 2;
|
||||
input_i = offset;
|
||||
input_i = offset + 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
@ -35,14 +35,13 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
|
|||
output[output_i + 2] = second[0];
|
||||
output[output_i + 3] = second[1];
|
||||
output_i += 4;
|
||||
input_i = offset;
|
||||
input_i = offset + 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate how much of the input was consumed.
|
||||
input_i += 1;
|
||||
if input_i > input.len() {
|
||||
input_i = input.len();
|
||||
} else {
|
||||
|
@ -119,3 +118,218 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
|||
core::str::from_utf8_unchecked(&output[..output_i])
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn encode_01() {
|
||||
let text = "こんにちは!";
|
||||
let mut buf = [0u8; 1];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 0);
|
||||
assert_eq!(encoded, &[]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_02() {
|
||||
let text = "こんにちは!";
|
||||
let mut buf = [0u8; 2];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 3);
|
||||
assert_eq!(encoded, &[0x53, 0x30]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_03() {
|
||||
let text = "こんにちは!";
|
||||
let mut buf = [0u8; 3];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 3);
|
||||
assert_eq!(encoded, &[0x53, 0x30]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_04() {
|
||||
let text = "😺😼";
|
||||
let mut buf = [0u8; 3];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 0);
|
||||
assert_eq!(encoded, &[]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_05() {
|
||||
let text = "😺😼";
|
||||
let mut buf = [0u8; 4];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 4);
|
||||
assert_eq!(encoded, &[0x3D, 0xD8, 0x3A, 0xDE]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_06() {
|
||||
let text = "😺😼";
|
||||
let mut buf = [0u8; 7];
|
||||
let (consumed_count, encoded) = encode_from_utf8(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 4);
|
||||
assert_eq!(encoded, &[0x3D, 0xD8, 0x3A, 0xDE]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_01() {
|
||||
let data = [
|
||||
0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
|
||||
]; // "こんにちは!"
|
||||
let mut buf = [0u8; 2];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 0);
|
||||
assert_eq!(decoded, "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_02() {
|
||||
let data = [
|
||||
0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
|
||||
]; // "こんにちは!"
|
||||
let mut buf = [0u8; 3];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 2);
|
||||
assert_eq!(decoded, "こ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_03() {
|
||||
let data = [
|
||||
0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
|
||||
]; // "こんにちは!"
|
||||
let mut buf = [0u8; 5];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 2);
|
||||
assert_eq!(decoded, "こ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_04() {
|
||||
let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼"
|
||||
let mut buf = [0u8; 3];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 0);
|
||||
assert_eq!(decoded, "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_05() {
|
||||
let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼"
|
||||
let mut buf = [0u8; 4];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 4);
|
||||
assert_eq!(decoded, "😺");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_06() {
|
||||
let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼"
|
||||
let mut buf = [0u8; 7];
|
||||
let (consumed_count, decoded) = decode_to_utf8(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 4);
|
||||
assert_eq!(decoded, "😺");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_01() {
|
||||
let data = [
|
||||
0x3A, 0xDE, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
|
||||
]; // "こんにちは!" with an error on the first char (end surrogate)
|
||||
let mut buf = [0u8; 2];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (0, 2),
|
||||
output_bytes_written: 0,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_02() {
|
||||
let data = [
|
||||
0x53, 0x30, 0x3A, 0xDE, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
|
||||
]; // "こんにちは!" with an error on the second char (end surrogate)
|
||||
let mut buf = [0u8; 3];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (2, 4),
|
||||
output_bytes_written: 3,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_03() {
|
||||
let data = [
|
||||
0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x3A, 0xDE, 0x6F, 0x30, 0x01, 0xFF,
|
||||
]; // "こんにちは!" with an error on the fourth char (end surrogate)
|
||||
let mut buf = [0u8; 64];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (6, 8),
|
||||
output_bytes_written: 9,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_04() {
|
||||
let data = [
|
||||
0x3D, 0xD8, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
|
||||
]; // "こんにちは!" with an error on the first char (start surrogate)
|
||||
let mut buf = [0u8; 2];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (0, 2),
|
||||
output_bytes_written: 0,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_05() {
|
||||
let data = [
|
||||
0x53, 0x30, 0x3D, 0xD8, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
|
||||
]; // "こんにちは!" with an error on the second char (start surrogate)
|
||||
let mut buf = [0u8; 3];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (2, 4),
|
||||
output_bytes_written: 3,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_06() {
|
||||
let data = [
|
||||
0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x3D, 0xD8, 0x6F, 0x30, 0x01, 0xFF,
|
||||
]; // "こんにちは!" with an error on the fourth char (start surrogate)
|
||||
let mut buf = [0u8; 64];
|
||||
let error = decode_to_utf8(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (6, 8),
|
||||
output_bytes_written: 9,
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user