Added unit tests for windows1252 encoding/decoding.

This commit is contained in:
Nathan Vegdahl 2018-08-22 19:00:47 -07:00
parent fb95ff36c1
commit 3203da38bd

View File

@ -14,7 +14,7 @@ pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a
if let Some(byte) = encode_table(c) { if let Some(byte) = encode_table(c) {
output[output_i] = byte; output[output_i] = byte;
output_i += 1; output_i += 1;
input_i = offset; input_i = offset + 1;
} else { } else {
return Err(EncodeError { return Err(EncodeError {
character: c, character: c,
@ -25,7 +25,6 @@ pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a
} }
// Calculate how much of the input was consumed. // Calculate how much of the input was consumed.
input_i += 1;
if input_i > input.len() { if input_i > input.len() {
input_i = input.len(); input_i = input.len();
} else { } else {
@ -94,33 +93,33 @@ fn encode_table(code: char) -> Option<u8> {
return Some(code as u8); return Some(code as u8);
} }
match code { match code {
'€' => Some(0x80), '\u{20AC}' => Some(0x80),
'‚' => Some(0x82), '\u{201A}' => Some(0x82),
'ƒ' => Some(0x83), '\u{0192}' => Some(0x83),
'„' => Some(0x84), '\u{201E}' => Some(0x84),
'…' => Some(0x85), '\u{2026}' => Some(0x85),
'†' => Some(0x86), '\u{2020}' => Some(0x86),
'‡' => Some(0x87), '\u{2021}' => Some(0x87),
'ˆ' => Some(0x88), '\u{02C6}' => Some(0x88),
'‰' => Some(0x89), '\u{2030}' => Some(0x89),
'Š' => Some(0x8A), '\u{0160}' => Some(0x8A),
'‹' => Some(0x8B), '\u{2039}' => Some(0x8B),
'Œ' => Some(0x8C), '\u{0152}' => Some(0x8C),
'Ž' => Some(0x8E), '\u{017D}' => Some(0x8E),
'‘' => Some(0x91), '\u{2018}' => Some(0x91),
'’' => Some(0x92), '\u{2019}' => Some(0x92),
'“' => Some(0x93), '\u{201C}' => Some(0x93),
'”' => Some(0x94), '\u{201D}' => Some(0x94),
'•' => Some(0x95), '\u{2022}' => Some(0x95),
'–' => Some(0x96), '\u{2013}' => Some(0x96),
'—' => Some(0x97), '\u{2014}' => Some(0x97),
'˜' => Some(0x98), '\u{02DC}' => Some(0x98),
'™' => Some(0x99), '\u{2122}' => Some(0x99),
'š' => Some(0x9A), '\u{0161}' => Some(0x9A),
'›' => Some(0x9B), '\u{203A}' => Some(0x9B),
'œ' => Some(0x9C), '\u{0153}' => Some(0x9C),
'ž' => Some(0x9E), '\u{017E}' => Some(0x9E),
'Ÿ' => Some(0x9F), '\u{0178}' => Some(0x9F),
_ => None, _ => None,
} }
} }
@ -131,7 +130,281 @@ fn encode_table(code: char) -> Option<u8> {
// The '�'s stand in for codes not defined in windows-1252, and should be // The '�'s stand in for codes not defined in windows-1252, and should be
// be treated as an error when encountered. // be treated as an error when encountered.
const DECODE_TABLE: [char; 32] = [ const DECODE_TABLE: [char; 32] = [
'€', '�', '‚', 'ƒ', '„', '…', '†', '‡', 'ˆ', '‰', 'Š', '‹', 'Œ', '�', '\u{20AC}', '�', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}', '\u{2021}',
'Ž', '�', '�', '‘', '’', '“', '”', '•', '–', '—', '˜', '™', 'š', '›', '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '�', '\u{017D}', '�', '�',
'œ', '�', 'ž', 'Ÿ', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}', '\u{2022}', '\u{2013}', '\u{2014}', '\u{02DC}',
'\u{2122}', '\u{0161}', '\u{203A}', '\u{0153}', '�', '\u{017E}', '\u{0178}',
]; ];
#[cfg(test)]
mod tests {
    //! Unit tests for the windows-1252 encoder and decoder.
    //!
    //! The tests cover three areas: truncation when the output buffer is
    //! smaller than the input, decoding of the 0x80..=0x9F bytes that differ
    //! from latin-1, and the error values reported for unencodable characters
    //! and undefined bytes.

    use super::*;

    /// The bytes of "Hello world!", which are the same in ASCII and
    /// windows-1252.
    const HELLO_WORLD: [u8; 12] = [
        0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
    ];

    /// Decodes "Hello world!" with its second byte replaced by `byte` and
    /// asserts that decoding fails on exactly that byte, after one byte of
    /// output (the leading 'H') has been written.
    fn assert_second_byte_is_undefined(byte: u8) {
        let mut data = HELLO_WORLD;
        data[1] = byte;
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }

    /// A zero-length output buffer consumes nothing and yields nothing.
    #[test]
    fn encode_01() {
        let text = "Hello world!";
        let mut buf = [0u8; 0];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    /// A one-byte output buffer encodes only the first character.
    #[test]
    fn encode_02() {
        let text = "Hello world!";
        let mut buf = [0u8; 1];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 1);
        assert_eq!(encoded, "H".as_bytes());
    }

    /// A two-byte output buffer encodes only the first two characters.
    #[test]
    fn encode_03() {
        let text = "Hello world!";
        let mut buf = [0u8; 2];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(encoded, "He".as_bytes());
    }

    /// With ample output space the whole input is consumed and encoded.
    #[test]
    fn encode_04() {
        let text = "Hello world!";
        let mut buf = [0u8; 64];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(encoded, "Hello world!".as_bytes());
    }

    /// An unencodable character that lies past the point where the output
    /// buffer fills up must not be reported as an error.
    #[test]
    fn encode_05() {
        let text = "Hello world!こ";
        let mut buf = [0u8; 12];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(encoded, "Hello world!".as_bytes());
    }

    /// A zero-length output buffer consumes nothing and yields an empty string.
    #[test]
    fn decode_01() {
        let mut buf = [0u8; 0];
        let (consumed_count, decoded) = decode_to_str(&HELLO_WORLD, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    /// A one-byte output buffer decodes only the first byte.
    #[test]
    fn decode_02() {
        let mut buf = [0u8; 1];
        let (consumed_count, decoded) = decode_to_str(&HELLO_WORLD, &mut buf).unwrap();
        assert_eq!(consumed_count, 1);
        assert_eq!(decoded, "H");
    }

    /// A two-byte output buffer decodes only the first two bytes.
    #[test]
    fn decode_03() {
        let mut buf = [0u8; 2];
        let (consumed_count, decoded) = decode_to_str(&HELLO_WORLD, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(decoded, "He");
    }

    /// With ample output space the whole input is consumed and decoded.
    #[test]
    fn decode_04() {
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&HELLO_WORLD, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(decoded, "Hello world!");
    }

    /// Every defined byte in 0x80..=0x9F decodes to its windows-1252
    /// character; these are the characters that do not match latin-1.
    #[test]
    fn decode_05() {
        let data = [
            0x80, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8E, 0x91,
            0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9E, 0x9F,
        ];
        let mut buf = [0u8; 128];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 27);
        assert_eq!(decoded, "€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ");
    }

    /// An unencodable first character is reported with its 3-byte UTF-8 range
    /// and no output written.
    #[test]
    fn encode_error_01() {
        let text = "こello world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (0, 3),
                output_bytes_written: 0,
            })
        );
    }

    /// U+0085 is expected to be rejected; its UTF-8 form is two bytes long,
    /// hence the (0, 2) range.
    #[test]
    fn encode_error_02() {
        let text = "\u{0085}ello world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{0085}',
                error_range: (0, 2),
                output_bytes_written: 0,
            })
        );
    }

    /// An unencodable second character is reported after one byte of output.
    #[test]
    fn encode_error_03() {
        let text = "Hこllo world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (1, 4),
                output_bytes_written: 1,
            })
        );
    }

    /// U+0085 in second position is reported after one byte of output.
    #[test]
    fn encode_error_04() {
        let text = "H\u{0085}llo world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{0085}',
                error_range: (1, 3),
                output_bytes_written: 1,
            })
        );
    }

    /// With a three-byte buffer, an unencodable third character is still
    /// reported as an error, with two bytes of output written.
    #[test]
    fn encode_error_05() {
        let text = "Heこlo world!";
        let mut buf = [0u8; 3];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (2, 5),
                output_bytes_written: 2,
            })
        );
    }

    /// With a three-byte buffer, U+0085 in third position is still reported
    /// as an error, with two bytes of output written.
    #[test]
    fn encode_error_06() {
        let text = "He\u{0085}lo world!";
        let mut buf = [0u8; 3];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{0085}',
                error_range: (2, 4),
                output_bytes_written: 2,
            })
        );
    }

    /// Byte 0x81 is undefined and must fail to decode.
    #[test]
    fn decode_error_01() {
        assert_second_byte_is_undefined(0x81);
    }

    /// Byte 0x8D is undefined and must fail to decode.
    #[test]
    fn decode_error_02() {
        assert_second_byte_is_undefined(0x8D);
    }

    /// Byte 0x8F is undefined and must fail to decode.
    #[test]
    fn decode_error_03() {
        assert_second_byte_is_undefined(0x8F);
    }

    /// Byte 0x90 is undefined and must fail to decode.
    #[test]
    fn decode_error_04() {
        assert_second_byte_is_undefined(0x90);
    }

    /// Byte 0x9D is undefined and must fail to decode.
    #[test]
    fn decode_error_05() {
        assert_second_byte_is_undefined(0x9D);
    }
}