Added unit tests for windows1252 encoding/decoding.
This commit is contained in:
parent
fb95ff36c1
commit
3203da38bd
|
@ -14,7 +14,7 @@ pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a
|
|||
if let Some(byte) = encode_table(c) {
|
||||
output[output_i] = byte;
|
||||
output_i += 1;
|
||||
input_i = offset;
|
||||
input_i = offset + 1;
|
||||
} else {
|
||||
return Err(EncodeError {
|
||||
character: c,
|
||||
|
@ -25,7 +25,6 @@ pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a
|
|||
}
|
||||
|
||||
// Calculate how much of the input was consumed.
|
||||
input_i += 1;
|
||||
if input_i > input.len() {
|
||||
input_i = input.len();
|
||||
} else {
|
||||
|
@ -94,33 +93,33 @@ fn encode_table(code: char) -> Option<u8> {
|
|||
return Some(code as u8);
|
||||
}
|
||||
match code {
|
||||
'€' => Some(0x80),
|
||||
'‚' => Some(0x82),
|
||||
'ƒ' => Some(0x83),
|
||||
'„' => Some(0x84),
|
||||
'…' => Some(0x85),
|
||||
'†' => Some(0x86),
|
||||
'‡' => Some(0x87),
|
||||
'ˆ' => Some(0x88),
|
||||
'‰' => Some(0x89),
|
||||
'Š' => Some(0x8A),
|
||||
'‹' => Some(0x8B),
|
||||
'Œ' => Some(0x8C),
|
||||
'Ž' => Some(0x8E),
|
||||
'‘' => Some(0x91),
|
||||
'’' => Some(0x92),
|
||||
'“' => Some(0x93),
|
||||
'”' => Some(0x94),
|
||||
'•' => Some(0x95),
|
||||
'–' => Some(0x96),
|
||||
'—' => Some(0x97),
|
||||
'˜' => Some(0x98),
|
||||
'™' => Some(0x99),
|
||||
'š' => Some(0x9A),
|
||||
'›' => Some(0x9B),
|
||||
'œ' => Some(0x9C),
|
||||
'ž' => Some(0x9E),
|
||||
'Ÿ' => Some(0x9F),
|
||||
'\u{20AC}' => Some(0x80),
|
||||
'\u{201A}' => Some(0x82),
|
||||
'\u{0192}' => Some(0x83),
|
||||
'\u{201E}' => Some(0x84),
|
||||
'\u{2026}' => Some(0x85),
|
||||
'\u{2020}' => Some(0x86),
|
||||
'\u{2021}' => Some(0x87),
|
||||
'\u{02C6}' => Some(0x88),
|
||||
'\u{2030}' => Some(0x89),
|
||||
'\u{0160}' => Some(0x8A),
|
||||
'\u{2039}' => Some(0x8B),
|
||||
'\u{0152}' => Some(0x8C),
|
||||
'\u{017D}' => Some(0x8E),
|
||||
'\u{2018}' => Some(0x91),
|
||||
'\u{2019}' => Some(0x92),
|
||||
'\u{201C}' => Some(0x93),
|
||||
'\u{201D}' => Some(0x94),
|
||||
'\u{2022}' => Some(0x95),
|
||||
'\u{2013}' => Some(0x96),
|
||||
'\u{2014}' => Some(0x97),
|
||||
'\u{02DC}' => Some(0x98),
|
||||
'\u{2122}' => Some(0x99),
|
||||
'\u{0161}' => Some(0x9A),
|
||||
'\u{203A}' => Some(0x9B),
|
||||
'\u{0153}' => Some(0x9C),
|
||||
'\u{017E}' => Some(0x9E),
|
||||
'\u{0178}' => Some(0x9F),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
@ -131,7 +130,281 @@ fn encode_table(code: char) -> Option<u8> {
|
|||
// The '<27>'s stand in for codes not defined in windows-1252, and should be
|
||||
// be treated as an error when encountered.
|
||||
const DECODE_TABLE: [char; 32] = [
|
||||
'€', '<27>', '‚', 'ƒ', '„', '…', '†', '‡', 'ˆ', '‰', 'Š', '‹', 'Œ', '<27>',
|
||||
'Ž', '<27>', '<27>', '‘', '’', '“', '”', '•', '–', '—', '˜', '™', 'š', '›',
|
||||
'œ', '<27>', 'ž', 'Ÿ',
|
||||
'\u{20AC}', '<27>', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}', '\u{2021}',
|
||||
'\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '<27>', '\u{017D}', '<27>', '<27>',
|
||||
'\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}', '\u{2022}', '\u{2013}', '\u{2014}', '\u{02DC}',
|
||||
'\u{2122}', '\u{0161}', '\u{203A}', '\u{0153}', '<27>', '\u{017E}', '\u{0178}',
|
||||
];
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn encode_01() {
|
||||
let text = "Hello world!";
|
||||
let mut buf = [0u8; 0];
|
||||
let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 0);
|
||||
assert_eq!(encoded, &[]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_02() {
|
||||
let text = "Hello world!";
|
||||
let mut buf = [0u8; 1];
|
||||
let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 1);
|
||||
assert_eq!(encoded, "H".as_bytes());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_03() {
|
||||
let text = "Hello world!";
|
||||
let mut buf = [0u8; 2];
|
||||
let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 2);
|
||||
assert_eq!(encoded, "He".as_bytes());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_04() {
|
||||
let text = "Hello world!";
|
||||
let mut buf = [0u8; 64];
|
||||
let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 12);
|
||||
assert_eq!(encoded, "Hello world!".as_bytes());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_05() {
|
||||
let text = "Hello world!こ";
|
||||
let mut buf = [0u8; 12];
|
||||
let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 12);
|
||||
assert_eq!(encoded, "Hello world!".as_bytes());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_01() {
|
||||
let data = [
|
||||
0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
|
||||
]; // "Hello world!"
|
||||
let mut buf = [0u8; 0];
|
||||
let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 0);
|
||||
assert_eq!(decoded, "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_02() {
|
||||
let data = [
|
||||
0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
|
||||
]; // "Hello world!"
|
||||
let mut buf = [0u8; 1];
|
||||
let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 1);
|
||||
assert_eq!(decoded, "H");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_03() {
|
||||
let data = [
|
||||
0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
|
||||
]; // "Hello world!"
|
||||
let mut buf = [0u8; 2];
|
||||
let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 2);
|
||||
assert_eq!(decoded, "He");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_04() {
|
||||
let data = [
|
||||
0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
|
||||
]; // "Hello world!"
|
||||
let mut buf = [0u8; 64];
|
||||
let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 12);
|
||||
assert_eq!(decoded, "Hello world!");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_05() {
|
||||
let data = [
|
||||
0x80, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8E, 0x91,
|
||||
0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9E, 0x9F,
|
||||
]; // "€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ", all of the non-latin1 matching characters.
|
||||
let mut buf = [0u8; 128];
|
||||
let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
|
||||
assert_eq!(consumed_count, 27);
|
||||
assert_eq!(
|
||||
decoded,
|
||||
"€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_error_01() {
|
||||
let text = "こello world!";
|
||||
let mut buf = [0u8; 64];
|
||||
assert_eq!(
|
||||
encode_from_str(text, &mut buf),
|
||||
Err(EncodeError {
|
||||
character: 'こ',
|
||||
error_range: (0, 3),
|
||||
output_bytes_written: 0,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_error_02() {
|
||||
let text = "\u{0085}ello world!";
|
||||
let mut buf = [0u8; 64];
|
||||
assert_eq!(
|
||||
encode_from_str(text, &mut buf),
|
||||
Err(EncodeError {
|
||||
character: '\u{0085}',
|
||||
error_range: (0, 2),
|
||||
output_bytes_written: 0,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_error_03() {
|
||||
let text = "Hこllo world!";
|
||||
let mut buf = [0u8; 64];
|
||||
assert_eq!(
|
||||
encode_from_str(text, &mut buf),
|
||||
Err(EncodeError {
|
||||
character: 'こ',
|
||||
error_range: (1, 4),
|
||||
output_bytes_written: 1,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_error_04() {
|
||||
let text = "H\u{0085}llo world!";
|
||||
let mut buf = [0u8; 64];
|
||||
assert_eq!(
|
||||
encode_from_str(text, &mut buf),
|
||||
Err(EncodeError {
|
||||
character: '\u{0085}',
|
||||
error_range: (1, 3),
|
||||
output_bytes_written: 1,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_error_05() {
|
||||
let text = "Heこlo world!";
|
||||
let mut buf = [0u8; 3];
|
||||
assert_eq!(
|
||||
encode_from_str(text, &mut buf),
|
||||
Err(EncodeError {
|
||||
character: 'こ',
|
||||
error_range: (2, 5),
|
||||
output_bytes_written: 2,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_error_06() {
|
||||
let text = "He\u{0085}lo world!";
|
||||
let mut buf = [0u8; 3];
|
||||
assert_eq!(
|
||||
encode_from_str(text, &mut buf),
|
||||
Err(EncodeError {
|
||||
character: '\u{0085}',
|
||||
error_range: (2, 4),
|
||||
output_bytes_written: 2,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_01() {
|
||||
let data = [
|
||||
0x48, 0x81, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
|
||||
]; // "Hello world!" with an error on the second byte (undefined byte).
|
||||
let mut buf = [0u8; 64];
|
||||
let error = decode_to_str(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (1, 2),
|
||||
output_bytes_written: 1,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_02() {
|
||||
let data = [
|
||||
0x48, 0x8D, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
|
||||
]; // "Hello world!" with an error on the second byte (undefined byte).
|
||||
let mut buf = [0u8; 64];
|
||||
let error = decode_to_str(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (1, 2),
|
||||
output_bytes_written: 1,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_03() {
|
||||
let data = [
|
||||
0x48, 0x8F, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
|
||||
]; // "Hello world!" with an error on the second byte (undefined byte).
|
||||
let mut buf = [0u8; 64];
|
||||
let error = decode_to_str(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (1, 2),
|
||||
output_bytes_written: 1,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_04() {
|
||||
let data = [
|
||||
0x48, 0x90, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
|
||||
]; // "Hello world!" with an error on the second byte (undefined byte).
|
||||
let mut buf = [0u8; 64];
|
||||
let error = decode_to_str(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (1, 2),
|
||||
output_bytes_written: 1,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_error_05() {
|
||||
let data = [
|
||||
0x48, 0x9D, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
|
||||
]; // "Hello world!" with an error on the second byte (undefined byte).
|
||||
let mut buf = [0u8; 64];
|
||||
let error = decode_to_str(&data, &mut buf);
|
||||
assert_eq!(
|
||||
error,
|
||||
Err(DecodeError {
|
||||
error_range: (1, 2),
|
||||
output_bytes_written: 1,
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user