Tests for single-byte encodings, and related bug fixes.

This commit is contained in:
Nathan Vegdahl 2018-08-23 13:21:16 -07:00
parent 3d8eaefa6b
commit 30a1440399
5 changed files with 280 additions and 437 deletions

View File

@ -151,7 +151,7 @@ fn generate_single_byte_encoding_from_index<R: Read, W: Write>(
let rev_table = {
let mut rev_table = vec![];
for (i, c) in table.iter().enumerate() {
rev_table.push((c, i));
rev_table.push((c, 128 + i));
}
rev_table.sort_by_key(|x| x.0);
rev_table

View File

@ -11,9 +11,8 @@ mod utf32_be;
mod utf32_le;
mod utf8;
mod utils;
mod windows1252;
use single_byte::{ibm866, iso_8859_2};
use single_byte::{ibm866, iso_8859_2, iso_8859_7, windows1252};
/// Encodes text from utf8 to a destination encoding.
pub fn encode_from_str<'a>(
@ -30,6 +29,7 @@ pub fn encode_from_str<'a>(
Encoding::IBM866 => ibm866::encode_from_str(input, output),
Encoding::Latin1 => latin1::encode_from_str(input, output),
Encoding::ISO8859_2 => iso_8859_2::encode_from_str(input, output),
Encoding::ISO8859_7 => iso_8859_7::encode_from_str(input, output),
Encoding::Windows1252 => windows1252::encode_from_str(input, output),
}
}
@ -49,6 +49,7 @@ pub fn decode_to_str<'a>(
Encoding::IBM866 => ibm866::decode_to_str(input, output),
Encoding::Latin1 => latin1::decode_to_str(input, output),
Encoding::ISO8859_2 => iso_8859_2::decode_to_str(input, output),
Encoding::ISO8859_7 => iso_8859_7::decode_to_str(input, output),
Encoding::Windows1252 => windows1252::decode_to_str(input, output),
}
}
@ -67,6 +68,7 @@ pub enum Encoding {
IBM866, // IBM 866
Latin1, // ISO/IEC 8859-1
ISO8859_2, // ISO/IEC 8859-2
ISO8859_7, // ISO/IEC 8859-7
Windows1252, // Windows code page 1252
}

View File

@ -34,10 +34,10 @@ pub mod iso_8859_2 {
// include!(concat!(env!("OUT_DIR"), "/iso-8859-6.rs"));
// }
// pub mod iso_8859_7 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-7.rs"));
// }
pub mod iso_8859_7 {
// ISO/IEC 8859-7 support.  The body of this module is generated by build.rs
// and included from the build's OUT_DIR at compile time.
include!(concat!(env!("OUT_DIR"), "/iso-8859-7.rs"));
}
// pub mod iso_8859_8 {
// // Generated by build.rs
@ -99,10 +99,10 @@ pub mod iso_8859_2 {
// include!(concat!(env!("OUT_DIR"), "/windows-1251.rs"));
// }
// pub mod windows1252 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/windows-1252.rs"));
// }
pub mod windows1252 {
// Windows code page 1252 support.  The body of this module is generated by
// build.rs and included from the build's OUT_DIR at compile time.
include!(concat!(env!("OUT_DIR"), "/windows-1252.rs"));
}
// pub mod windows1253 {
// // Generated by build.rs
@ -154,6 +154,11 @@ fn single_byte_encode_from_str<'a>(
if output_i >= output.len() {
break;
}
if c as u32 <= 127 {
output[output_i] = c as u8;
output_i += 1;
input_i = offset + 1;
} else {
if let Ok(i) = table.binary_search_by_key(&c, |x| x.0) {
output[output_i] = table[i].1;
output_i += 1;
@ -166,6 +171,7 @@ fn single_byte_encode_from_str<'a>(
});
}
}
}
// Calculate how much of the input was consumed.
if input_i > input.len() {
@ -224,3 +230,246 @@ fn single_byte_decode_to_str<'a>(
core::str::from_utf8_unchecked(&output[..output_i])
}))
}
//===========================================================================
// Testing is done with iso-8859-7, since it has a few undefined characters,
// allowing us to test handling of those.
#[cfg(test)]
mod tests {
    // Exercises the generated iso-8859-7 encoder/decoder: output buffers of
    // various sizes, characters that cannot be encoded, and bytes that the
    // encoding leaves undefined.
    use super::iso_8859_7::*;
    use {DecodeError, EncodeError};

    // --- Encoding into output buffers of increasing size. ---

    #[test]
    fn encode_01() {
        let text = "Hello world!";
        let mut buf = [0u8; 0];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_02() {
        let text = "Hello world!";
        let mut buf = [0u8; 1];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 1);
        assert_eq!(encoded, "H".as_bytes());
    }

    #[test]
    fn encode_03() {
        let text = "Hello world!";
        let mut buf = [0u8; 2];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(encoded, "He".as_bytes());
    }

    #[test]
    fn encode_04() {
        let text = "Hello world!";
        let mut buf = [0u8; 64];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(encoded, "Hello world!".as_bytes());
    }

    // The unencodable trailing character is never reached because the output
    // buffer fills up first, so no error is reported.
    #[test]
    fn encode_05() {
        let text = "Hello world!こ";
        let mut buf = [0u8; 12];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(encoded, "Hello world!".as_bytes());
    }

    // --- Decoding into output buffers of increasing size. ---

    #[test]
    fn decode_01() {
        let data = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!"
        let mut buf = [0u8; 0];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_02() {
        let data = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!"
        let mut buf = [0u8; 1];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 1);
        assert_eq!(decoded, "H");
    }

    #[test]
    fn decode_03() {
        let data = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!"
        let mut buf = [0u8; 2];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(decoded, "He");
    }

    #[test]
    fn decode_04() {
        let data = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!"
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(decoded, "Hello world!");
    }

    #[test]
    fn decode_05() {
        let data = [
            0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE,
            0xCF, 0xD0, 0xD1, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9,
        ]; // "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ"
        let mut buf = [0u8; 128];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 24);
        assert_eq!(decoded, "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ");
    }

    // --- Characters that have no iso-8859-7 representation. ---

    #[test]
    fn encode_error_01() {
        let text = "こello world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (0, 3),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn encode_error_02() {
        let text = "\u{00C0}ello world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{00C0}',
                error_range: (0, 2),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn encode_error_03() {
        let text = "Hこllo world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (1, 4),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn encode_error_04() {
        let text = "H\u{00C0}llo world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{00C0}',
                error_range: (1, 3),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn encode_error_05() {
        let text = "Heこlo world!";
        let mut buf = [0u8; 3];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (2, 5),
                output_bytes_written: 2,
            })
        );
    }

    #[test]
    fn encode_error_06() {
        let text = "He\u{00C0}lo world!";
        let mut buf = [0u8; 3];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{00C0}',
                error_range: (2, 4),
                output_bytes_written: 2,
            })
        );
    }

    // --- Bytes that iso-8859-7 leaves undefined (0xAE, 0xD2, 0xFF). ---

    #[test]
    fn decode_error_01() {
        let data = [
            0x48, 0xAE, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn decode_error_02() {
        let data = [
            0x48, 0xD2, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn decode_error_03() {
        let data = [
            0x48, 0xFF, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }
}

View File

@ -1,410 +0,0 @@
//! Encoding/decoding functions for Windows-1252.
use core;
use {DecodeError, DecodeResult, EncodeError, EncodeResult};
/// Encodes utf8 `input` as windows-1252, writing the bytes into `output`.
///
/// On success, yields the number of input bytes consumed along with the
/// filled prefix of `output`.  Encoding stops early, without error, once
/// `output` is full.
///
/// # Errors
///
/// Yields an `EncodeError` naming the first character that has no
/// windows-1252 representation, its byte range in `input`, and how many
/// output bytes had been written before it.
pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
    let mut consumed = 0;
    let mut written = 0;

    for (offset, c) in input.char_indices() {
        if written >= output.len() {
            break;
        }
        match encode_table(c) {
            Some(byte) => {
                output[written] = byte;
                written += 1;
                // Everything up to the end of this character is now consumed.
                consumed = offset + c.len_utf8();
            }
            None => {
                return Err(EncodeError {
                    character: c,
                    error_range: (offset, offset + c.len_utf8()),
                    output_bytes_written: written,
                });
            }
        }
    }

    Ok((consumed, &output[..written]))
}
/// Decodes windows-1252 bytes from `input` into utf8 text written to `output`.
///
/// On success, yields the number of input bytes consumed along with the
/// decoded text, which borrows from `output`.  Decoding stops early, without
/// error, as soon as the next character would not fit in `output`.
///
/// # Errors
///
/// Yields a `DecodeError` when a byte in the range 0x80-0x9F is not defined
/// by windows-1252.  The error carries the offending byte's range in `input`
/// and how many output bytes had been written before it.
pub fn decode_to_str<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let mut input_i = 0;
    let mut output_i = 0;
    for &byte in input.iter() {
        if byte < 0x80 {
            // 1-byte case: ASCII is passed through unchanged.
            if output_i >= output.len() {
                break;
            }
            output[output_i] = byte;
            input_i += 1;
            output_i += 1;
        } else if byte < 0xA0 {
            // Use lookup table.
            let code = DECODE_TABLE[byte as usize - 0x80];
            if code == '\u{FFFD}' {
                // Error: undefined byte.
                return Err(DecodeError {
                    error_range: (input_i, input_i + 1),
                    output_bytes_written: output_i,
                });
            }
            // Encode to utf8
            let mut buf = [0u8; 4];
            let s = code.encode_utf8(&mut buf);
            if (output_i + s.len()) > output.len() {
                break;
            }
            output[output_i..(output_i + s.len())].copy_from_slice(s.as_bytes());
            input_i += 1;
            output_i += s.len();
        } else {
            // Non-lookup-table 2-byte case: bytes 0xA0-0xFF map to the code
            // point with the same value, which always takes two utf8 bytes.
            if (output_i + 1) >= output.len() {
                break;
            }
            output[output_i] = 0b11000000 | (byte >> 6);
            output[output_i + 1] = 0b10000000 | (byte & 0b00111111);
            input_i += 1;
            output_i += 2;
        }
    }
    // SAFETY: `output[..output_i]` only ever receives ASCII bytes, the output
    // of `char::encode_utf8`, or a hand-assembled two-byte sequence for a code
    // point in 0xA0..=0xFF (lead byte 0xC2/0xC3 plus one continuation byte),
    // and characters are only written whole, so the prefix is valid utf8.
    Ok((input_i, unsafe {
        core::str::from_utf8_unchecked(&output[..output_i])
    }))
}
// Looks up the windows-1252 byte for a unicode character.
//
// Yields `None` when the character has no windows-1252 representation.
#[inline(always)]
fn encode_table(code: char) -> Option<u8> {
    let byte = match code {
        // These two ranges use the same values in unicode and windows-1252,
        // so the code point itself is the byte.
        '\u{0000}'..='\u{007F}' | '\u{00A0}'..='\u{00FF}' => code as u8,

        // Characters that windows-1252 places in the 0x80-0x9F range.
        '\u{20AC}' => 0x80,
        '\u{201A}' => 0x82,
        '\u{0192}' => 0x83,
        '\u{201E}' => 0x84,
        '\u{2026}' => 0x85,
        '\u{2020}' => 0x86,
        '\u{2021}' => 0x87,
        '\u{02C6}' => 0x88,
        '\u{2030}' => 0x89,
        '\u{0160}' => 0x8A,
        '\u{2039}' => 0x8B,
        '\u{0152}' => 0x8C,
        '\u{017D}' => 0x8E,
        '\u{2018}' => 0x91,
        '\u{2019}' => 0x92,
        '\u{201C}' => 0x93,
        '\u{201D}' => 0x94,
        '\u{2022}' => 0x95,
        '\u{2013}' => 0x96,
        '\u{2014}' => 0x97,
        '\u{02DC}' => 0x98,
        '\u{2122}' => 0x99,
        '\u{0161}' => 0x9A,
        '\u{203A}' => 0x9B,
        '\u{0153}' => 0x9C,
        '\u{017E}' => 0x9E,
        '\u{0178}' => 0x9F,

        // Everything else cannot be represented.
        _ => return None,
    };
    Some(byte)
}
// Maps the range 0x80-0x9F in windows-1252 to unicode.  The remaining
// characters in windows-1252 match unicode.
//
// The entry for byte `b` lives at index `b - 0x80`.
//
// The '\u{FFFD}'s (the unicode replacement character) stand in for codes not
// defined in windows-1252, and should be treated as an error when
// encountered.
const DECODE_TABLE: [char; 32] = [
    '\u{20AC}', '\u{FFFD}', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}', '\u{2021}',
    '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '\u{FFFD}', '\u{017D}', '\u{FFFD}',
    '\u{FFFD}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}', '\u{2022}', '\u{2013}', '\u{2014}',
    '\u{02DC}', '\u{2122}', '\u{0161}', '\u{203A}', '\u{0153}', '\u{FFFD}', '\u{017E}', '\u{0178}',
];
#[cfg(test)]
mod tests {
    // Exercises the windows-1252 encoder/decoder: output buffers of various
    // sizes, characters that cannot be encoded, and undefined bytes.
    use super::*;

    // --- Encoding into output buffers of increasing size. ---

    #[test]
    fn encode_01() {
        let text = "Hello world!";
        let mut buf = [0u8; 0];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_02() {
        let text = "Hello world!";
        let mut buf = [0u8; 1];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 1);
        assert_eq!(encoded, "H".as_bytes());
    }

    #[test]
    fn encode_03() {
        let text = "Hello world!";
        let mut buf = [0u8; 2];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(encoded, "He".as_bytes());
    }

    #[test]
    fn encode_04() {
        let text = "Hello world!";
        let mut buf = [0u8; 64];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(encoded, "Hello world!".as_bytes());
    }

    // The unencodable trailing character is never reached because the output
    // buffer fills up first, so no error is reported.
    #[test]
    fn encode_05() {
        let text = "Hello world!こ";
        let mut buf = [0u8; 12];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(encoded, "Hello world!".as_bytes());
    }

    // --- Decoding into output buffers of increasing size. ---

    #[test]
    fn decode_01() {
        let data = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!"
        let mut buf = [0u8; 0];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_02() {
        let data = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!"
        let mut buf = [0u8; 1];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 1);
        assert_eq!(decoded, "H");
    }

    #[test]
    fn decode_03() {
        let data = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!"
        let mut buf = [0u8; 2];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(decoded, "He");
    }

    #[test]
    fn decode_04() {
        let data = [
            0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!"
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(decoded, "Hello world!");
    }

    #[test]
    fn decode_05() {
        let data = [
            0x80, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8E, 0x91,
            0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9E, 0x9F,
        ]; // "€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ", all of the non-latin1 matching characters.
        let mut buf = [0u8; 128];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 27);
        assert_eq!(
            decoded,
            "€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ"
        );
    }

    // --- Characters that have no windows-1252 representation. ---

    #[test]
    fn encode_error_01() {
        let text = "こello world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (0, 3),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn encode_error_02() {
        let text = "\u{0085}ello world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{0085}',
                error_range: (0, 2),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn encode_error_03() {
        let text = "Hこllo world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (1, 4),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn encode_error_04() {
        let text = "H\u{0085}llo world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{0085}',
                error_range: (1, 3),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn encode_error_05() {
        let text = "Heこlo world!";
        let mut buf = [0u8; 3];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (2, 5),
                output_bytes_written: 2,
            })
        );
    }

    #[test]
    fn encode_error_06() {
        let text = "He\u{0085}lo world!";
        let mut buf = [0u8; 3];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{0085}',
                error_range: (2, 4),
                output_bytes_written: 2,
            })
        );
    }

    // --- Bytes that windows-1252 leaves undefined (0x81, 0x8D, 0x8F, 0x90, 0x9D). ---

    #[test]
    fn decode_error_01() {
        let data = [
            0x48, 0x81, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn decode_error_02() {
        let data = [
            0x48, 0x8D, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn decode_error_03() {
        let data = [
            0x48, 0x8F, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn decode_error_04() {
        let data = [
            0x48, 0x90, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn decode_error_05() {
        let data = [
            0x48, 0x9D, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }
}

View File

@ -197,23 +197,25 @@ proptest! {
assert_eq!(&data[..], &latin1[..]);
}
// The iso-8859-7 tests are representative of all single-byte encodings
// (except latin1) since they're all generated and share their code.
#[test]
fn pt_windows1252_roundtrip(mut data in vec(0u8..=255, 0..1000)) {
fn pt_iso_8859_7_roundtrip(mut data in vec(0u8..=255, 0..1000)) {
let mut buf = [0u8; 32];
let mut utf8 = String::new();
let mut w1252: Vec<u8> = Vec::new();
let mut iso8859_7: Vec<u8> = Vec::new();
// Eliminate undefined bytes in input.
for b in data.iter_mut() {
if *b == 0x81 || *b == 0x8D || *b == 0x8F || *b == 0x90 || *b == 0x9D {
if *b == 0xAE || *b == 0xD2 || *b == 0xFF {
*b = 0;
}
}
// Decode from windows-1252 to utf8
// Decode from iso-8859-7 to utf8
let mut tmp = &data[..];
while !tmp.is_empty() {
if let Ok((n, decoded)) = decode_to_str(Encoding::Windows1252, tmp, &mut buf) {
if let Ok((n, decoded)) = decode_to_str(Encoding::ISO8859_7, tmp, &mut buf) {
tmp = &tmp[n..];
utf8.extend(decoded.chars());
} else {
@ -221,17 +223,17 @@ proptest! {
}
}
// Encode to from utf8 back to w1252
// Encode from utf8 back to iso-8859-7
let mut tmp = &utf8[..];
while !tmp.is_empty() {
if let Ok((n, encoded)) = encode_from_str(Encoding::Windows1252, tmp, &mut buf) {
if let Ok((n, encoded)) = encode_from_str(Encoding::ISO8859_7, tmp, &mut buf) {
tmp = &tmp[n..];
w1252.extend_from_slice(encoded);
iso8859_7.extend_from_slice(encoded);
} else {
panic!("Error when encoding.");
}
}
assert_eq!(&data[..], &w1252[..]);
assert_eq!(&data[..], &iso8859_7[..]);
}
}