101 lines
3.3 KiB
Rust
101 lines
3.3 KiB
Rust
//! A library for incrementally encoding/decoding between UTF-8 and various
//! text encodings.
|
|
|
|
mod latin1;
|
|
mod utf16_be;
|
|
mod utf16_le;
|
|
mod utf8;
|
|
|
|
/// Encodes text from utf8 to a destination encoding.
|
|
pub fn encode_from_utf8<'a>(
|
|
output_encoding: Encoding,
|
|
input: &str,
|
|
output: &'a mut [u8],
|
|
) -> EncodeResult<'a> {
|
|
match output_encoding {
|
|
Encoding::Utf8 => utf8::encode_from_utf8(input, output),
|
|
Encoding::Utf16BE => utf16_be::encode_from_utf8(input, output),
|
|
Encoding::Utf16LE => utf16_le::encode_from_utf8(input, output),
|
|
Encoding::Latin1 => latin1::encode_from_utf8(input, output),
|
|
_ => unimplemented!(),
|
|
}
|
|
}
|
|
|
|
/// Decodes text from a source encoding to utf8.
|
|
pub fn decode_to_utf8<'a>(
|
|
input_encoding: Encoding,
|
|
input: &[u8],
|
|
output: &'a mut [u8],
|
|
) -> DecodeResult<'a> {
|
|
match input_encoding {
|
|
Encoding::Utf8 => utf8::decode_to_utf8(input, output),
|
|
Encoding::Utf16BE => utf16_be::decode_to_utf8(input, output),
|
|
Encoding::Utf16LE => utf16_le::decode_to_utf8(input, output),
|
|
Encoding::Latin1 => latin1::decode_to_utf8(input, output),
|
|
_ => unimplemented!(),
|
|
}
|
|
}
|
|
|
|
/// Describes a text encoding.
///
/// The type is a plain field-less enum, so it is cheap to copy and can be
/// compared for equality or used as a key in hash-based collections.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Encoding {
    /// UTF-8.
    Utf8,
    /// UTF-16, big endian.
    Utf16BE,
    /// UTF-16, little endian.
    Utf16LE,
    /// UTF-32, big endian.
    Utf32BE,
    /// UTF-32, little endian.
    Utf32LE,
    /// Shift JIS.
    ShiftJIS,
    /// Big5.
    Big5,
    /// Latin-1 (ISO/IEC 8859-1).
    Latin1,
    /// Windows code page 1252.
    Windows1252,
}
|
|
|
|
/// Result type for encoding text from utf8 to a target encoding.
|
|
///
|
|
/// The Ok() variant provides the number of bytes consumed and a reference
|
|
/// to the valid encoded text data.
|
|
pub type EncodeResult<'a> = Result<(usize, &'a [u8]), EncodeError>;
|
|
|
|
/// Result type for decoding text from a target encoding to utf8.
|
|
///
|
|
/// The Ok() variant provides the number of bytes consumed and a reference
|
|
/// to the valid decoded text.
|
|
pub type DecodeResult<'a> = Result<(usize, &'a str), DecodeError>;
|
|
|
|
/// Represents an error when encoding from utf8 to some other format.
///
/// Since valid input utf8 is statically assumed, the only possible
/// error is encountering a char that is not representable in the target
/// encoding.
///
/// The problematic character, the byte index range of that character in the
/// input utf8, and the number of bytes already written to the output buffer
/// are provided.
///
/// It is guaranteed that all input leading up to the problem character has
/// already been encoded and written to the output buffer.
///
/// The type implements `Display` and `std::error::Error`, so it can be
/// boxed as `Box<dyn std::error::Error>` and propagated with `?`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct EncodeError {
    /// The character that is not representable in the target encoding.
    pub character: char,
    /// Byte index range of `character` within the input utf8.
    pub error_range: (usize, usize),
    /// Number of bytes already written to the output buffer.
    pub output_bytes_written: usize,
}

impl std::fmt::Display for EncodeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "character {:?} at input byte range {:?} is not representable in the target \
             encoding ({} output bytes written)",
            self.character, self.error_range, self.output_bytes_written
        )
    }
}

impl std::error::Error for EncodeError {}
|
|
|
|
/// Represents an error when decoding to utf8 from some other format.
///
/// All supported text encodings can be fully represented in utf8, and
/// therefore the only possible error is that we encounter bytes in the
/// input data that are invalid for the text encoding we're attempting
/// to decode from.
///
/// The byte index range of the invalid input data and the number of bytes
/// already decoded and written to the output buffer are provided.
///
/// It is guaranteed that all input leading up to the invalid data has
/// already been decoded and written to the output buffer.
///
/// The type implements `Display` and `std::error::Error`, so it can be
/// boxed as `Box<dyn std::error::Error>` and propagated with `?`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct DecodeError {
    /// Byte index range of the invalid data within the input.
    pub error_range: (usize, usize),
    /// Number of bytes already written to the output buffer.
    pub output_bytes_written: usize,
}

impl std::fmt::Display for DecodeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "invalid input data at byte range {:?} for the source encoding \
             ({} output bytes written)",
            self.error_range, self.output_bytes_written
        )
    }
}

impl std::error::Error for DecodeError {}
|