led/sub_crates/text_encoding/src/lib.rs

101 lines
3.3 KiB
Rust

//! A library for incrementally encoding/decoding between utf8 and various
//! text encodings.
mod latin1;
mod utf16_be;
mod utf16_le;
mod utf8;
/// Encodes text from utf8 to a destination encoding.
///
/// Dispatches to the encoder module matching `output_encoding`, handing it
/// `input` (the utf8 text to encode) and `output` (the buffer the encoded
/// bytes are written into).  The result is whatever that encoder returns;
/// see [`EncodeResult`] and [`EncodeError`] for its shape.
///
/// # Panics
///
/// Panics if `output_encoding` has no encoder wired up in this function:
/// currently `Utf32BE`, `Utf32LE`, `ShiftJIS`, `Big5`, and `Windows1252`.
pub fn encode_from_utf8<'a>(
    output_encoding: Encoding,
    input: &str,
    output: &'a mut [u8],
) -> EncodeResult<'a> {
    match output_encoding {
        Encoding::Utf8 => utf8::encode_from_utf8(input, output),
        Encoding::Utf16BE => utf16_be::encode_from_utf8(input, output),
        Encoding::Utf16LE => utf16_le::encode_from_utf8(input, output),
        Encoding::Latin1 => latin1::encode_from_utf8(input, output),
        // Unsupported encodings are listed explicitly rather than hidden
        // behind a `_` arm, so that adding a new `Encoding` variant is a
        // compile error here instead of a surprise panic at runtime.
        Encoding::Utf32BE
        | Encoding::Utf32LE
        | Encoding::ShiftJIS
        | Encoding::Big5
        | Encoding::Windows1252 => {
            unimplemented!("encoding to {:?} is not supported yet", output_encoding)
        }
    }
}
/// Decodes text from a source encoding to utf8.
///
/// Dispatches to the decoder module matching `input_encoding`, handing it
/// `input` (the bytes to decode) and `output` (the buffer the utf8 text is
/// written into).  The result is whatever that decoder returns; see
/// [`DecodeResult`] and [`DecodeError`] for its shape.
///
/// # Panics
///
/// Panics if `input_encoding` has no decoder wired up in this function:
/// currently `Utf32BE`, `Utf32LE`, `ShiftJIS`, `Big5`, and `Windows1252`.
pub fn decode_to_utf8<'a>(
    input_encoding: Encoding,
    input: &[u8],
    output: &'a mut [u8],
) -> DecodeResult<'a> {
    match input_encoding {
        Encoding::Utf8 => utf8::decode_to_utf8(input, output),
        Encoding::Utf16BE => utf16_be::decode_to_utf8(input, output),
        Encoding::Utf16LE => utf16_le::decode_to_utf8(input, output),
        Encoding::Latin1 => latin1::decode_to_utf8(input, output),
        // Unsupported encodings are listed explicitly rather than hidden
        // behind a `_` arm, so that adding a new `Encoding` variant is a
        // compile error here instead of a surprise panic at runtime.
        Encoding::Utf32BE
        | Encoding::Utf32LE
        | Encoding::ShiftJIS
        | Encoding::Big5
        | Encoding::Windows1252 => {
            unimplemented!("decoding from {:?} is not supported yet", input_encoding)
        }
    }
}
/// Describes a text encoding.
///
/// This is a plain tag with no payload, so it is cheap to copy, can be
/// compared for equality, and can be used as a key in hashed collections.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Encoding {
    /// UTF-8.
    Utf8,
    /// UTF-16, big endian.
    Utf16BE,
    /// UTF-16, little endian.
    Utf16LE,
    /// UTF-32, big endian.
    Utf32BE,
    /// UTF-32, little endian.
    Utf32LE,
    /// Shift JIS.
    ShiftJIS,
    /// Big5.
    Big5,
    /// Latin-1 (ISO/IEC 8859-1).
    Latin1,
    /// Windows code page 1252.
    Windows1252,
}
/// Result type for encoding text from utf8 to a target encoding.
///
/// The `Ok` variant is a `(usize, &[u8])` pair: the number of bytes
/// consumed (presumably counted in the input utf8 -- confirm against the
/// encoder modules) and a reference to the valid encoded text data.
///
/// The `Err` variant is an `EncodeError`.
pub type EncodeResult<'a> = Result<(usize, &'a [u8]), EncodeError>;
/// Result type for decoding text from a source encoding to utf8.
///
/// The `Ok` variant is a `(usize, &str)` pair: the number of bytes
/// consumed (presumably counted in the encoded input -- confirm against
/// the decoder modules) and a reference to the valid decoded text.
///
/// The `Err` variant is a `DecodeError`.
pub type DecodeResult<'a> = Result<(usize, &'a str), DecodeError>;
/// Represents an error when encoding from utf8 to some other format.
///
/// Since valid input utf8 is statically assumed, the only possible
/// error is encountering a char that is not representable in the target
/// encoding.
///
/// The problematic character, the byte index range of that character in the
/// input utf8, and the number of bytes already written to the output buffer
/// are provided.
///
/// It is guaranteed that all input leading up to the problem character has
/// already been encoded and written to the output buffer.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// reported to users and propagated through `Box<dyn Error>`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct EncodeError {
    /// The character that cannot be represented in the target encoding.
    pub character: char,
    /// Byte index range of `character` within the input utf8, as a
    /// `(start, end)` pair.
    // NOTE(review): whether `end` is inclusive or exclusive is decided by
    // the encoder modules -- confirm there before relying on it.
    pub error_range: (usize, usize),
    /// Number of bytes already written to the output buffer.
    pub output_bytes_written: usize,
}

impl std::fmt::Display for EncodeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "character {:?} (U+{:04X}) at input byte range ({}, {}) cannot be represented \
             in the target encoding ({} output bytes already written)",
            self.character,
            u32::from(self.character),
            self.error_range.0,
            self.error_range.1,
            self.output_bytes_written,
        )
    }
}

impl std::error::Error for EncodeError {}
/// Represents an error when decoding to utf8 from some other format.
///
/// All supported text encodings can be fully represented in utf8, and
/// therefore the only possible error is that we encounter bytes in the
/// input data that are invalid for the text encoding we're attempting
/// to decode from.
///
/// The byte index range of the invalid input data and the number of bytes
/// already decoded and written to the output buffer are provided.
///
/// It is guaranteed that all input leading up to the invalid data has
/// already been decoded and written to the output buffer.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// reported to users and propagated through `Box<dyn Error>`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct DecodeError {
    /// Byte index range of the invalid data within the input, as a
    /// `(start, end)` pair.
    // NOTE(review): whether `end` is inclusive or exclusive is decided by
    // the decoder modules -- confirm there before relying on it.
    pub error_range: (usize, usize),
    /// Number of bytes already written to the output buffer.
    pub output_bytes_written: usize,
}

impl std::fmt::Display for DecodeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "invalid data for the source encoding at input byte range ({}, {}) \
             ({} output bytes already written)",
            self.error_range.0, self.error_range.1, self.output_bytes_written,
        )
    }
}

impl std::error::Error for DecodeError {}