WIP creating a clean frontend/backend separation.

- Started work on writing a new backend.
- Started work on writing text encoding handling.
parent b713b72e72
commit 0ee183aa72

Cargo.lock  (generated, 14 lines changed)

@@ -2,12 +2,14 @@
name = "Led"
version = "0.0.2"
dependencies = [
 "backend 0.1.0",
 "docopt 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "ropey 0.8.4 (git+https://github.com/cessen/ropey)",
 "serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)",
 "smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "text_encoding 0.1.0",
 "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
]

@@ -20,6 +22,14 @@ dependencies = [
 "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "backend"
version = "0.1.0"
dependencies = [
 "ropey 0.8.4 (git+https://github.com/cessen/ropey)",
 "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "docopt"
version = "0.8.3"

@@ -155,6 +165,10 @@
 "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "text_encoding"
version = "0.1.0"

[[package]]
name = "thread_local"
version = "0.3.5"

Cargo.toml  (15 lines changed)

@@ -1,3 +1,9 @@
[workspace]
members = [
    "sub_crates/backend",
    "sub_crates/text_encoding",
]

[package]
name = "Led"
version = "0.0.2"

@@ -17,4 +23,11 @@ serde = "1.*"
serde_derive = "1.*"
docopt = "0.8"
smallvec = "0.6"
termion = "1.5"
termion = "1.5"

# Local crate dependencies
[dependencies.backend]
path = "sub_crates/backend"

[dependencies.text_encoding]
path = "sub_crates/text_encoding"

sub_crates/backend/Cargo.toml  (new file, 14 lines)

@@ -0,0 +1,14 @@
[package]
name = "backend"
version = "0.1.0"
authors = ["Nathan Vegdahl <cessen@cessen.com>"]
license = "MIT"

[lib]
name = "backend"
path = "src/lib.rs"

[dependencies]
# ropey = "0.8"
ropey = { git = "https://github.com/cessen/ropey", branch = "master" }
unicode-segmentation = "1.2.1"

sub_crates/backend/src/buffer.rs  (new file, 9 lines)

@@ -0,0 +1,9 @@
use ropey::Rope;

#[derive(Debug, Clone)]
pub struct Buffer {
    // on_disk_encoding: Encoding,
    content_type: String,
    is_dirty: bool,
    text: Rope, // The actual text content.
}

sub_crates/backend/src/lib.rs  (new file, 4 lines)

@@ -0,0 +1,4 @@
extern crate ropey;
extern crate unicode_segmentation;

pub mod buffer;

sub_crates/text_encoding/Cargo.toml  (new file, 9 lines)

@@ -0,0 +1,9 @@
[package]
name = "text_encoding"
version = "0.1.0"
authors = ["Nathan Vegdahl <cessen@cessen.com>"]
license = "MIT"

[lib]
name = "text_encoding"
path = "src/lib.rs"

sub_crates/text_encoding/src/latin1.rs  (new file, 71 lines)

@@ -0,0 +1,71 @@
//! Encoding/decoding functions for ISO/IEC 8859-1 (or "latin1"), which
//! conveniently happens to map 1-to-1 to the first 256 unicode scalar values.
//!
//! Because latin1 is a single-byte encoding where all bytes are valid,
//! decoding cannot fail. However, encoding will fail with scalar values
//! greater than 255.

use std;
use {DecodeResult, EncodeError, EncodeResult};

pub fn encode_from_utf8(input: &str, output: &mut [u8]) -> EncodeResult {
    // Do the encode.
    let mut input_i = 0;
    let mut output_i = 0;
    for (offset, c) in input.char_indices() {
        if output_i >= output.len() {
            break;
        }
        if c as u32 > 255 {
            return Err(EncodeError {
                character: c,
                byte_offset: offset,
                bytes_written: output_i,
            });
        }
        output[output_i] = c as u8;
        output_i += 1;
        input_i = offset;
    }

    // Calculate how much of the input was consumed.
    input_i += 1;
    if input_i > input.len() {
        input_i = input.len();
    } else {
        while !input.is_char_boundary(input_i) {
            input_i += 1;
        }
    }

    Ok((input_i, output_i))
}

pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let mut input_i = 0;
    let mut output_i = 0;
    for &byte in input.iter() {
        if byte <= 127 {
            // 1-byte case
            if output_i >= output.len() {
                break;
            }
            output[output_i] = byte;
            input_i += 1;
            output_i += 1;
        } else {
            // 2-byte case
            if (output_i + 1) >= output.len() {
                break;
            }
            output[output_i] = 0b11000000 | (byte >> 6);
            output[output_i + 1] = 0b10000000 | (byte & 0b00111111);
            input_i += 1;
            output_i += 2;
        }
    }

    Ok((input_i, unsafe {
        std::str::from_utf8_unchecked(&output[..output_i])
    }))
}
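Not part of the commit: a small test-style sketch of the failure mode described in the module docs above, assuming it is placed inside this module so it can call encode_from_utf8 directly. The euro sign (U+20AC) is above 255, so encoding stops at it and reports what was already written.

#[test]
fn latin1_encode_rejects_non_latin1_chars() {
    let mut buf = [0u8; 8];
    // "ab€" encodes 'a' and 'b', then fails on '€' (U+20AC > 255).
    let err = encode_from_utf8("ab\u{20AC}", &mut buf).unwrap_err();
    assert_eq!(err.character, '\u{20AC}');
    assert_eq!(err.byte_offset, 2); // '€' starts at utf8 byte 2
    assert_eq!(err.bytes_written, 2); // 'a' and 'b' were written
    assert_eq!(&buf[..2], b"ab");
}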

sub_crates/text_encoding/src/lib.rs  (new file, 97 lines)

@@ -0,0 +1,97 @@
//! A library for incrementally encoding/decoding between utf8 and various
//! text encodings.

mod latin1;
mod utf16_be;
mod utf16_le;
mod utf8;

/// Encodes text from utf8 to a destination encoding.
pub fn encode_from_utf8(output_encoding: Encoding, input: &str, output: &mut [u8]) -> EncodeResult {
    match output_encoding {
        Encoding::Utf8 => utf8::encode_from_utf8(input, output),
        Encoding::Utf16BE => utf16_be::encode_from_utf8(input, output),
        Encoding::Utf16LE => utf16_le::encode_from_utf8(input, output),
        Encoding::Latin1 => latin1::encode_from_utf8(input, output),
        _ => unimplemented!(),
    }
}

/// Decodes text from a source encoding to utf8.
pub fn decode_to_utf8<'a>(
    input_encoding: Encoding,
    input: &[u8],
    output: &'a mut [u8],
) -> DecodeResult<'a> {
    match input_encoding {
        Encoding::Utf8 => utf8::decode_to_utf8(input, output),
        Encoding::Utf16BE => utf16_be::decode_to_utf8(input, output),
        Encoding::Utf16LE => utf16_le::decode_to_utf8(input, output),
        Encoding::Latin1 => latin1::decode_to_utf8(input, output),
        _ => unimplemented!(),
    }
}

/// Describes a text encoding.
#[derive(Debug, Copy, Clone)]
pub enum Encoding {
    Utf8,
    Utf16BE, // Big endian
    Utf16LE, // Little endian
    Utf32BE, // Big endian
    Utf32LE, // Little endian
    ShiftJIS,
    Big5,
    Latin1,      // ISO/IEC 8859-1
    Windows1252, // Windows code page 1252
}

/// Result type for encoding text from utf8 to a target encoding.
///
/// The Ok() variant provides the number of bytes consumed and the
/// number of bytes written, in that order.
pub type EncodeResult = Result<(usize, usize), EncodeError>;

/// Result type for decoding text from a target encoding to utf8.
///
/// The Ok() variant provides the number of bytes consumed and a reference
/// to the valid decoded text.
pub type DecodeResult<'a> = Result<(usize, &'a str), DecodeError>;

/// Represents an error when encoding from utf8 to some other format.
///
/// Since valid input utf8 is statically assumed, the only possible
/// error is encountering a char that is not representable in the target
/// encoding.
///
/// The problematic character, the byte offset of that character in the
/// input utf8, and the number of bytes already written to the output
/// buffer are provided.
///
/// It is guaranteed that all input leading up to the problem character has
/// already been encoded and written to the output buffer.
#[derive(Debug, Copy, Clone)]
pub struct EncodeError {
    pub character: char,
    pub byte_offset: usize,
    pub bytes_written: usize,
}

/// Represents an error when decoding to utf8 from some other format.
///
/// All supported text encodings can be fully represented in utf8, and
/// therefore the only possible error is that we encounter bytes in the
/// input data that are invalid for the text encoding we're attempting
/// to decode from.
///
/// The byte offset of the invalid input data and the number of bytes
/// already written to the output buffer are provided.
///
/// It is guaranteed that all input leading up to the invalid data has
/// already been decoded and written to the output buffer.
#[derive(Debug, Copy, Clone)]
pub struct DecodeError {
    pub byte_offset: usize,
    pub bytes_written: usize,
}
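Not part of the commit: a minimal usage sketch of the public API above (buffer sizes and input bytes are arbitrary, chosen for illustration). It decodes latin1 bytes to utf8 and then re-encodes the result as big-endian utf16, consuming the (bytes consumed, output produced) pairs that the result types describe.

extern crate text_encoding;

use text_encoding::{decode_to_utf8, encode_from_utf8, Encoding};

fn main() {
    // "café" in latin1: 'é' is the single byte 0xE9.
    let latin1_bytes = [0x63, 0x61, 0x66, 0xE9];
    let mut utf8_buf = [0u8; 8];
    let (consumed, text) =
        decode_to_utf8(Encoding::Latin1, &latin1_bytes, &mut utf8_buf).unwrap();
    assert_eq!(consumed, 4); // all four latin1 bytes consumed
    assert_eq!(text, "café"); // 'é' is now two utf8 bytes

    // Re-encode the decoded text as big-endian utf16.
    let mut utf16_buf = [0u8; 16];
    let (in_consumed, out_written) =
        encode_from_utf8(Encoding::Utf16BE, text, &mut utf16_buf).unwrap();
    assert_eq!(in_consumed, text.len());
    assert_eq!(out_written, 8); // four BMP chars, two bytes each
}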

sub_crates/text_encoding/src/utf16_be.rs  (new file, 148 lines)

@@ -0,0 +1,148 @@
//! Encoding/decoding functions for big-endian UTF-16.
//!
//! Because both utf8 and utf16 can represent the entirety of unicode, the
//! only possible error is when invalid utf16 is encountered when decoding
//! to utf8.

use std;
use {DecodeError, DecodeResult, EncodeResult};

fn to_big_endian(n: u16) -> [u8; 2] {
    use std::mem::transmute;
    let ptr = unsafe { transmute::<*const u16, *const u8>(&n as *const u16) };
    if cfg!(target_endian = "little") {
        unsafe { [*ptr.offset(1), *ptr] }
    } else {
        unsafe { [*ptr, *ptr.offset(1)] }
    }
}

fn from_big_endian(n: [u8; 2]) -> u16 {
    use std::mem::transmute;
    let mut x: u16 = 0;
    let ptr = unsafe { transmute::<*mut u16, *mut u8>(&mut x as *mut u16) };
    if cfg!(target_endian = "little") {
        unsafe {
            *ptr = n[1];
            *ptr.offset(1) = n[0];
        }
    } else {
        unsafe {
            *ptr = n[0];
            *ptr.offset(1) = n[1];
        }
    }
    x
}

pub fn encode_from_utf8(input: &str, output: &mut [u8]) -> EncodeResult {
    // Do the encode.
    let mut input_i = 0;
    let mut output_i = 0;
    for (offset, c) in input.char_indices() {
        let mut code = c as u32;
        if code <= 0xFFFF {
            // One code unit
            if (output_i + 1) < output.len() {
                let val = to_big_endian(code as u16);
                output[output_i] = val[0];
                output[output_i + 1] = val[1];
                output_i += 2;
                input_i = offset;
            } else {
                break;
            }
        } else if (output_i + 3) < output.len() {
            // Two code units
            code -= 0x10000;
            let first = to_big_endian(0xD800 | ((code >> 10) as u16));
            let second = to_big_endian(0xDC00 | ((code as u16) & 0x3FF));
            output[output_i] = first[0];
            output[output_i + 1] = first[1];
            output[output_i + 2] = second[0];
            output[output_i + 3] = second[1];
            output_i += 4;
            input_i = offset;
        } else {
            break;
        }
    }

    // Calculate how much of the input was consumed.
    input_i += 1;
    if input_i > input.len() {
        input_i = input.len();
    } else {
        while !input.is_char_boundary(input_i) {
            input_i += 1;
        }
    }

    Ok((input_i, output_i))
}

pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let mut input_i = 0;
    let mut output_i = 0;

    // Loop through the input, getting 2 bytes at a time.
    let mut itr = input.chunks(2);
    while let Some(bytes) = itr.next() {
        if bytes.len() < 2 {
            break;
        }

        // Decode to scalar value.
        let code = {
            let code_1 = from_big_endian([bytes[0], bytes[1]]);
            if code_1 < 0xD800 || code_1 > 0xDFFF {
                // Single code unit.
                unsafe { std::char::from_u32_unchecked(code_1 as u32) }
            } else if (code_1 & 0xFC00) == 0xDC00 {
                // Error: orphaned second half of a surrogate pair.
                return Err(DecodeError {
                    byte_offset: input_i,
                    bytes_written: output_i,
                });
            } else {
                // Two code units.

                // Get the second code unit, if possible.
                if (input_i + 3) >= input.len() {
                    break;
                }
                let bytes_2 = itr.next().unwrap();
                let code_2 = from_big_endian([bytes_2[0], bytes_2[1]]);
                if (code_2 & 0xFC00) != 0xDC00 {
                    // Error: second half is not a valid surrogate.
                    return Err(DecodeError {
                        byte_offset: input_i,
                        bytes_written: output_i,
                    });
                }

                unsafe {
                    std::char::from_u32_unchecked(
                        (((code_1 as u32 - 0xD800) << 10) | (code_2 as u32 - 0xDC00)) + 0x10000,
                    )
                }
            }
        };

        // Encode to utf8.
        let mut buf = [0u8; 4];
        let s = code.encode_utf8(&mut buf);
        if (output_i + s.len()) > output.len() {
            break;
        }
        output[output_i..(output_i + s.len())].copy_from_slice(s.as_bytes());

        // Update our counters.
        input_i += code.len_utf16() * 2;
        output_i += s.len();
    }

    Ok((input_i, unsafe {
        std::str::from_utf8_unchecked(&output[..output_i])
    }))
}
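Not part of the commit: a test-style sketch of the surrogate-pair math above, assuming it is placed inside this module. U+10437 encodes to the utf16 pair 0xD801 0xDC37, i.e. the big-endian bytes D8 01 DC 37.

#[test]
fn utf16_be_surrogate_pair_round_trip() {
    // Encode U+10437 (a char outside the BMP) to big-endian utf16.
    let mut encoded = [0u8; 4];
    let (consumed, written) = encode_from_utf8("\u{10437}", &mut encoded).unwrap();
    assert_eq!(consumed, 4); // the char is 4 bytes of utf8
    assert_eq!(written, 4); // two code units, two bytes each
    assert_eq!(encoded, [0xD8, 0x01, 0xDC, 0x37]);

    // Decode it back to utf8.
    let mut decoded = [0u8; 4];
    let (_, text) = decode_to_utf8(&encoded, &mut decoded).unwrap();
    assert_eq!(text, "\u{10437}");
}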

sub_crates/text_encoding/src/utf16_le.rs  (new file, 148 lines)

@@ -0,0 +1,148 @@
//! Encoding/decoding functions for little-endian UTF-16.
//!
//! Because both utf8 and utf16 can represent the entirety of unicode, the
//! only possible error is when invalid utf16 is encountered when decoding
//! to utf8.

use std;
use {DecodeError, DecodeResult, EncodeResult};

fn to_little_endian(n: u16) -> [u8; 2] {
    use std::mem::transmute;
    let ptr = unsafe { transmute::<*const u16, *const u8>(&n as *const u16) };
    if cfg!(target_endian = "little") {
        unsafe { [*ptr, *ptr.offset(1)] }
    } else {
        unsafe { [*ptr.offset(1), *ptr] }
    }
}

fn from_little_endian(n: [u8; 2]) -> u16 {
    use std::mem::transmute;
    let mut x: u16 = 0;
    let ptr = unsafe { transmute::<*mut u16, *mut u8>(&mut x as *mut u16) };
    if cfg!(target_endian = "little") {
        unsafe {
            *ptr = n[0];
            *ptr.offset(1) = n[1];
        }
    } else {
        unsafe {
            *ptr = n[1];
            *ptr.offset(1) = n[0];
        }
    }
    x
}

pub fn encode_from_utf8(input: &str, output: &mut [u8]) -> EncodeResult {
    // Do the encode.
    let mut input_i = 0;
    let mut output_i = 0;
    for (offset, c) in input.char_indices() {
        let mut code = c as u32;
        if code <= 0xFFFF {
            // One code unit
            if (output_i + 1) < output.len() {
                let val = to_little_endian(code as u16);
                output[output_i] = val[0];
                output[output_i + 1] = val[1];
                output_i += 2;
                input_i = offset;
            } else {
                break;
            }
        } else if (output_i + 3) < output.len() {
            // Two code units
            code -= 0x10000;
            let first = to_little_endian(0xD800 | ((code >> 10) as u16));
            let second = to_little_endian(0xDC00 | ((code as u16) & 0x3FF));
            output[output_i] = first[0];
            output[output_i + 1] = first[1];
            output[output_i + 2] = second[0];
            output[output_i + 3] = second[1];
            output_i += 4;
            input_i = offset;
        } else {
            break;
        }
    }

    // Calculate how much of the input was consumed.
    input_i += 1;
    if input_i > input.len() {
        input_i = input.len();
    } else {
        while !input.is_char_boundary(input_i) {
            input_i += 1;
        }
    }

    Ok((input_i, output_i))
}

pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let mut input_i = 0;
    let mut output_i = 0;

    // Loop through the input, getting 2 bytes at a time.
    let mut itr = input.chunks(2);
    while let Some(bytes) = itr.next() {
        if bytes.len() < 2 {
            break;
        }

        // Decode to scalar value.
        let code = {
            let code_1 = from_little_endian([bytes[0], bytes[1]]);
            if code_1 < 0xD800 || code_1 > 0xDFFF {
                // Single code unit.
                unsafe { std::char::from_u32_unchecked(code_1 as u32) }
            } else if (code_1 & 0xFC00) == 0xDC00 {
                // Error: orphaned second half of a surrogate pair.
                return Err(DecodeError {
                    byte_offset: input_i,
                    bytes_written: output_i,
                });
            } else {
                // Two code units.

                // Get the second code unit, if possible.
                if (input_i + 3) >= input.len() {
                    break;
                }
                let bytes_2 = itr.next().unwrap();
                let code_2 = from_little_endian([bytes_2[0], bytes_2[1]]);
                if (code_2 & 0xFC00) != 0xDC00 {
                    // Error: second half is not a valid surrogate.
                    return Err(DecodeError {
                        byte_offset: input_i,
                        bytes_written: output_i,
                    });
                }

                unsafe {
                    std::char::from_u32_unchecked(
                        (((code_1 as u32 - 0xD800) << 10) | (code_2 as u32 - 0xDC00)) + 0x10000,
                    )
                }
            }
        };

        // Encode to utf8.
        let mut buf = [0u8; 4];
        let s = code.encode_utf8(&mut buf);
        if (output_i + s.len()) > output.len() {
            break;
        }
        output[output_i..(output_i + s.len())].copy_from_slice(s.as_bytes());

        // Update our counters.
        input_i += code.len_utf16() * 2;
        output_i += s.len();
    }

    Ok((input_i, unsafe {
        std::str::from_utf8_unchecked(&output[..output_i])
    }))
}

sub_crates/text_encoding/src/utf8.rs  (new file, 51 lines)

@@ -0,0 +1,51 @@
//! These functions are essentially redundant, since they're supposedly
//! encoding/decoding between utf8 and... utf8. However, `decode_to_utf8()`
//! is still useful for validating unknown input. And they allow a uniform
//! API for all encodings.

use std;
use {DecodeError, DecodeResult, EncodeResult};

// Encode from utf8
pub fn encode_from_utf8(input: &str, output: &mut [u8]) -> EncodeResult {
    let copy_len = {
        if output.len() >= input.len() {
            input.len()
        } else {
            let mut i = output.len();
            while !input.is_char_boundary(i) {
                i -= 1;
            }
            i
        }
    };

    output[..copy_len].copy_from_slice(input[..copy_len].as_bytes());

    Ok((copy_len, copy_len))
}

pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let valid_up_to = match std::str::from_utf8(input) {
        Ok(text) => text.len(),
        Err(e) => {
            if e.valid_up_to() > 0 {
                e.valid_up_to()
            } else {
                return Err(DecodeError {
                    byte_offset: 0,
                    bytes_written: 0,
                });
            }
        }
    };

    let (in_consumed, out_written) = encode_from_utf8(
        unsafe { std::str::from_utf8_unchecked(&input[..valid_up_to]) },
        output,
    ).unwrap();

    Ok((in_consumed, unsafe {
        std::str::from_utf8_unchecked(&output[..out_written])
    }))
}
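Not part of the commit: a short sketch of the validation use mentioned in the module docs, assuming it lives inside this module. Valid leading bytes are passed through; input that is invalid from the very first byte is reported as a DecodeError.

#[test]
fn utf8_decode_validates_input() {
    let mut buf = [0u8; 8];

    // Valid prefix followed by an invalid byte: the prefix is passed through.
    let (consumed, text) = decode_to_utf8(b"hi\xFF", &mut buf).unwrap();
    assert_eq!((consumed, text), (2, "hi"));

    // Invalid from the first byte: reported as an error instead.
    assert!(decode_to_utf8(b"\xFF", &mut buf).is_err());
}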