Reorg text_encoding sub-crate a bit and make it no_std.
This commit is contained in:
parent
3a17ca9e8c
commit
fc07ee3444
|
@ -5,7 +5,7 @@
|
||||||
//! decoding cannot fail. However, encoding will fail with scalar values
|
//! decoding cannot fail. However, encoding will fail with scalar values
|
||||||
//! greater than 255.
|
//! greater than 255.
|
||||||
|
|
||||||
use std;
|
use core;
|
||||||
use {DecodeResult, EncodeError, EncodeResult};
|
use {DecodeResult, EncodeError, EncodeResult};
|
||||||
|
|
||||||
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
||||||
|
@ -66,6 +66,6 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((input_i, unsafe {
|
Ok((input_i, unsafe {
|
||||||
std::str::from_utf8_unchecked(&output[..output_i])
|
core::str::from_utf8_unchecked(&output[..output_i])
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
#![no_std]
|
||||||
|
|
||||||
//! A library for incrementally encoding/decoding between utf8 and various
|
//! A library for incrementally encoding/decoding between utf8 and various
|
||||||
//! text encodings.
|
//! text encodings.
|
||||||
|
|
||||||
|
@ -7,6 +9,7 @@ mod utf16_le;
|
||||||
mod utf32_be;
|
mod utf32_be;
|
||||||
mod utf32_le;
|
mod utf32_le;
|
||||||
mod utf8;
|
mod utf8;
|
||||||
|
mod utils;
|
||||||
mod windows1252;
|
mod windows1252;
|
||||||
|
|
||||||
/// Encodes text from utf8 to a destination encoding.
|
/// Encodes text from utf8 to a destination encoding.
|
||||||
|
|
|
@ -4,37 +4,10 @@
|
||||||
//! only possible error is when invalid utf16 is encountered when decoding
|
//! only possible error is when invalid utf16 is encountered when decoding
|
||||||
//! to utf8.
|
//! to utf8.
|
||||||
|
|
||||||
use std;
|
use core;
|
||||||
|
use utils::{from_big_endian_u16, to_big_endian_u16};
|
||||||
use {DecodeError, DecodeResult, EncodeResult};
|
use {DecodeError, DecodeResult, EncodeResult};
|
||||||
|
|
||||||
/// Splits `n` into its two bytes in big-endian order (most significant
/// byte first), regardless of the target's native endianness.
fn to_big_endian(n: u16) -> [u8; 2] {
    // Safe standard-library conversion: no `unsafe` pointer access or
    // `cfg!(target_endian)` branching is required.
    n.to_be_bytes()
}
|
|
||||||
|
|
||||||
/// Reassembles a `u16` from two bytes given in big-endian order (`n[0]` is
/// the most significant byte), regardless of the target's native endianness.
fn from_big_endian(n: [u8; 2]) -> u16 {
    // Safe standard-library conversion: no `unsafe` pointer writes or
    // `cfg!(target_endian)` branching is required.
    u16::from_be_bytes(n)
}
|
|
||||||
|
|
||||||
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
||||||
// Do the encode.
|
// Do the encode.
|
||||||
let mut input_i = 0;
|
let mut input_i = 0;
|
||||||
|
@ -44,7 +17,7 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
|
||||||
if code <= 0xFFFF {
|
if code <= 0xFFFF {
|
||||||
// One code unit
|
// One code unit
|
||||||
if (output_i + 1) < output.len() {
|
if (output_i + 1) < output.len() {
|
||||||
let val = to_big_endian(code as u16);
|
let val = to_big_endian_u16(code as u16);
|
||||||
output[output_i] = val[0];
|
output[output_i] = val[0];
|
||||||
output[output_i + 1] = val[1];
|
output[output_i + 1] = val[1];
|
||||||
output_i += 2;
|
output_i += 2;
|
||||||
|
@ -55,8 +28,8 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
|
||||||
} else if (output_i + 3) < output.len() {
|
} else if (output_i + 3) < output.len() {
|
||||||
// Two code units
|
// Two code units
|
||||||
code -= 0x10000;
|
code -= 0x10000;
|
||||||
let first = to_big_endian(0xD800 | ((code >> 10) as u16));
|
let first = to_big_endian_u16(0xD800 | ((code >> 10) as u16));
|
||||||
let second = to_big_endian(0xDC00 | ((code as u16) & 0x3FF));
|
let second = to_big_endian_u16(0xDC00 | ((code as u16) & 0x3FF));
|
||||||
output[output_i] = first[0];
|
output[output_i] = first[0];
|
||||||
output[output_i + 1] = first[1];
|
output[output_i + 1] = first[1];
|
||||||
output[output_i + 2] = second[0];
|
output[output_i + 2] = second[0];
|
||||||
|
@ -94,10 +67,10 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
|
|
||||||
// Decode to scalar value.
|
// Decode to scalar value.
|
||||||
let code = {
|
let code = {
|
||||||
let code_1 = from_big_endian([bytes[0], bytes[1]]);
|
let code_1 = from_big_endian_u16([bytes[0], bytes[1]]);
|
||||||
if code_1 < 0xD800 || code_1 > 0xDFFF {
|
if code_1 < 0xD800 || code_1 > 0xDFFF {
|
||||||
// Single code unit.
|
// Single code unit.
|
||||||
unsafe { std::char::from_u32_unchecked(code_1 as u32) }
|
unsafe { core::char::from_u32_unchecked(code_1 as u32) }
|
||||||
} else if (code_1 & 0xFC00) == 0xDC00 {
|
} else if (code_1 & 0xFC00) == 0xDC00 {
|
||||||
// Error: orphaned second half of a surrogate pair.
|
// Error: orphaned second half of a surrogate pair.
|
||||||
return Err(DecodeError {
|
return Err(DecodeError {
|
||||||
|
@ -112,7 +85,7 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let bytes_2 = itr.next().unwrap();
|
let bytes_2 = itr.next().unwrap();
|
||||||
let code_2 = from_big_endian([bytes_2[0], bytes_2[1]]);
|
let code_2 = from_big_endian_u16([bytes_2[0], bytes_2[1]]);
|
||||||
if (code_2 & 0xFC00) != 0xDC00 {
|
if (code_2 & 0xFC00) != 0xDC00 {
|
||||||
// Error: second half is not valid surrogate.
|
// Error: second half is not valid surrogate.
|
||||||
return Err(DecodeError {
|
return Err(DecodeError {
|
||||||
|
@ -122,7 +95,7 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
std::char::from_u32_unchecked(
|
core::char::from_u32_unchecked(
|
||||||
(((code_1 as u32 - 0xD800) << 10) | (code_2 as u32 - 0xDC00)) + 0x10000,
|
(((code_1 as u32 - 0xD800) << 10) | (code_2 as u32 - 0xDC00)) + 0x10000,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -143,6 +116,6 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((input_i, unsafe {
|
Ok((input_i, unsafe {
|
||||||
std::str::from_utf8_unchecked(&output[..output_i])
|
core::str::from_utf8_unchecked(&output[..output_i])
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,37 +4,10 @@
|
||||||
//! only possible error is when invalid utf16 is encountered when decoding
|
//! only possible error is when invalid utf16 is encountered when decoding
|
||||||
//! to utf8.
|
//! to utf8.
|
||||||
|
|
||||||
use std;
|
use core;
|
||||||
|
use utils::{from_little_endian_u16, to_little_endian_u16};
|
||||||
use {DecodeError, DecodeResult, EncodeResult};
|
use {DecodeError, DecodeResult, EncodeResult};
|
||||||
|
|
||||||
/// Splits `n` into its two bytes in little-endian order (least significant
/// byte first), regardless of the target's native endianness.
fn to_little_endian(n: u16) -> [u8; 2] {
    // Safe standard-library conversion: no `unsafe` pointer access or
    // `cfg!(target_endian)` branching is required.
    n.to_le_bytes()
}
|
|
||||||
|
|
||||||
/// Reassembles a `u16` from two bytes given in little-endian order (`n[0]`
/// is the least significant byte), regardless of the target's native
/// endianness.
fn from_little_endian(n: [u8; 2]) -> u16 {
    // Safe standard-library conversion: no `unsafe` pointer writes or
    // `cfg!(target_endian)` branching is required.
    u16::from_le_bytes(n)
}
|
|
||||||
|
|
||||||
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
||||||
// Do the encode.
|
// Do the encode.
|
||||||
let mut input_i = 0;
|
let mut input_i = 0;
|
||||||
|
@ -44,7 +17,7 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
|
||||||
if code <= 0xFFFF {
|
if code <= 0xFFFF {
|
||||||
// One code unit
|
// One code unit
|
||||||
if (output_i + 1) < output.len() {
|
if (output_i + 1) < output.len() {
|
||||||
let val = to_little_endian(code as u16);
|
let val = to_little_endian_u16(code as u16);
|
||||||
output[output_i] = val[0];
|
output[output_i] = val[0];
|
||||||
output[output_i + 1] = val[1];
|
output[output_i + 1] = val[1];
|
||||||
output_i += 2;
|
output_i += 2;
|
||||||
|
@ -55,8 +28,8 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
|
||||||
} else if (output_i + 3) < output.len() {
|
} else if (output_i + 3) < output.len() {
|
||||||
// Two code units
|
// Two code units
|
||||||
code -= 0x10000;
|
code -= 0x10000;
|
||||||
let first = to_little_endian(0xD800 | ((code >> 10) as u16));
|
let first = to_little_endian_u16(0xD800 | ((code >> 10) as u16));
|
||||||
let second = to_little_endian(0xDC00 | ((code as u16) & 0x3FF));
|
let second = to_little_endian_u16(0xDC00 | ((code as u16) & 0x3FF));
|
||||||
output[output_i] = first[0];
|
output[output_i] = first[0];
|
||||||
output[output_i + 1] = first[1];
|
output[output_i + 1] = first[1];
|
||||||
output[output_i + 2] = second[0];
|
output[output_i + 2] = second[0];
|
||||||
|
@ -94,10 +67,10 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
|
|
||||||
// Decode to scalar value.
|
// Decode to scalar value.
|
||||||
let code = {
|
let code = {
|
||||||
let code_1 = from_little_endian([bytes[0], bytes[1]]);
|
let code_1 = from_little_endian_u16([bytes[0], bytes[1]]);
|
||||||
if code_1 < 0xD800 || code_1 > 0xDFFF {
|
if code_1 < 0xD800 || code_1 > 0xDFFF {
|
||||||
// Single code unit.
|
// Single code unit.
|
||||||
unsafe { std::char::from_u32_unchecked(code_1 as u32) }
|
unsafe { core::char::from_u32_unchecked(code_1 as u32) }
|
||||||
} else if (code_1 & 0xFC00) == 0xDC00 {
|
} else if (code_1 & 0xFC00) == 0xDC00 {
|
||||||
// Error: orphaned second half of a surrogate pair.
|
// Error: orphaned second half of a surrogate pair.
|
||||||
return Err(DecodeError {
|
return Err(DecodeError {
|
||||||
|
@ -112,7 +85,7 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let bytes_2 = itr.next().unwrap();
|
let bytes_2 = itr.next().unwrap();
|
||||||
let code_2 = from_little_endian([bytes_2[0], bytes_2[1]]);
|
let code_2 = from_little_endian_u16([bytes_2[0], bytes_2[1]]);
|
||||||
if (code_2 & 0xFC00) != 0xDC00 {
|
if (code_2 & 0xFC00) != 0xDC00 {
|
||||||
// Error: second half is not valid surrogate.
|
// Error: second half is not valid surrogate.
|
||||||
return Err(DecodeError {
|
return Err(DecodeError {
|
||||||
|
@ -122,7 +95,7 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
std::char::from_u32_unchecked(
|
core::char::from_u32_unchecked(
|
||||||
(((code_1 as u32 - 0xD800) << 10) | (code_2 as u32 - 0xDC00)) + 0x10000,
|
(((code_1 as u32 - 0xD800) << 10) | (code_2 as u32 - 0xDC00)) + 0x10000,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -143,6 +116,6 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((input_i, unsafe {
|
Ok((input_i, unsafe {
|
||||||
std::str::from_utf8_unchecked(&output[..output_i])
|
core::str::from_utf8_unchecked(&output[..output_i])
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,48 +4,17 @@
|
||||||
//! only possible error is when invalid utf32 is encountered when decoding
|
//! only possible error is when invalid utf32 is encountered when decoding
|
||||||
//! to utf8.
|
//! to utf8.
|
||||||
|
|
||||||
use std;
|
use core;
|
||||||
|
use utils::{from_big_endian_u32, to_big_endian_u32};
|
||||||
use {DecodeError, DecodeResult, EncodeResult};
|
use {DecodeError, DecodeResult, EncodeResult};
|
||||||
|
|
||||||
/// Splits `n` into its four bytes in big-endian order (most significant
/// byte first), regardless of the target's native endianness.
fn to_big_endian(n: u32) -> [u8; 4] {
    // Safe standard-library conversion: no `unsafe` pointer access or
    // `cfg!(target_endian)` branching is required.
    n.to_be_bytes()
}
|
|
||||||
|
|
||||||
/// Reassembles a `u32` from four bytes given in big-endian order (`n[0]` is
/// the most significant byte), regardless of the target's native endianness.
fn from_big_endian(n: [u8; 4]) -> u32 {
    // Safe standard-library conversion: no `unsafe` pointer writes or
    // `cfg!(target_endian)` branching is required.
    u32::from_be_bytes(n)
}
|
|
||||||
|
|
||||||
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
||||||
// Do the encode.
|
// Do the encode.
|
||||||
let mut input_i = 0;
|
let mut input_i = 0;
|
||||||
let mut output_i = 0;
|
let mut output_i = 0;
|
||||||
for (offset, c) in input.char_indices() {
|
for (offset, c) in input.char_indices() {
|
||||||
if (output_i + 3) < output.len() {
|
if (output_i + 3) < output.len() {
|
||||||
let mut code = to_big_endian(c as u32);
|
let mut code = to_big_endian_u32(c as u32);
|
||||||
output[output_i] = code[0];
|
output[output_i] = code[0];
|
||||||
output[output_i + 1] = code[1];
|
output[output_i + 1] = code[1];
|
||||||
output[output_i + 2] = code[2];
|
output[output_i + 2] = code[2];
|
||||||
|
@ -82,9 +51,9 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do the decode.
|
// Do the decode.
|
||||||
if let Some(code) =
|
if let Some(code) = core::char::from_u32(from_big_endian_u32([
|
||||||
std::char::from_u32(from_big_endian([bytes[0], bytes[1], bytes[2], bytes[3]]))
|
bytes[0], bytes[1], bytes[2], bytes[3],
|
||||||
{
|
])) {
|
||||||
// Encode to utf8.
|
// Encode to utf8.
|
||||||
let mut buf = [0u8; 4];
|
let mut buf = [0u8; 4];
|
||||||
let s = code.encode_utf8(&mut buf);
|
let s = code.encode_utf8(&mut buf);
|
||||||
|
@ -106,6 +75,6 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((input_i, unsafe {
|
Ok((input_i, unsafe {
|
||||||
std::str::from_utf8_unchecked(&output[..output_i])
|
core::str::from_utf8_unchecked(&output[..output_i])
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,48 +4,17 @@
|
||||||
//! only possible error is when invalid utf32 is encountered when decoding
|
//! only possible error is when invalid utf32 is encountered when decoding
|
||||||
//! to utf8.
|
//! to utf8.
|
||||||
|
|
||||||
use std;
|
use core;
|
||||||
|
use utils::{from_little_endian_u32, to_little_endian_u32};
|
||||||
use {DecodeError, DecodeResult, EncodeResult};
|
use {DecodeError, DecodeResult, EncodeResult};
|
||||||
|
|
||||||
/// Splits `n` into its four bytes in little-endian order (least significant
/// byte first), regardless of the target's native endianness.
fn to_little_endian(n: u32) -> [u8; 4] {
    // Safe standard-library conversion: no `unsafe` pointer access or
    // `cfg!(target_endian)` branching is required.
    n.to_le_bytes()
}
|
|
||||||
|
|
||||||
/// Reassembles a `u32` from four bytes given in little-endian order (`n[0]`
/// is the least significant byte), regardless of the target's native
/// endianness.
fn from_little_endian(n: [u8; 4]) -> u32 {
    // Safe standard-library conversion: no `unsafe` pointer writes or
    // `cfg!(target_endian)` branching is required.
    u32::from_le_bytes(n)
}
|
|
||||||
|
|
||||||
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
||||||
// Do the encode.
|
// Do the encode.
|
||||||
let mut input_i = 0;
|
let mut input_i = 0;
|
||||||
let mut output_i = 0;
|
let mut output_i = 0;
|
||||||
for (offset, c) in input.char_indices() {
|
for (offset, c) in input.char_indices() {
|
||||||
if (output_i + 3) < output.len() {
|
if (output_i + 3) < output.len() {
|
||||||
let mut code = to_little_endian(c as u32);
|
let mut code = to_little_endian_u32(c as u32);
|
||||||
output[output_i] = code[0];
|
output[output_i] = code[0];
|
||||||
output[output_i + 1] = code[1];
|
output[output_i + 1] = code[1];
|
||||||
output[output_i + 2] = code[2];
|
output[output_i + 2] = code[2];
|
||||||
|
@ -82,9 +51,9 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do the decode.
|
// Do the decode.
|
||||||
if let Some(code) =
|
if let Some(code) = core::char::from_u32(from_little_endian_u32([
|
||||||
std::char::from_u32(from_little_endian([bytes[0], bytes[1], bytes[2], bytes[3]]))
|
bytes[0], bytes[1], bytes[2], bytes[3],
|
||||||
{
|
])) {
|
||||||
// Encode to utf8.
|
// Encode to utf8.
|
||||||
let mut buf = [0u8; 4];
|
let mut buf = [0u8; 4];
|
||||||
let s = code.encode_utf8(&mut buf);
|
let s = code.encode_utf8(&mut buf);
|
||||||
|
@ -106,6 +75,6 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((input_i, unsafe {
|
Ok((input_i, unsafe {
|
||||||
std::str::from_utf8_unchecked(&output[..output_i])
|
core::str::from_utf8_unchecked(&output[..output_i])
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
//! is still useful for validating unknown input. And they allow a uniform
|
//! is still useful for validating unknown input. And they allow a uniform
|
||||||
//! API for all encodings.
|
//! API for all encodings.
|
||||||
|
|
||||||
use std;
|
use core;
|
||||||
use {DecodeError, DecodeResult, EncodeResult};
|
use {DecodeError, DecodeResult, EncodeResult};
|
||||||
|
|
||||||
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
||||||
|
@ -25,7 +25,7 @@ pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
|
pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
|
||||||
let valid_up_to = match std::str::from_utf8(input) {
|
let valid_up_to = match core::str::from_utf8(input) {
|
||||||
Ok(text) => text.len(),
|
Ok(text) => text.len(),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
if e.valid_up_to() > 0 {
|
if e.valid_up_to() > 0 {
|
||||||
|
@ -40,11 +40,11 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
};
|
};
|
||||||
|
|
||||||
let (in_consumed, out_slice) = encode_from_utf8(
|
let (in_consumed, out_slice) = encode_from_utf8(
|
||||||
unsafe { std::str::from_utf8_unchecked(&input[..valid_up_to]) },
|
unsafe { core::str::from_utf8_unchecked(&input[..valid_up_to]) },
|
||||||
output,
|
output,
|
||||||
).unwrap();
|
).unwrap();
|
||||||
|
|
||||||
Ok((in_consumed, unsafe {
|
Ok((in_consumed, unsafe {
|
||||||
std::str::from_utf8_unchecked(out_slice)
|
core::str::from_utf8_unchecked(out_slice)
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
121
sub_crates/text_encoding/src/utils.rs
Normal file
121
sub_crates/text_encoding/src/utils.rs
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
use core::mem::transmute;
|
||||||
|
|
||||||
|
/// Splits `n` into its two bytes in big-endian order (most significant
/// byte first), regardless of the target's native endianness.
#[inline(always)]
pub(crate) fn to_big_endian_u16(n: u16) -> [u8; 2] {
    // Safe standard-library conversion: no `unsafe` pointer access or
    // `cfg!(target_endian)` branching is required.
    n.to_be_bytes()
}
|
||||||
|
|
||||||
|
/// Reassembles a `u16` from two bytes given in big-endian order (`n[0]` is
/// the most significant byte), regardless of the target's native endianness.
#[inline(always)]
pub(crate) fn from_big_endian_u16(n: [u8; 2]) -> u16 {
    // Safe standard-library conversion: no `unsafe` pointer writes or
    // `cfg!(target_endian)` branching is required.
    u16::from_be_bytes(n)
}
|
||||||
|
|
||||||
|
/// Splits `n` into its two bytes in little-endian order (least significant
/// byte first), regardless of the target's native endianness.
#[inline(always)]
pub(crate) fn to_little_endian_u16(n: u16) -> [u8; 2] {
    // Safe standard-library conversion: no `unsafe` pointer access or
    // `cfg!(target_endian)` branching is required.
    n.to_le_bytes()
}
|
||||||
|
|
||||||
|
/// Reassembles a `u16` from two bytes given in little-endian order (`n[0]`
/// is the least significant byte), regardless of the target's native
/// endianness.
#[inline(always)]
pub(crate) fn from_little_endian_u16(n: [u8; 2]) -> u16 {
    // Safe standard-library conversion: no `unsafe` pointer writes or
    // `cfg!(target_endian)` branching is required.
    u16::from_le_bytes(n)
}
|
||||||
|
|
||||||
|
/// Splits `n` into its four bytes in big-endian order (most significant
/// byte first), regardless of the target's native endianness.
#[inline(always)]
pub(crate) fn to_big_endian_u32(n: u32) -> [u8; 4] {
    // Safe standard-library conversion: no `unsafe` pointer access or
    // `cfg!(target_endian)` branching is required.
    n.to_be_bytes()
}
|
||||||
|
|
||||||
|
/// Reassembles a `u32` from four bytes given in big-endian order (`n[0]` is
/// the most significant byte), regardless of the target's native endianness.
#[inline(always)]
pub(crate) fn from_big_endian_u32(n: [u8; 4]) -> u32 {
    // Safe standard-library conversion: no `unsafe` pointer writes or
    // `cfg!(target_endian)` branching is required.
    u32::from_be_bytes(n)
}
|
||||||
|
|
||||||
|
/// Splits `n` into its four bytes in little-endian order (least significant
/// byte first), regardless of the target's native endianness.
#[inline(always)]
pub(crate) fn to_little_endian_u32(n: u32) -> [u8; 4] {
    // Safe standard-library conversion: no `unsafe` pointer access or
    // `cfg!(target_endian)` branching is required.
    n.to_le_bytes()
}
|
||||||
|
|
||||||
|
/// Reassembles a `u32` from four bytes given in little-endian order (`n[0]`
/// is the least significant byte), regardless of the target's native
/// endianness.
#[inline(always)]
pub(crate) fn from_little_endian_u32(n: [u8; 4]) -> u32 {
    // Safe standard-library conversion: no `unsafe` pointer writes or
    // `cfg!(target_endian)` branching is required.
    u32::from_le_bytes(n)
}
|
|
@ -1,6 +1,6 @@
|
||||||
//! Encoding/decoding functions for Windows-1252.
|
//! Encoding/decoding functions for Windows-1252.
|
||||||
|
|
||||||
use std;
|
use core;
|
||||||
use {DecodeError, DecodeResult, EncodeError, EncodeResult};
|
use {DecodeError, DecodeResult, EncodeError, EncodeResult};
|
||||||
|
|
||||||
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
|
||||||
|
@ -81,7 +81,7 @@ pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((input_i, unsafe {
|
Ok((input_i, unsafe {
|
||||||
std::str::from_utf8_unchecked(&output[..output_i])
|
core::str::from_utf8_unchecked(&output[..output_i])
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user