led/sub_crates/text_encoding/src/utf16_le.rs

//! Encoding/decoding functions for little-endian UTF-16.
//!
//! Because both utf8 and utf16 can represent the entirety of unicode, the
//! only possible error is when invalid utf16 is encountered when decoding
//! to utf8.

use core;
use utils::{from_little_endian_u16, to_little_endian_u16};
use {DecodeError, DecodeResult, EncodeResult};

pub fn encode_from_utf8<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
    // Do the encode.
    let mut input_i = 0;
    let mut output_i = 0;
    for (offset, c) in input.char_indices() {
        let mut code = c as u32;
        if code <= 0xFFFF {
            // One code unit
            if (output_i + 1) < output.len() {
                let val = to_little_endian_u16(code as u16);
                output[output_i] = val[0];
                output[output_i + 1] = val[1];
                output_i += 2;
                input_i = offset;
            } else {
                break;
            }
        } else if (output_i + 3) < output.len() {
            // Two code units
            code -= 0x10000;
            let first = to_little_endian_u16(0xD800 | ((code >> 10) as u16));
            let second = to_little_endian_u16(0xDC00 | ((code as u16) & 0x3FF));
            output[output_i] = first[0];
            output[output_i + 1] = first[1];
            output[output_i + 2] = second[0];
            output[output_i + 3] = second[1];
            output_i += 4;
            input_i = offset;
        } else {
            break;
        }
    }

    // Calculate how much of the input was consumed.
    input_i += 1;
    if input_i > input.len() {
        input_i = input.len();
    } else {
        while !input.is_char_boundary(input_i) {
            input_i += 1;
        }
    }

    Ok((input_i, &output[..output_i]))
}

pub fn decode_to_utf8<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let mut input_i = 0;
    let mut output_i = 0;

    // Loop through the input, getting 2 bytes at a time.
    let mut itr = input.chunks(2);
    while let Some(bytes) = itr.next() {
        if bytes.len() < 2 {
            break;
        }

        // Decode to scalar value.
        let code = {
            let code_1 = from_little_endian_u16([bytes[0], bytes[1]]);
            if code_1 < 0xD800 || code_1 > 0xDFFF {
                // Single code unit.
                unsafe { core::char::from_u32_unchecked(code_1 as u32) }
            } else if (code_1 & 0xFC00) == 0xDC00 {
                // Error: orphaned second half of a surrogate pair.
                return Err(DecodeError {
                    error_range: (input_i, input_i + 2),
                    output_bytes_written: output_i,
                });
            } else {
                // Two code units.

                // Get the second code unit, if possible.
                if !(input_i + 3) < input.len() {
                    break;
                }
                let bytes_2 = itr.next().unwrap();
                let code_2 = from_little_endian_u16([bytes_2[0], bytes_2[1]]);
                if (code_2 & 0xFC00) != 0xDC00 {
                    // Error: second half is not valid surrogate.
                    return Err(DecodeError {
                        error_range: (input_i, input_i + 2),
                        output_bytes_written: output_i,
                    });
                }

                unsafe {
                    core::char::from_u32_unchecked(
                        (((code_1 as u32 - 0xD800) << 10) | (code_2 as u32 - 0xDC00)) + 0x10000,
                    )
                }
            }
        };

        // Encode to utf8.
        let mut buf = [0u8; 4];
        let s = code.encode_utf8(&mut buf);
        if (output_i + s.len()) > output.len() {
            break;
        }
        output[output_i..(output_i + s.len())].copy_from_slice(s.as_bytes());

        // Update our counters.
        input_i += code.len_utf16() * 2;
        output_i += s.len();
    }

    Ok((input_i, unsafe {
        core::str::from_utf8_unchecked(&output[..output_i])
    }))
}