led/src/string_utils.rs

//! Miscellaneous utility functions for `TextBuffer`-related string handling,
//! such as classifying whitespace and line endings.

use ropey::RopeSlice;

/// Returns `true` if `text` begins with a line-ending character.
///
/// Only the first `char` of `text` is inspected; anything after it is
/// ignored. An empty string yields `false`.
pub fn is_line_ending(text: &str) -> bool {
    let first = text.chars().next();
    matches!(
        first,
        // U+000A..=U+000D covers LF, VT, FF and CR in one contiguous range.
        Some('\u{000A}'..='\u{000D}')
            | Some('\u{0085}') // NEXT LINE
            | Some('\u{2028}') // LINE SEPARATOR
            | Some('\u{2029}') // PARAGRAPH SEPARATOR
    )
}

pub fn str_is_whitespace(text: &str) -> bool {
    if let Some(c) = text.chars().nth(0) {
        is_whitespace(c)
    } else {
        false
    }
}

/// Returns `true` if `c` is one of the whitespace characters recognized here.
///
/// Line-ending characters (LF, CR, ...) are deliberately not part of this set.
///
/// TODO: this is a naive categorization of whitespace characters.
/// For better categorization these should be split up into groups
/// based on e.g. breaking vs non-breaking spaces, among other things.
pub fn is_whitespace(c: char) -> bool {
    // U+1680 OGHAM SPACE MARK is intentionally left out: it is usually
    // displayed as a dash rather than as blank space.
    matches!(
        c,
        '\u{0009}' // CHARACTER TABULATION
            | '\u{0020}' // SPACE
            | '\u{00A0}' // NO-BREAK SPACE
            | '\u{180E}' // MONGOLIAN VOWEL SEPARATOR
            // EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE,
            // FOUR-PER-EM SPACE, SIX-PER-EM SPACE, FIGURE SPACE,
            // PUNCTUATION SPACE, THIN SPACE, HAIR SPACE, ZERO WIDTH SPACE.
            | '\u{2000}'..='\u{200B}'
            | '\u{202F}' // NARROW NO-BREAK SPACE
            | '\u{205F}' // MEDIUM MATHEMATICAL SPACE
            | '\u{3000}' // IDEOGRAPHIC SPACE
            | '\u{FEFF}' // ZERO WIDTH NO-BREAK SPACE
    )
}

/// Represents one of the valid Unicode line endings.
///
/// Each variant's discriminant also acts as an index into `LINE_ENDINGS`,
/// so the explicit values below must stay in sync with the order of that
/// array.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum LineEnding {
    /// No line ending.
    None = 0,
    /// CarriageReturn followed by LineFeed.
    CRLF = 1,
    /// U+000A -- LineFeed
    LF = 2,
    /// U+000B -- VerticalTab
    VT = 3,
    /// U+000C -- FormFeed
    FF = 4,
    /// U+000D -- CarriageReturn
    CR = 5,
    /// U+0085 -- NextLine
    NEL = 6,
    /// U+2028 -- Line Separator
    LS = 7,
    /// U+2029 -- ParagraphSeparator
    PS = 8,
}

/// Converts a string to the `LineEnding` it spells out.
///
/// The whole of `g` must be exactly one line ending. Any other input,
/// including the empty string or a line ending followed by more text,
/// maps to `LineEnding::None`.
pub fn str_to_line_ending(g: &str) -> LineEnding {
    // CRLF is the only ending made of two characters, so test for it first.
    if g == "\u{000D}\u{000A}" {
        return LineEnding::CRLF;
    }

    // Every remaining ending is exactly one `char`. Pulling two items from
    // the iterator rejects both the empty string and anything longer.
    let mut chars = g.chars();
    match (chars.next(), chars.next()) {
        (Some('\u{000A}'), None) => LineEnding::LF,
        (Some('\u{000B}'), None) => LineEnding::VT,
        (Some('\u{000C}'), None) => LineEnding::FF,
        (Some('\u{000D}'), None) => LineEnding::CR,
        (Some('\u{0085}'), None) => LineEnding::NEL,
        (Some('\u{2028}'), None) => LineEnding::LS,
        (Some('\u{2029}'), None) => LineEnding::PS,

        // Not a line ending
        _ => LineEnding::None,
    }
}

/// Converts a rope slice to the `LineEnding` it spells out.
///
/// When the slice can be viewed as a single `&str`, classification is
/// delegated to `str_to_line_ending`. Otherwise the slice is compared
/// against CRLF only, and anything else is reported as `LineEnding::None`.
pub fn rope_slice_to_line_ending(g: RopeSlice) -> LineEnding {
    match g.as_str() {
        Some(text) => str_to_line_ending(text),
        // NOTE(review): presumably `as_str` returns `None` when the slice is
        // not stored contiguously, in which case CRLF is the only multi-char
        // ending that could apply -- confirm against the ropey docs.
        None if g == "\u{000D}\u{000A}" => LineEnding::CRLF,
        // Not a line ending
        None => LineEnding::None,
    }
}

/// Returns the string for `ending`, looked up in `LINE_ENDINGS`.
///
/// `LineEnding::None` maps to the empty string.
pub fn line_ending_to_str(ending: LineEnding) -> &'static str {
    // The enum discriminants run 0..=8 and `LINE_ENDINGS` has nine entries,
    // so this index is always in bounds.
    let index = ending as usize;
    LINE_ENDINGS[index]
}

/// String literals for each of the possible Unicode line endings.
///
/// Entries are ordered to match the discriminants of `LineEnding`, with
/// the empty string at index 0 standing for "no line ending".
pub const LINE_ENDINGS: [&str; 9] = [
    "",                 // None
    "\u{000D}\u{000A}", // CRLF
    "\u{000A}",         // LF
    "\u{000B}",         // VT
    "\u{000C}",         // FF
    "\u{000D}",         // CR
    "\u{0085}",         // NEL
    "\u{2028}",         // LS
    "\u{2029}",         // PS
];