//! Misc helpful utility functions for TextBuffer related stuff. use ropey::RopeSlice; pub fn is_line_ending(text: &str) -> bool { match text.chars().nth(0) { Some(c) if (c >= '\u{000A}' && c <= '\u{000D}') => true, Some('\u{0085}') | Some('\u{2028}') | Some('\u{2029}') => true, _ => false, } } pub fn str_is_whitespace(text: &str) -> bool { if let Some(c) = text.chars().nth(0) { is_whitespace(c) } else { false } } pub fn is_whitespace(c: char) -> bool { // TODO: this is a naive categorization of whitespace characters. // For better categorization these should be split up into groups // based on e.g. breaking vs non-breaking spaces, among other things. match c { //'\u{1680}' | // OGHAM SPACE MARK (here for completeness, but usually displayed as a dash, not as whitespace) '\u{0009}' | // CHARACTER TABULATION '\u{0020}' | // SPACE '\u{00A0}' | // NO-BREAK SPACE '\u{180E}' | // MONGOLIAN VOWEL SEPARATOR '\u{202F}' | // NARROW NO-BREAK SPACE '\u{205F}' | // MEDIUM MATHEMATICAL SPACE '\u{3000}' | // IDEOGRAPHIC SPACE '\u{FEFF}' // ZERO WIDTH NO-BREAK SPACE => true, // EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, // FOUR-PER-EM SPACE, SIX-PER-EM SPACE, FIGURE SPACE, // PUNCTUATION SPACE, THIN SPACE, HAIR SPACE, ZERO WIDTH SPACE. c if c >= '\u{2000}' && c <= '\u{200B}' => true, // Not a matching whitespace character. _ => false, } } /// Represents one of the valid Unicode line endings. /// Also acts as an index into `LINE_ENDINGS`. #[derive(PartialEq, Copy, Clone)] pub enum LineEnding { None = 0, // No line ending CRLF = 1, // CarriageReturn followed by LineFeed LF = 2, // U+000A -- LineFeed VT = 3, // U+000B -- VerticalTab FF = 4, // U+000C -- FormFeed CR = 5, // U+000D -- CarriageReturn NEL = 6, // U+0085 -- NextLine LS = 7, // U+2028 -- Line Separator PS = 8, // U+2029 -- ParagraphSeparator } pub fn str_to_line_ending(g: &str) -> LineEnding { match g { "\u{000D}\u{000A}" => LineEnding::CRLF, "\u{000A}" => LineEnding::LF, "\u{000B}" => LineEnding::VT, "\u{000C}" => LineEnding::FF, "\u{000D}" => LineEnding::CR, "\u{0085}" => LineEnding::NEL, "\u{2028}" => LineEnding::LS, "\u{2029}" => LineEnding::PS, // Not a line ending _ => LineEnding::None, } } pub fn rope_slice_to_line_ending(g: RopeSlice) -> LineEnding { if let Some(text) = g.as_str() { str_to_line_ending(text) } else if g == "\u{000D}\u{000A}" { LineEnding::CRLF } else { // Not a line ending LineEnding::None } } pub fn line_ending_to_str(ending: LineEnding) -> &'static str { LINE_ENDINGS[ending as usize] } /// An array of string literals corresponding to the possible /// unicode line endings. pub const LINE_ENDINGS: [&'static str; 9] = [ "", "\u{000D}\u{000A}", "\u{000A}", "\u{000B}", "\u{000C}", "\u{000D}", "\u{0085}", "\u{2028}", "\u{2029}", ];