From f8a38111d4b7d669c909407a37da57ba9b3831dd Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Thu, 5 Jul 2018 18:43:17 -0700 Subject: [PATCH] Some nice performance improvements with a simple optimization. We're now very close to being back to the performance levels we were at before the new Ropey without built-in grapheme support. --- Cargo.lock | 2 +- src/string_utils.rs | 50 +++++++++++++++++++++++++-------------------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 167c597..c1ddb89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -88,7 +88,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "ropey" version = "0.6.3" -source = "git+https://github.com/cessen/ropey#e6a449e07841da690f369eeb9ef05d55fe102623" +source = "git+https://github.com/cessen/ropey#41fb82d82279e8f20c2be680f661ee2f6c0e1c78" dependencies = [ "smallvec 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", ] diff --git a/src/string_utils.rs b/src/string_utils.rs index d2d69e7..d668811 100644 --- a/src/string_utils.rs +++ b/src/string_utils.rs @@ -54,27 +54,31 @@ pub fn rope_slice_is_whitespace(text: &RopeSlice) -> bool { // For better categorization these should be split up into groups // based on e.g. breaking vs non-breaking spaces, among other things. - text == "\u{0020}" // SPACE - || text == "\u{0009}" // CHARACTER TABULATION - || text == "\u{00A0}" // NO-BREAK SPACE - //|| "\u{1680}" // OGHAM SPACE MARK (here for completeness, but usually displayed as a dash, not as whitespace) - || text == "\u{180E}" // MONGOLIAN VOWEL SEPARATOR - || text == "\u{2000}" // EN QUAD - || text == "\u{2001}" // EM QUAD - || text == "\u{2002}" // EN SPACE - || text == "\u{2003}" // EM SPACE - || text == "\u{2004}" // THREE-PER-EM SPACE - || text == "\u{2005}" // FOUR-PER-EM SPACE - || text == "\u{2006}" // SIX-PER-EM SPACE - || text == "\u{2007}" // FIGURE SPACE - || text == "\u{2008}" // PUNCTUATION SPACE - || text == "\u{2009}" // THIN SPACE - || text == "\u{200A}" // HAIR SPACE - || text == "\u{200B}" // ZERO WIDTH SPACE - || text == "\u{202F}" // NARROW NO-BREAK SPACE - || text == "\u{205F}" // MEDIUM MATHEMATICAL SPACE - || text == "\u{3000}" // IDEOGRAPHIC SPACE - || text == "\u{FEFF}" // ZERO WIDTH NO-BREAK SPACE + if let Some(text) = text.as_str() { + is_whitespace(text) + } else { + text == "\u{0020}" // SPACE + || text == "\u{0009}" // CHARACTER TABULATION + || text == "\u{00A0}" // NO-BREAK SPACE + //|| "\u{1680}" // OGHAM SPACE MARK (here for completeness, but usually displayed as a dash, not as whitespace) + || text == "\u{180E}" // MONGOLIAN VOWEL SEPARATOR + || text == "\u{2000}" // EN QUAD + || text == "\u{2001}" // EM QUAD + || text == "\u{2002}" // EN SPACE + || text == "\u{2003}" // EM SPACE + || text == "\u{2004}" // THREE-PER-EM SPACE + || text == "\u{2005}" // FOUR-PER-EM SPACE + || text == "\u{2006}" // SIX-PER-EM SPACE + || text == "\u{2007}" // FIGURE SPACE + || text == "\u{2008}" // PUNCTUATION SPACE + || text == "\u{2009}" // THIN SPACE + || text == "\u{200A}" // HAIR SPACE + || text == "\u{200B}" // ZERO WIDTH SPACE + || text == "\u{202F}" // NARROW NO-BREAK SPACE + || text == "\u{205F}" // MEDIUM MATHEMATICAL SPACE + || text == "\u{3000}" // IDEOGRAPHIC SPACE + || text == "\u{FEFF}" // ZERO WIDTH NO-BREAK SPACE + } } pub fn line_ending_count(text: &str) -> usize { @@ -320,7 +324,9 @@ pub fn str_to_line_ending(g: &str) -> LineEnding { } pub fn rope_slice_to_line_ending(g: &RopeSlice) -> LineEnding { - if g == "\u{000D}\u{000A}" { + if let Some(text) = g.as_str() { + str_to_line_ending(text) + } else if g == "\u{000D}\u{000A}" { LineEnding::CRLF } else if g == "\u{000A}" { LineEnding::LF