Handle grapheme width more robustly.

2020-02-16 22:26:31 +09:00 · 2020-02-16 22:26:31 +09:00 · 82df4d64bd
commit 82df4d64bd
parent 7b71cf2fdd
2 changed files with 23 additions and 5 deletions
--- a/src/formatter.rs
+++ b/src/formatter.rs
@ -5,7 +5,7 @@ use ropey::{Rope, RopeSlice};
 use crate::{
    buffer::Buffer,
    string_utils::char_count,
-    string_utils::{is_line_ending, str_is_whitespace},
+    string_utils::str_is_whitespace,
    utils::{grapheme_width, is_grapheme_boundary, prev_grapheme_boundary, RopeGraphemes},
 };
@ -476,9 +476,8 @@ impl<'a> Iterator for BlockVisIter<'a> {
 /// position on a line.
 fn grapheme_vis_width_at_vis_pos(g: &str, pos: usize, tab_width: usize) -> usize {
    if g == "\t" {
-        return tab_stop_from_vis_pos(pos, tab_width) - pos;
+        // Tabs are special.
-    } else if is_line_ending(g) {
+        tab_stop_from_vis_pos(pos, tab_width) - pos
        return 1;
    } else {
        return grapheme_width(&g);
    }
--- a/src/utils.rs
+++ b/src/utils.rs
@ -17,7 +17,26 @@ pub fn digit_count(mut n: u32, b: u32) -> u32 {
 //=============================================================
 pub fn grapheme_width(g: &str) -> usize {
-    UnicodeWidthStr::width(g)
+    if g.as_bytes()[0] <= 127 {
        // Fast-path ascii.
        // Point 1: theoretically, ascii control characters should have zero
        // width, but in our case we actually want them to have width: if they
        // show up in text, we want to treat them as textual elements that can
        // be edited.  So we can get away with making all ascii single width
        // here.
        // Point 2: we're only examining the first codepoint here, which means
        // we're ignoring graphemes formed with combining characters.  However,
        // if it starts with ascii, it's going to be a single-width grapheme
        // regardless, so, again, we can get away with that here.
        // Point 3: we're only examining the first _byte_.  But for utf8, when
        // checking for ascii range values only, that works.
        1
    } else {
        // We use max(1) here because all grapheme clusters--even ill-formed
        // ones--should have at least some width so they can be edited
        // properly.
        UnicodeWidthStr::width(g).max(1)
    }
 }
 /// Finds the previous grapheme boundary before the given char position.