diff --git a/src/formatter.rs b/src/formatter.rs index c606c65..9204494 100644 --- a/src/formatter.rs +++ b/src/formatter.rs @@ -5,7 +5,7 @@ use ropey::{Rope, RopeSlice}; use crate::{ buffer::Buffer, string_utils::char_count, - string_utils::{is_line_ending, str_is_whitespace}, + string_utils::str_is_whitespace, utils::{grapheme_width, is_grapheme_boundary, prev_grapheme_boundary, RopeGraphemes}, }; @@ -476,9 +476,8 @@ impl<'a> Iterator for BlockVisIter<'a> { /// position on a line. fn grapheme_vis_width_at_vis_pos(g: &str, pos: usize, tab_width: usize) -> usize { if g == "\t" { - return tab_stop_from_vis_pos(pos, tab_width) - pos; - } else if is_line_ending(g) { - return 1; + // Tabs are special. + tab_stop_from_vis_pos(pos, tab_width) - pos } else { return grapheme_width(&g); } diff --git a/src/utils.rs b/src/utils.rs index ce42422..e2417be 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -17,7 +17,26 @@ pub fn digit_count(mut n: u32, b: u32) -> u32 { //============================================================= pub fn grapheme_width(g: &str) -> usize { - UnicodeWidthStr::width(g) + if g.as_bytes()[0] <= 127 { + // Fast-path ascii. + // Point 1: theoretically, ascii control characters should have zero + // width, but in our case we actually want them to have width: if they + // show up in text, we want to treat them as textual elements that can + // be edited. So we can get away with making all ascii single width + // here. + // Point 2: we're only examining the first codepoint here, which means + // we're ignoring graphemes formed with combining characters. However, + // if it starts with ascii, it's going to be a single-width grapheme + // regardless, so, again, we can get away with that here. + // Point 3: we're only examining the first _byte_. But for utf8, when + // checking for ascii range values only, that works.
+ 1 + } else { + // We use max(1) here because all grapheme clusters--even ill-formed + // ones--should have at least some width so they can be edited + // properly. + UnicodeWidthStr::width(g).max(1) + } } /// Finds the previous grapheme boundary before the given char position.