Handle grapheme width more robustly.

This commit is contained in:
Nathan Vegdahl 2020-02-16 22:26:31 +09:00
parent 7b71cf2fdd
commit 82df4d64bd
2 changed files with 23 additions and 5 deletions

View File

@ -5,7 +5,7 @@ use ropey::{Rope, RopeSlice};
use crate::{
buffer::Buffer,
string_utils::char_count,
string_utils::{is_line_ending, str_is_whitespace},
string_utils::str_is_whitespace,
utils::{grapheme_width, is_grapheme_boundary, prev_grapheme_boundary, RopeGraphemes},
};
@ -476,9 +476,8 @@ impl<'a> Iterator for BlockVisIter<'a> {
/// position on a line.
fn grapheme_vis_width_at_vis_pos(g: &str, pos: usize, tab_width: usize) -> usize {
if g == "\t" {
return tab_stop_from_vis_pos(pos, tab_width) - pos;
} else if is_line_ending(g) {
return 1;
// Tabs are special.
tab_stop_from_vis_pos(pos, tab_width) - pos
} else {
return grapheme_width(&g);
}

View File

@ -17,7 +17,26 @@ pub fn digit_count(mut n: u32, b: u32) -> u32 {
//=============================================================
/// Returns the width of the grapheme `g` for layout/editing purposes.
///
/// Graphemes whose first byte is ASCII are always reported as width 1.
/// Everything else is measured with `UnicodeWidthStr::width`, clamped to a
/// minimum of 1, so this function never returns 0.
///
/// # Panics
///
/// Indexes the first byte of `g` unconditionally, so an empty `g` panics.
pub fn grapheme_width(g: &str) -> usize {
// NOTE(review): the bare expression on the next line looks like the
// pre-change body carried over from the diff view -- confirm.
UnicodeWidthStr::width(g)
if g.as_bytes()[0] <= 127 {
// Fast-path ASCII.
// Point 1: theoretically, ASCII control characters should have zero
// width, but in our case we actually want them to have width: if they
// show up in text, we want to treat them as textual elements that can
// be edited. So we can get away with making all ASCII single width
// here.
// Point 2: we're only examining the first codepoint here, which means
// we're ignoring graphemes formed with combining characters. However,
// if it starts with ASCII, it's going to be a single-width grapheme
// regardless, so, again, we can get away with that here.
// Point 3: we're only examining the first _byte_. But for UTF-8, when
// checking for ASCII range values only, that works.
1
} else {
// We use max(1) here because all grapheme clusters--even ill-formed
// ones--should have at least some width so they can be edited
// properly.
UnicodeWidthStr::width(g).max(1)
}
}
/// Finds the previous grapheme boundary before the given char position.