Handle grapheme width more robustly.
This commit is contained in:
parent
7b71cf2fdd
commit
82df4d64bd
|
@ -5,7 +5,7 @@ use ropey::{Rope, RopeSlice};
|
||||||
use crate::{
|
use crate::{
|
||||||
buffer::Buffer,
|
buffer::Buffer,
|
||||||
string_utils::char_count,
|
string_utils::char_count,
|
||||||
string_utils::{is_line_ending, str_is_whitespace},
|
string_utils::str_is_whitespace,
|
||||||
utils::{grapheme_width, is_grapheme_boundary, prev_grapheme_boundary, RopeGraphemes},
|
utils::{grapheme_width, is_grapheme_boundary, prev_grapheme_boundary, RopeGraphemes},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -476,9 +476,8 @@ impl<'a> Iterator for BlockVisIter<'a> {
|
||||||
/// position on a line.
|
/// position on a line.
|
||||||
fn grapheme_vis_width_at_vis_pos(g: &str, pos: usize, tab_width: usize) -> usize {
|
fn grapheme_vis_width_at_vis_pos(g: &str, pos: usize, tab_width: usize) -> usize {
|
||||||
if g == "\t" {
|
if g == "\t" {
|
||||||
return tab_stop_from_vis_pos(pos, tab_width) - pos;
|
// Tabs are special.
|
||||||
} else if is_line_ending(g) {
|
tab_stop_from_vis_pos(pos, tab_width) - pos
|
||||||
return 1;
|
|
||||||
} else {
|
} else {
|
||||||
return grapheme_width(&g);
|
return grapheme_width(&g);
|
||||||
}
|
}
|
||||||
|
|
21
src/utils.rs
21
src/utils.rs
|
@ -17,7 +17,26 @@ pub fn digit_count(mut n: u32, b: u32) -> u32 {
|
||||||
//=============================================================
|
//=============================================================
|
||||||
|
|
||||||
pub fn grapheme_width(g: &str) -> usize {
|
pub fn grapheme_width(g: &str) -> usize {
|
||||||
UnicodeWidthStr::width(g)
|
if g.as_bytes()[0] <= 127 {
|
||||||
|
// Fast-path ascii.
|
||||||
|
// Point 1: theoretically, ascii control characters should have zero
|
||||||
|
// width, but in our case we actually want them to have width: if they
|
||||||
|
// show up in text, we want to treat them as textual elements that can
|
||||||
|
// be edited. So we can get away with making all ascii single width
|
||||||
|
// here.
|
||||||
|
// Point 2: we're only examining the first codepoint here, which means
|
||||||
|
// we're ignoring graphemes formed with combining characters. However,
|
||||||
|
// if it starts with ascii, it's going to be a single-width grapheme
|
||||||
|
// regardless, so, again, we can get away with that here.
|
||||||
|
// Point 3: we're only examining the first _byte_. But for utf8, when
|
||||||
|
// checking for ascii range values only, that works.
|
||||||
|
1
|
||||||
|
} else {
|
||||||
|
// We use max(1) here because all grapheme clusters--even ill-formed
|
||||||
|
// ones--should have at least some width so they can be edited
|
||||||
|
// properly.
|
||||||
|
UnicodeWidthStr::width(g).max(1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Finds the previous grapheme boundary before the given char position.
|
/// Finds the previous grapheme boundary before the given char position.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user