WIP refactor: changing internal text representation to be line-based.
This will make some of the internals a little more complex, but should simplify things on the balance.
This commit is contained in:
parent
e3e2c866b9
commit
370d8a315e
|
@ -6,6 +6,7 @@ use self::text_node::{TextNode, TextNodeData};
|
||||||
|
|
||||||
mod text_block;
|
mod text_block;
|
||||||
mod text_node;
|
mod text_node;
|
||||||
|
mod text_line;
|
||||||
|
|
||||||
|
|
||||||
/// A text buffer
|
/// A text buffer
|
||||||
|
|
413
src/buffer/text_line.rs
Normal file
413
src/buffer/text_line.rs
Normal file
|
@ -0,0 +1,413 @@
|
||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
use std::mem;
|
||||||
|
use std::str::Graphemes;
|
||||||
|
use string_utils::{grapheme_pos_to_byte_pos, is_line_ending};
|
||||||
|
|
||||||
|
|
||||||
|
/// A single line of text
|
||||||
|
pub struct TextLine {
|
||||||
|
text: Vec<u8>, // The text data, stored as UTF8
|
||||||
|
ending: LineEnding, // The type of line ending, if any
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
impl TextLine {
|
||||||
|
/// Creates a new empty TextLine
|
||||||
|
pub fn new() -> TextLine {
|
||||||
|
TextLine {
|
||||||
|
text: Vec::new(),
|
||||||
|
ending: LineEnding::None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Creates a new TextLine from a str.
|
||||||
|
pub fn new_from_str(text: &str) -> TextLine {
|
||||||
|
// Initialize TextLine
|
||||||
|
let mut tl = TextLine {
|
||||||
|
text: Vec::with_capacity(text.len()),
|
||||||
|
ending: LineEnding::None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Copy text data, stopping on a line ending if any is found
|
||||||
|
for g in text.graphemes(true) {
|
||||||
|
match g {
|
||||||
|
//==============
|
||||||
|
// Line endings
|
||||||
|
//==============
|
||||||
|
|
||||||
|
// CRLF
|
||||||
|
"\u{000D}\u{000A}" => {
|
||||||
|
tl.ending = LineEnding::CRLF;
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
|
||||||
|
// LF
|
||||||
|
"\u{000A}" => {
|
||||||
|
tl.ending = LineEnding::LF;
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
|
||||||
|
// VT
|
||||||
|
"\u{000B}" => {
|
||||||
|
tl.ending = LineEnding::VT;
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
|
||||||
|
// FF
|
||||||
|
"\u{000C}" => {
|
||||||
|
tl.ending = LineEnding::FF;
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
|
||||||
|
// CR
|
||||||
|
"\u{000D}" => {
|
||||||
|
tl.ending = LineEnding::CR;
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
|
||||||
|
// NEL
|
||||||
|
"\u{0085}" => {
|
||||||
|
tl.ending = LineEnding::NEL;
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
|
||||||
|
// LS
|
||||||
|
"\u{2028}" => {
|
||||||
|
tl.ending = LineEnding::LS;
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
|
||||||
|
// PS
|
||||||
|
"\u{2029}" => {
|
||||||
|
tl.ending = LineEnding::PS;
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
|
||||||
|
//==================
|
||||||
|
// Other characters
|
||||||
|
//==================
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
for b in g.bytes() {
|
||||||
|
tl.text.push(b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Done!
|
||||||
|
return tl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Returns an immutable string slice into the text block's memory
|
||||||
|
pub fn as_str<'a>(&'a self) -> &'a str {
|
||||||
|
unsafe {
|
||||||
|
mem::transmute(self.text.as_slice())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Inserts `text` at grapheme index `pos`.
|
||||||
|
/// NOTE: panics if it encounters a line ending in the text.
|
||||||
|
pub fn insert_text(&mut self, text: &str, pos: uint) {
|
||||||
|
// Find insertion position in bytes
|
||||||
|
let byte_pos = grapheme_pos_to_byte_pos(self.as_str(), pos);
|
||||||
|
|
||||||
|
// Grow data size
|
||||||
|
self.text.grow(text.len(), 0);
|
||||||
|
|
||||||
|
// Move old bytes forward
|
||||||
|
let mut from = self.text.len() - text.len();
|
||||||
|
let mut to = self.text.len();
|
||||||
|
while from > byte_pos {
|
||||||
|
from -= 1;
|
||||||
|
to -= 1;
|
||||||
|
|
||||||
|
self.text[to] = self.text[from];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy new bytes in
|
||||||
|
let mut i = byte_pos;
|
||||||
|
for g in text.graphemes(true) {
|
||||||
|
if is_line_ending(g) {
|
||||||
|
panic!("TextLine::insert_text(): line ending in inserted text.");
|
||||||
|
}
|
||||||
|
|
||||||
|
for b in g.bytes() {
|
||||||
|
self.text[i] = b;
|
||||||
|
i += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Returns an iterator over the graphemes of the line
|
||||||
|
pub fn grapheme_iter<'a>(&'a self) -> TextLineIter<'a> {
|
||||||
|
TextLineIter {
|
||||||
|
graphemes: self.as_str().graphemes(true),
|
||||||
|
ending: self.ending,
|
||||||
|
done: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Returns an iterator over the graphemes of the line
|
||||||
|
pub fn grapheme_iter_at_index<'a>(&'a self, index: uint) -> TextLineIter<'a> {
|
||||||
|
let temp: &str = unsafe{mem::transmute(self.text.as_slice())};
|
||||||
|
|
||||||
|
let mut iter = TextLineIter {
|
||||||
|
graphemes: temp.graphemes(true),
|
||||||
|
ending: self.ending,
|
||||||
|
done: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
for _ in range(0, index) {
|
||||||
|
iter.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Represents one of the valid Unicode line endings.
|
||||||
|
/// Also acts as an index into `LINE_ENDINGS`.
|
||||||
|
#[deriving(PartialEq, Copy)]
|
||||||
|
pub enum LineEnding {
|
||||||
|
None = 0, // No line ending
|
||||||
|
CRLF = 1, // CarriageReturn followed by LineFeed
|
||||||
|
LF = 2, // U+000A -- LineFeed
|
||||||
|
VT = 3, // U+000B -- VerticalTab
|
||||||
|
FF = 4, // U+000C -- FormFeed
|
||||||
|
CR = 5, // U+000D -- CarriageReturn
|
||||||
|
NEL = 6, // U+0085 -- NextLine
|
||||||
|
LS = 7, // U+2028 -- Line Separator
|
||||||
|
PS = 8, // U+2029 -- ParagraphSeparator
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An array of string literals corresponding to the possible
|
||||||
|
/// unicode line endings.
|
||||||
|
pub const LINE_ENDINGS: [&'static str, ..9] = ["",
|
||||||
|
"\u{000D}\u{000A}",
|
||||||
|
"\u{000A}",
|
||||||
|
"\u{000B}",
|
||||||
|
"\u{000C}",
|
||||||
|
"\u{000D}",
|
||||||
|
"\u{0085}",
|
||||||
|
"\u{2028}",
|
||||||
|
"\u{2029}"
|
||||||
|
];
|
||||||
|
|
||||||
|
|
||||||
|
/// An iterator over the graphemes of a TextLine
|
||||||
|
pub struct TextLineIter<'a> {
|
||||||
|
graphemes: Graphemes<'a>,
|
||||||
|
ending: LineEnding,
|
||||||
|
done: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator<&'a str> for TextLineIter<'a> {
|
||||||
|
fn next(&mut self) -> Option<&'a str> {
|
||||||
|
if self.done {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
let g = self.graphemes.next();
|
||||||
|
if let Some(_) = g {
|
||||||
|
return g;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
self.done = true;
|
||||||
|
|
||||||
|
if self.ending == LineEnding::None {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return Some(LINE_ENDINGS[self.ending as uint]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//=========================================================================
|
||||||
|
// TextLine tests
|
||||||
|
//=========================================================================
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn new_text_line() {
|
||||||
|
let tl = TextLine::new();
|
||||||
|
|
||||||
|
assert!(tl.text.len() == 0);
|
||||||
|
assert!(tl.ending == LineEnding::None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn new_text_line_from_str() {
|
||||||
|
let tl = TextLine::new_from_str("Hello!");
|
||||||
|
|
||||||
|
assert!(tl.text.len() == 6);
|
||||||
|
assert!(tl.text[0] == ('H' as u8));
|
||||||
|
assert!(tl.text[1] == ('e' as u8));
|
||||||
|
assert!(tl.text[2] == ('l' as u8));
|
||||||
|
assert!(tl.text[3] == ('l' as u8));
|
||||||
|
assert!(tl.text[4] == ('o' as u8));
|
||||||
|
assert!(tl.text[5] == ('!' as u8));
|
||||||
|
assert!(tl.ending == LineEnding::None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn new_text_line_from_empty_str() {
|
||||||
|
let tl = TextLine::new_from_str("");
|
||||||
|
|
||||||
|
assert!(tl.text.len() == 0);
|
||||||
|
assert!(tl.ending == LineEnding::None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn new_text_line_from_str_with_lf() {
|
||||||
|
let tl = TextLine::new_from_str("Hello!\n");
|
||||||
|
|
||||||
|
assert!(tl.text.len() == 6);
|
||||||
|
assert!(tl.text[0] == ('H' as u8));
|
||||||
|
assert!(tl.text[1] == ('e' as u8));
|
||||||
|
assert!(tl.text[2] == ('l' as u8));
|
||||||
|
assert!(tl.text[3] == ('l' as u8));
|
||||||
|
assert!(tl.text[4] == ('o' as u8));
|
||||||
|
assert!(tl.text[5] == ('!' as u8));
|
||||||
|
assert!(tl.ending == LineEnding::LF);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn new_text_line_from_str_with_crlf() {
|
||||||
|
let tl = TextLine::new_from_str("Hello!\r\n");
|
||||||
|
|
||||||
|
assert!(tl.text.len() == 6);
|
||||||
|
assert!(tl.text[0] == ('H' as u8));
|
||||||
|
assert!(tl.text[1] == ('e' as u8));
|
||||||
|
assert!(tl.text[2] == ('l' as u8));
|
||||||
|
assert!(tl.text[3] == ('l' as u8));
|
||||||
|
assert!(tl.text[4] == ('o' as u8));
|
||||||
|
assert!(tl.text[5] == ('!' as u8));
|
||||||
|
assert!(tl.ending == LineEnding::CRLF);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn new_text_line_from_str_with_crlf_and_too_long() {
|
||||||
|
let tl = TextLine::new_from_str("Hello!\r\nLa la la la");
|
||||||
|
|
||||||
|
assert!(tl.text.len() == 6);
|
||||||
|
assert!(tl.text[0] == ('H' as u8));
|
||||||
|
assert!(tl.text[1] == ('e' as u8));
|
||||||
|
assert!(tl.text[2] == ('l' as u8));
|
||||||
|
assert!(tl.text[3] == ('l' as u8));
|
||||||
|
assert!(tl.text[4] == ('o' as u8));
|
||||||
|
assert!(tl.text[5] == ('!' as u8));
|
||||||
|
assert!(tl.ending == LineEnding::CRLF);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_line_insert_text() {
|
||||||
|
let mut tl = TextLine::new_from_str("Hello!\r\n");
|
||||||
|
|
||||||
|
tl.insert_text(" world", 5);
|
||||||
|
|
||||||
|
assert!(tl.text.len() == 12);
|
||||||
|
assert!(tl.text[0] == ('H' as u8));
|
||||||
|
assert!(tl.text[1] == ('e' as u8));
|
||||||
|
assert!(tl.text[2] == ('l' as u8));
|
||||||
|
assert!(tl.text[3] == ('l' as u8));
|
||||||
|
assert!(tl.text[4] == ('o' as u8));
|
||||||
|
assert!(tl.text[5] == (' ' as u8));
|
||||||
|
assert!(tl.text[6] == ('w' as u8));
|
||||||
|
assert!(tl.text[7] == ('o' as u8));
|
||||||
|
assert!(tl.text[8] == ('r' as u8));
|
||||||
|
assert!(tl.text[9] == ('l' as u8));
|
||||||
|
assert!(tl.text[10] == ('d' as u8));
|
||||||
|
assert!(tl.text[11] == ('!' as u8));
|
||||||
|
assert!(tl.ending == LineEnding::CRLF);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//=========================================================================
|
||||||
|
// TextLineIter tests
|
||||||
|
//=========================================================================
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_line_grapheme_iter() {
|
||||||
|
let tl = TextLine::new_from_str("Hello!");
|
||||||
|
let mut iter = tl.grapheme_iter();
|
||||||
|
|
||||||
|
assert!(iter.next() == Some("H"));
|
||||||
|
assert!(iter.next() == Some("e"));
|
||||||
|
assert!(iter.next() == Some("l"));
|
||||||
|
assert!(iter.next() == Some("l"));
|
||||||
|
assert!(iter.next() == Some("o"));
|
||||||
|
assert!(iter.next() == Some("!"));
|
||||||
|
assert!(iter.next() == None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_line_grapheme_iter_with_lf() {
|
||||||
|
let tl = TextLine::new_from_str("Hello!\n");
|
||||||
|
let mut iter = tl.grapheme_iter();
|
||||||
|
|
||||||
|
assert!(iter.next() == Some("H"));
|
||||||
|
assert!(iter.next() == Some("e"));
|
||||||
|
assert!(iter.next() == Some("l"));
|
||||||
|
assert!(iter.next() == Some("l"));
|
||||||
|
assert!(iter.next() == Some("o"));
|
||||||
|
assert!(iter.next() == Some("!"));
|
||||||
|
assert!(iter.next() == Some("\n"));
|
||||||
|
assert!(iter.next() == None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_line_grapheme_iter_with_crlf() {
|
||||||
|
let tl = TextLine::new_from_str("Hello!\r\n");
|
||||||
|
let mut iter = tl.grapheme_iter();
|
||||||
|
|
||||||
|
assert!(iter.next() == Some("H"));
|
||||||
|
assert!(iter.next() == Some("e"));
|
||||||
|
assert!(iter.next() == Some("l"));
|
||||||
|
assert!(iter.next() == Some("l"));
|
||||||
|
assert!(iter.next() == Some("o"));
|
||||||
|
assert!(iter.next() == Some("!"));
|
||||||
|
assert!(iter.next() == Some("\r\n"));
|
||||||
|
assert!(iter.next() == None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_line_grapheme_iter_at_index() {
|
||||||
|
let tl = TextLine::new_from_str("Hello!");
|
||||||
|
let mut iter = tl.grapheme_iter_at_index(2);
|
||||||
|
|
||||||
|
assert!(iter.next() == Some("l"));
|
||||||
|
assert!(iter.next() == Some("l"));
|
||||||
|
assert!(iter.next() == Some("o"));
|
||||||
|
assert!(iter.next() == Some("!"));
|
||||||
|
assert!(iter.next() == None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_line_grapheme_iter_at_index_past_end() {
|
||||||
|
let tl = TextLine::new_from_str("Hello!");
|
||||||
|
let mut iter = tl.grapheme_iter_at_index(10);
|
||||||
|
|
||||||
|
assert!(iter.next() == None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn text_line_grapheme_iter_at_index_at_lf() {
|
||||||
|
let tl = TextLine::new_from_str("Hello!\n");
|
||||||
|
let mut iter = tl.grapheme_iter_at_index(6);
|
||||||
|
|
||||||
|
assert!(iter.next() == Some("\n"));
|
||||||
|
assert!(iter.next() == None);
|
||||||
|
}
|
|
@ -22,6 +22,7 @@ pub enum IndexOrOffset {
|
||||||
pub struct TextNode {
|
pub struct TextNode {
|
||||||
pub data: TextNodeData,
|
pub data: TextNodeData,
|
||||||
pub tree_height: uint,
|
pub tree_height: uint,
|
||||||
|
|
||||||
pub char_count: uint,
|
pub char_count: uint,
|
||||||
pub newline_count: uint,
|
pub newline_count: uint,
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,21 @@
|
||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
//! Misc helpful utility functions for TextBuffer related stuff.
|
//! Misc helpful utility functions for TextBuffer related stuff.
|
||||||
|
|
||||||
|
pub fn is_line_ending(text: &str) -> bool {
|
||||||
|
match text {
|
||||||
|
"\u{000D}\u{000A}"
|
||||||
|
| "\u{000A}"
|
||||||
|
| "\u{000B}"
|
||||||
|
| "\u{000C}"
|
||||||
|
| "\u{000D}"
|
||||||
|
| "\u{0085}"
|
||||||
|
| "\u{2028}"
|
||||||
|
| "\u{2029}" => true,
|
||||||
|
|
||||||
|
_ => false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn newline_count(text: &str) -> uint {
|
pub fn newline_count(text: &str) -> uint {
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
for c in text.chars() {
|
for c in text.chars() {
|
||||||
|
@ -49,3 +64,20 @@ pub fn char_pos_to_byte_pos(text: &str, pos: uint) -> uint {
|
||||||
|
|
||||||
panic!("char_pos_to_byte_pos(): char position off the end of the string.");
|
panic!("char_pos_to_byte_pos(): char position off the end of the string.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn grapheme_pos_to_byte_pos(text: &str, pos: uint) -> uint {
|
||||||
|
let mut i: uint = 0;
|
||||||
|
|
||||||
|
for (offset, _) in text.grapheme_indices(true) {
|
||||||
|
if i == pos {
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if i == pos {
|
||||||
|
return text.len();
|
||||||
|
}
|
||||||
|
|
||||||
|
panic!("grapheme_pos_to_byte_pos(): char position off the end of the string.");
|
||||||
|
}
|
6
todo.md
6
todo.md
|
@ -7,3 +7,9 @@
|
||||||
- UI that wraps editors, for split view.
|
- UI that wraps editors, for split view.
|
||||||
- Unit testing for text block, text node, and text buffer. They must
|
- Unit testing for text block, text node, and text buffer. They must
|
||||||
be reliable!
|
be reliable!
|
||||||
|
|
||||||
|
- Change text data structure to store lines explicitly. Still use a tree
|
||||||
|
structure to hold the lines, but just store the lines themselves as
|
||||||
|
straight vectors for now. Be mindful to keep the API's clean enough
|
||||||
|
that you can substitute another internal storage approach for lines
|
||||||
|
later on.
|
Loading…
Reference in New Issue
Block a user