Lines now internally use ropes to store their text.

The rope uses a large leaf-node text length, so in the vast majority of
cases this ends up being the same as directly storing the string data.
But in the case that the line becomes extremely long, this will allow
for reasonable performance.
This commit is contained in:
Nathan Vegdahl 2015-02-15 14:56:56 -08:00
parent a89e505fb0
commit e1c9a6955d
4 changed files with 444 additions and 312 deletions

View File

@ -1,14 +1,13 @@
#![allow(dead_code)] #![allow(dead_code)]
use std::iter::repeat;
use std::mem; use std::mem;
use std::str::Graphemes; use super::rope::{Rope, RopeGraphemeIter};
use string_utils::{grapheme_count, grapheme_pos_to_byte_pos, is_line_ending}; use string_utils::is_line_ending;
/// A single line of text /// A single line of text
pub struct Line { pub struct Line {
text: Vec<u8>, // The text data, stored as UTF8 text: Rope, // The text data, stored as UTF8
pub ending: LineEnding, // The type of line ending, if any pub ending: LineEnding, // The type of line ending, if any
} }
@ -17,7 +16,7 @@ impl Line {
/// Creates a new empty Line /// Creates a new empty Line
pub fn new() -> Line { pub fn new() -> Line {
Line { Line {
text: Vec::new(), text: Rope::new(),
ending: LineEnding::None, ending: LineEnding::None,
} }
} }
@ -25,13 +24,10 @@ impl Line {
/// Creates a new Line from a str. /// Creates a new Line from a str.
pub fn new_from_str(text: &str) -> Line { pub fn new_from_str(text: &str) -> Line {
// Initialize Line let mut ending = LineEnding::None;
let mut tl = Line { let mut end_pos = 0;
text: Vec::with_capacity(text.len()),
ending: LineEnding::None,
};
// Copy text data, stopping on a line ending if any is found // Find the slice before the line ending, if any
for g in text.graphemes(true) { for g in text.graphemes(true) {
match g { match g {
//============== //==============
@ -40,49 +36,49 @@ impl Line {
// CRLF // CRLF
"\u{000D}\u{000A}" => { "\u{000D}\u{000A}" => {
tl.ending = LineEnding::CRLF; ending = LineEnding::CRLF;
break; break;
}, },
// LF // LF
"\u{000A}" => { "\u{000A}" => {
tl.ending = LineEnding::LF; ending = LineEnding::LF;
break; break;
}, },
// VT // VT
"\u{000B}" => { "\u{000B}" => {
tl.ending = LineEnding::VT; ending = LineEnding::VT;
break; break;
}, },
// FF // FF
"\u{000C}" => { "\u{000C}" => {
tl.ending = LineEnding::FF; ending = LineEnding::FF;
break; break;
}, },
// CR // CR
"\u{000D}" => { "\u{000D}" => {
tl.ending = LineEnding::CR; ending = LineEnding::CR;
break; break;
}, },
// NEL // NEL
"\u{0085}" => { "\u{0085}" => {
tl.ending = LineEnding::NEL; ending = LineEnding::NEL;
break; break;
}, },
// LS // LS
"\u{2028}" => { "\u{2028}" => {
tl.ending = LineEnding::LS; ending = LineEnding::LS;
break; break;
}, },
// PS // PS
"\u{2029}" => { "\u{2029}" => {
tl.ending = LineEnding::PS; ending = LineEnding::PS;
break; break;
}, },
@ -91,41 +87,38 @@ impl Line {
//================== //==================
_ => { _ => {
for b in g.bytes() { end_pos += g.len();
tl.text.push(b);
}
} }
} }
} }
// Done! // Create and return Line
return tl; return Line {
text: Rope::new_from_str(&text[..end_pos]),
ending: ending,
};
} }
pub fn new_from_str_unchecked(text: &str) -> Line { pub fn new_from_str_unchecked(text: &str) -> Line {
// Initialize Line let mut ending = LineEnding::None;
let mut tl = Line {
text: Vec::new(),
ending: LineEnding::None,
};
tl.text.push_all(text.as_bytes()); let bytes = text.as_bytes();
// Check for line ending // Check for line ending
let mut le_size: usize = 0; let mut le_size: usize = 0;
let text_size = tl.text.len(); let text_size = text.len();
if tl.text.len() >= 3 { if text.len() >= 3 {
match unsafe{mem::transmute::<&[u8], &str>(&tl.text[(text_size-3)..])} { match &text[(text_size-3)..] {
// LS // LS
"\u{2028}" => { "\u{2028}" => {
tl.ending = LineEnding::LS; ending = LineEnding::LS;
le_size = 3; le_size = 3;
}, },
// PS // PS
"\u{2029}" => { "\u{2029}" => {
tl.ending = LineEnding::PS; ending = LineEnding::PS;
le_size = 3; le_size = 3;
}, },
@ -133,11 +126,11 @@ impl Line {
} }
} }
if le_size == 0 && tl.text.len() >= 2 { if le_size == 0 && text.len() >= 2 {
match unsafe{mem::transmute::<&[u8], &str>(&tl.text[(text_size-2)..])} { match &text[(text_size-2)..] {
// CRLF // CRLF
"\u{000D}\u{000A}" => { "\u{000D}\u{000A}" => {
tl.ending = LineEnding::CRLF; ending = LineEnding::CRLF;
le_size = 2; le_size = 2;
}, },
@ -145,35 +138,35 @@ impl Line {
} }
} }
if le_size == 0 && tl.text.len() >= 1 { if le_size == 0 && text.len() >= 1 {
match unsafe{mem::transmute::<&[u8], &str>(&tl.text[(text_size-1)..])} { match &text[(text_size-1)..] {
// LF // LF
"\u{000A}" => { "\u{000A}" => {
tl.ending = LineEnding::LF; ending = LineEnding::LF;
le_size = 1; le_size = 1;
}, },
// VT // VT
"\u{000B}" => { "\u{000B}" => {
tl.ending = LineEnding::VT; ending = LineEnding::VT;
le_size = 1; le_size = 1;
}, },
// FF // FF
"\u{000C}" => { "\u{000C}" => {
tl.ending = LineEnding::FF; ending = LineEnding::FF;
le_size = 1; le_size = 1;
}, },
// CR // CR
"\u{000D}" => { "\u{000D}" => {
tl.ending = LineEnding::CR; ending = LineEnding::CR;
le_size = 1; le_size = 1;
}, },
// NEL // NEL
"\u{0085}" => { "\u{0085}" => {
tl.ending = LineEnding::NEL; ending = LineEnding::NEL;
le_size = 1; le_size = 1;
}, },
@ -181,12 +174,11 @@ impl Line {
} }
} }
// Truncate off the line ending, if any // Create and return Line
let trunc_size = text_size - le_size; return Line {
tl.text.truncate(trunc_size); text: Rope::new_from_str(&text[..(bytes.len()-le_size)]),
ending: ending,
// Done! };
return tl;
} }
@ -194,13 +186,16 @@ impl Line {
/// Does not check to see if the string has internal newlines. /// Does not check to see if the string has internal newlines.
/// This is primarily used for efficient loading of files. /// This is primarily used for efficient loading of files.
pub fn new_from_string_unchecked(text: String) -> Line { pub fn new_from_string_unchecked(text: String) -> Line {
// TODO: this can be smarter, and can pass the string
// directly to the Rope after taking off any line
// endings.
return Line::new_from_str_unchecked(text.as_slice()); return Line::new_from_str_unchecked(text.as_slice());
} }
/// Returns the total number of unicode graphemes in the line /// Returns the total number of unicode graphemes in the line
pub fn grapheme_count(&self) -> usize { pub fn grapheme_count(&self) -> usize {
let mut count = grapheme_count(self.as_str()); let mut count = self.text.grapheme_count();
match self.ending { match self.ending {
LineEnding::None => {}, LineEnding::None => {},
_ => {count += 1;} _ => {count += 1;}
@ -212,11 +207,13 @@ impl Line {
/// Returns the total number of unicode graphemes in the line, /// Returns the total number of unicode graphemes in the line,
/// not counting the line ending grapheme, if any. /// not counting the line ending grapheme, if any.
pub fn grapheme_count_sans_line_ending(&self) -> usize { pub fn grapheme_count_sans_line_ending(&self) -> usize {
grapheme_count(self.as_str()) self.text.grapheme_count()
} }
pub fn grapheme_at_index<'a>(&'a self, index: usize) -> &'a str { pub fn grapheme_at_index<'a>(&'a self, index: usize) -> &'a str {
// TODO: we don't have to iterate over the entire line
// anymore because we're using a rope now. Update.
let mut i = 0; let mut i = 0;
for g in self.grapheme_iter() { for g in self.grapheme_iter() {
@ -233,45 +230,25 @@ impl Line {
} }
/// Returns an immutable string slice into the text block's memory /// Returns a string containing the line's text
pub fn as_str<'a>(&'a self) -> &'a str { pub fn to_string(&self) -> String {
unsafe { let s = self.text.to_string();
mem::transmute(&self.text[]) return s;
}
} }
/// Inserts `text` at grapheme index `pos`. /// Inserts `text` at grapheme index `pos`.
/// NOTE: panics if it encounters a line ending in the text. /// NOTE: panics if it encounters a line ending in the text.
pub fn insert_text(&mut self, text: &str, pos: usize) { pub fn insert_text(&mut self, text: &str, pos: usize) {
// Find insertion position in bytes // Check for line endings
let byte_pos = grapheme_pos_to_byte_pos(self.as_str(), pos);
// Grow data size
self.text.extend(repeat(0).take(text.len()));
// Move old bytes forward
let mut from = self.text.len() - text.len();
let mut to = self.text.len();
while from > byte_pos {
from -= 1;
to -= 1;
self.text[to] = self.text[from];
}
// Copy new bytes in
let mut i = byte_pos;
for g in text.graphemes(true) { for g in text.graphemes(true) {
if is_line_ending(g) { if is_line_ending(g) {
panic!("Line::insert_text(): line ending in inserted text."); panic!("Line::insert_text(): line ending in inserted text.");
} }
}
for b in g.bytes() { // Insert text
self.text[i] = b; self.text.insert_text_at_grapheme_index(text, pos);
i += 1
}
}
} }
@ -279,49 +256,22 @@ impl Line {
/// any). /// any).
/// NOTE: panics if it encounters a line ending in the text. /// NOTE: panics if it encounters a line ending in the text.
pub fn append_text(&mut self, text: &str) { pub fn append_text(&mut self, text: &str) {
let mut i = self.text.len(); // Check for line endings
// Grow data size
self.text.extend(repeat(0).take(text.len()));
// Copy new bytes in
for g in text.graphemes(true) { for g in text.graphemes(true) {
if is_line_ending(g) { if is_line_ending(g) {
panic!("Line::append_text(): line ending in inserted text."); panic!("Line::append_text(): line ending in inserted text.");
} }
}
for b in g.bytes() { // Append text
self.text[i] = b; let gc = self.text.grapheme_count();
i += 1 self.text.insert_text_at_grapheme_index(text, gc);
}
}
} }
/// Remove the text between grapheme positions 'pos_a' and 'pos_b'. /// Remove the text between grapheme positions 'pos_a' and 'pos_b'.
pub fn remove_text(&mut self, pos_a: usize, pos_b: usize) { pub fn remove_text(&mut self, pos_a: usize, pos_b: usize) {
// Bounds checks self.text.remove_text_between_grapheme_indices(pos_a, pos_b);
if pos_a > pos_b {
panic!("Line::remove_text(): pos_a must be less than or equal to pos_b.");
}
// Find removal positions in bytes
let byte_pos_a = grapheme_pos_to_byte_pos(self.as_str(), pos_a);
let byte_pos_b = grapheme_pos_to_byte_pos(self.as_str(), pos_b);
// Move bytes to fill in the gap left by the removed bytes
let mut from = byte_pos_b;
let mut to = byte_pos_a;
while from < self.text.len() {
self.text[to] = self.text[from];
from += 1;
to += 1;
}
// Remove data from the end
let final_text_size = self.text.len() + byte_pos_a - byte_pos_b;
self.text.truncate(final_text_size);
} }
@ -329,6 +279,7 @@ impl Line {
/// This line stays as the first part of the split. The second /// This line stays as the first part of the split. The second
/// part is returned. /// part is returned.
pub fn split(&mut self, ending: LineEnding, pos: usize) -> Line { pub fn split(&mut self, ending: LineEnding, pos: usize) -> Line {
// TODO: change code to use Rope
let mut other = Line::new(); let mut other = Line::new();
// Inserting at very beginning: special cased for efficiency // Inserting at very beginning: special cased for efficiency
@ -338,14 +289,8 @@ impl Line {
} }
// Otherwise, general case // Otherwise, general case
else { else {
// Find the byte index to split at // Split the text
let byte_pos = grapheme_pos_to_byte_pos(self.as_str(), pos); other.text = self.text.split(pos);
// Copy the elements after the split index to the second line
other.text.push_all(&self.text[byte_pos..]);
// Truncate the first line
self.text.truncate(byte_pos);
// Set the line endings appropriately // Set the line endings appropriately
other.ending = self.ending; other.ending = self.ending;
@ -356,10 +301,20 @@ impl Line {
} }
/// Appends another line to the end of this one, consuming the other
/// line.
/// Note that the resulting line ending is the ending of the other
/// line, if any.
pub fn append(&mut self, other: Line) {
self.ending = other.ending;
self.text.append(other.text);
}
/// Returns an iterator over the graphemes of the line /// Returns an iterator over the graphemes of the line
pub fn grapheme_iter<'a>(&'a self) -> LineGraphemeIter<'a> { pub fn grapheme_iter<'a>(&'a self) -> LineGraphemeIter<'a> {
LineGraphemeIter { LineGraphemeIter {
graphemes: self.as_str().graphemes(true), graphemes: self.text.grapheme_iter(),
ending: self.ending, ending: self.ending,
done: false, done: false,
} }
@ -368,19 +323,11 @@ impl Line {
/// Returns an iterator over the graphemes of the line /// Returns an iterator over the graphemes of the line
pub fn grapheme_iter_at_index<'a>(&'a self, index: usize) -> LineGraphemeIter<'a> { pub fn grapheme_iter_at_index<'a>(&'a self, index: usize) -> LineGraphemeIter<'a> {
let temp: &str = unsafe{mem::transmute(&self.text[])}; LineGraphemeIter {
graphemes: self.text.grapheme_iter_at_index(index),
let mut iter = LineGraphemeIter {
graphemes: temp.graphemes(true),
ending: self.ending, ending: self.ending,
done: false, done: false,
};
for _ in range(0, index) {
iter.next();
} }
return iter;
} }
} }
@ -473,7 +420,7 @@ pub const LINE_ENDINGS: [&'static str; 9] = ["",
/// An iterator over the graphemes of a Line /// An iterator over the graphemes of a Line
pub struct LineGraphemeIter<'a> { pub struct LineGraphemeIter<'a> {
graphemes: Graphemes<'a>, graphemes: RopeGraphemeIter<'a>,
ending: LineEnding, ending: LineEnding,
done: bool, done: bool,
} }
@ -531,7 +478,7 @@ mod tests {
fn new_text_line() { fn new_text_line() {
let tl = Line::new(); let tl = Line::new();
assert!(tl.text.len() == 0); assert_eq!(tl.text.grapheme_count(), 0);
assert!(tl.ending == LineEnding::None); assert!(tl.ending == LineEnding::None);
} }
@ -539,13 +486,13 @@ mod tests {
fn new_text_line_from_str() { fn new_text_line_from_str() {
let tl = Line::new_from_str("Hello!"); let tl = Line::new_from_str("Hello!");
assert!(tl.text.len() == 6); assert_eq!(tl.text.grapheme_count(), 6);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == ('!' as u8)); assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::None); assert!(tl.ending == LineEnding::None);
} }
@ -553,7 +500,7 @@ mod tests {
fn new_text_line_from_empty_str() { fn new_text_line_from_empty_str() {
let tl = Line::new_from_str(""); let tl = Line::new_from_str("");
assert!(tl.text.len() == 0); assert_eq!(tl.text.grapheme_count(), 0);
assert!(tl.ending == LineEnding::None); assert!(tl.ending == LineEnding::None);
} }
@ -561,13 +508,13 @@ mod tests {
fn new_text_line_from_str_with_lf() { fn new_text_line_from_str_with_lf() {
let tl = Line::new_from_str("Hello!\n"); let tl = Line::new_from_str("Hello!\n");
assert!(tl.text.len() == 6); assert_eq!(tl.text.grapheme_count(), 6);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == ('!' as u8)); assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::LF); assert!(tl.ending == LineEnding::LF);
} }
@ -575,13 +522,13 @@ mod tests {
fn new_text_line_from_str_with_crlf() { fn new_text_line_from_str_with_crlf() {
let tl = Line::new_from_str("Hello!\r\n"); let tl = Line::new_from_str("Hello!\r\n");
assert!(tl.text.len() == 6); assert_eq!(tl.text.grapheme_count(), 6);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == ('!' as u8)); assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::CRLF); assert!(tl.ending == LineEnding::CRLF);
} }
@ -589,13 +536,13 @@ mod tests {
fn new_text_line_from_str_with_crlf_and_too_long() { fn new_text_line_from_str_with_crlf_and_too_long() {
let tl = Line::new_from_str("Hello!\r\nLa la la la"); let tl = Line::new_from_str("Hello!\r\nLa la la la");
assert!(tl.text.len() == 6); assert_eq!(tl.text.grapheme_count(), 6);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == ('!' as u8)); assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::CRLF); assert!(tl.ending == LineEnding::CRLF);
} }
@ -605,13 +552,13 @@ mod tests {
let tl = Line::new_from_string_unchecked(s); let tl = Line::new_from_string_unchecked(s);
assert!(tl.text.len() == 6); assert_eq!(tl.text.grapheme_count(), 6);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == ('!' as u8)); assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::None); assert!(tl.ending == LineEnding::None);
} }
@ -621,13 +568,13 @@ mod tests {
let tl = Line::new_from_string_unchecked(s); let tl = Line::new_from_string_unchecked(s);
assert!(tl.text.len() == 6); assert_eq!(tl.text.grapheme_count(), 6);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == ('!' as u8)); assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::LF); assert!(tl.ending == LineEnding::LF);
} }
@ -637,13 +584,13 @@ mod tests {
let tl = Line::new_from_string_unchecked(s); let tl = Line::new_from_string_unchecked(s);
assert!(tl.text.len() == 6); assert_eq!(tl.text.grapheme_count(), 6);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == ('!' as u8)); assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::CRLF); assert!(tl.ending == LineEnding::CRLF);
} }
@ -653,13 +600,13 @@ mod tests {
let tl = Line::new_from_string_unchecked(s); let tl = Line::new_from_string_unchecked(s);
assert!(tl.text.len() == 6); assert_eq!(tl.text.grapheme_count(), 6);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == ('!' as u8)); assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::LS); assert!(tl.ending == LineEnding::LS);
} }
@ -669,19 +616,19 @@ mod tests {
tl.insert_text(" world", 5); tl.insert_text(" world", 5);
assert!(tl.text.len() == 12); assert_eq!(tl.text.grapheme_count(), 12);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == (' ' as u8)); assert!(&tl.text[5] == " ");
assert!(tl.text[6] == ('w' as u8)); assert!(&tl.text[6] == "w");
assert!(tl.text[7] == ('o' as u8)); assert!(&tl.text[7] == "o");
assert!(tl.text[8] == ('r' as u8)); assert!(&tl.text[8] == "r");
assert!(tl.text[9] == ('l' as u8)); assert!(&tl.text[9] == "l");
assert!(tl.text[10] == ('d' as u8)); assert!(&tl.text[10] == "d");
assert!(tl.text[11] == ('!' as u8)); assert!(&tl.text[11] == "!");
assert!(tl.ending == LineEnding::CRLF); assert!(tl.ending == LineEnding::CRLF);
} }
@ -691,19 +638,19 @@ mod tests {
tl.append_text(" world!"); tl.append_text(" world!");
assert!(tl.text.len() == 12); assert_eq!(tl.text.grapheme_count(), 12);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == (' ' as u8)); assert!(&tl.text[5] == " ");
assert!(tl.text[6] == ('w' as u8)); assert!(&tl.text[6] == "w");
assert!(tl.text[7] == ('o' as u8)); assert!(&tl.text[7] == "o");
assert!(tl.text[8] == ('r' as u8)); assert!(&tl.text[8] == "r");
assert!(tl.text[9] == ('l' as u8)); assert!(&tl.text[9] == "l");
assert!(tl.text[10] == ('d' as u8)); assert!(&tl.text[10] == "d");
assert!(tl.text[11] == ('!' as u8)); assert!(&tl.text[11] == "!");
assert!(tl.ending == LineEnding::CRLF); assert!(tl.ending == LineEnding::CRLF);
} }
@ -713,13 +660,13 @@ mod tests {
tl.remove_text(5, 11); tl.remove_text(5, 11);
assert!(tl.text.len() == 6); assert_eq!(tl.text.grapheme_count(), 6);
assert!(tl.text[0] == ('H' as u8)); assert!(&tl.text[0] == "H");
assert!(tl.text[1] == ('e' as u8)); assert!(&tl.text[1] == "e");
assert!(tl.text[2] == ('l' as u8)); assert!(&tl.text[2] == "l");
assert!(tl.text[3] == ('l' as u8)); assert!(&tl.text[3] == "l");
assert!(tl.text[4] == ('o' as u8)); assert!(&tl.text[4] == "o");
assert!(tl.text[5] == ('!' as u8)); assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::CRLF); assert!(tl.ending == LineEnding::CRLF);
} }
@ -729,22 +676,22 @@ mod tests {
let tl2 = tl1.split(LineEnding::LF, 5); let tl2 = tl1.split(LineEnding::LF, 5);
assert!(tl1.text.len() == 5); assert_eq!(tl1.text.grapheme_count(), 5);
assert!(tl1.text[0] == ('H' as u8)); assert!(&tl1.text[0] == "H");
assert!(tl1.text[1] == ('e' as u8)); assert!(&tl1.text[1] == "e");
assert!(tl1.text[2] == ('l' as u8)); assert!(&tl1.text[2] == "l");
assert!(tl1.text[3] == ('l' as u8)); assert!(&tl1.text[3] == "l");
assert!(tl1.text[4] == ('o' as u8)); assert!(&tl1.text[4] == "o");
assert!(tl1.ending == LineEnding::LF); assert!(tl1.ending == LineEnding::LF);
assert!(tl2.text.len() == 7); assert_eq!(tl2.text.grapheme_count(), 7);
assert!(tl2.text[0] == (' ' as u8)); assert!(&tl2.text[0] == " ");
assert!(tl2.text[1] == ('w' as u8)); assert!(&tl2.text[1] == "w");
assert!(tl2.text[2] == ('o' as u8)); assert!(&tl2.text[2] == "o");
assert!(tl2.text[3] == ('r' as u8)); assert!(&tl2.text[3] == "r");
assert!(tl2.text[4] == ('l' as u8)); assert!(&tl2.text[4] == "l");
assert!(tl2.text[5] == ('d' as u8)); assert!(&tl2.text[5] == "d");
assert!(tl2.text[6] == ('!' as u8)); assert!(&tl2.text[6] == "!");
assert!(tl2.ending == LineEnding::CRLF); assert!(tl2.ending == LineEnding::CRLF);
} }
@ -754,16 +701,16 @@ mod tests {
let tl2 = tl1.split(LineEnding::LF, 0); let tl2 = tl1.split(LineEnding::LF, 0);
assert!(tl1.text.len() == 0); assert_eq!(tl1.text.grapheme_count(), 0);
assert!(tl1.ending == LineEnding::LF); assert!(tl1.ending == LineEnding::LF);
assert!(tl2.text.len() == 6); assert_eq!(tl2.text.grapheme_count(), 6);
assert!(tl2.text[0] == ('H' as u8)); assert!(&tl2.text[0] == "H");
assert!(tl2.text[1] == ('e' as u8)); assert!(&tl2.text[1] == "e");
assert!(tl2.text[2] == ('l' as u8)); assert!(&tl2.text[2] == "l");
assert!(tl2.text[3] == ('l' as u8)); assert!(&tl2.text[3] == "l");
assert!(tl2.text[4] == ('o' as u8)); assert!(&tl2.text[4] == "o");
assert!(tl2.text[5] == ('!' as u8)); assert!(&tl2.text[5] == "!");
assert!(tl2.ending == LineEnding::CRLF); assert!(tl2.ending == LineEnding::CRLF);
} }

View File

@ -5,7 +5,7 @@ use std::old_path::Path;
use std::old_io::fs::File; use std::old_io::fs::File;
use std::old_io::{IoResult, BufferedReader, BufferedWriter}; use std::old_io::{IoResult, BufferedReader, BufferedWriter};
use self::line::{Line, line_ending_to_str}; use self::line::Line;
use self::node::{BufferNode, BufferNodeGraphemeIter, BufferNodeLineIter}; use self::node::{BufferNode, BufferNodeGraphemeIter, BufferNodeLineIter};
use self::undo_stack::{UndoStack}; use self::undo_stack::{UndoStack};
use self::undo_stack::Operation::*; use self::undo_stack::Operation::*;
@ -89,11 +89,11 @@ impl Buffer {
pub fn save_to_file(&self, path: &Path) -> IoResult<()> { pub fn save_to_file(&self, path: &Path) -> IoResult<()> {
// TODO: make more efficient
let mut f = BufferedWriter::new(try!(File::create(path))); let mut f = BufferedWriter::new(try!(File::create(path)));
for l in self.line_iter() { for g in self.grapheme_iter() {
let _ = f.write_str(l.as_str()); let _ = f.write_str(g);
let _ = f.write_str(line_ending_to_str(l.ending));
} }
return Ok(()); return Ok(());

View File

@ -582,11 +582,11 @@ impl BufferNode {
} }
pub fn merge_line_with_next_recursive(&mut self, line_number: usize, fetched_line: Option<&Line>) { pub fn merge_line_with_next_recursive(&mut self, line_number: usize, fetched_line: Option<Line>) {
match fetched_line { match fetched_line {
None => { None => {
let line: Option<Line> = self.pull_out_line_recursive(line_number + 1); let line: Option<Line> = self.pull_out_line_recursive(line_number + 1);
if let Some(ref l) = line { if let Some(l) = line {
self.merge_line_with_next_recursive(line_number, Some(l)); self.merge_line_with_next_recursive(line_number, Some(l));
} }
}, },
@ -603,8 +603,7 @@ impl BufferNode {
}, },
BufferNodeData::Leaf(ref mut line2) => { BufferNodeData::Leaf(ref mut line2) => {
line2.append_text(line.as_str()); line2.append(line);
line2.ending = line.ending;
} }
} }
} }
@ -1069,7 +1068,7 @@ mod tests {
assert!(node.line_count == 5); assert!(node.line_count == 5);
let line = node.pull_out_line_recursive(0).unwrap(); let line = node.pull_out_line_recursive(0).unwrap();
assert!(line.as_str() == "Hi"); assert!(line.to_string().as_slice() == "Hi");
assert!(line.ending == LineEnding::LF); assert!(line.ending == LineEnding::LF);
let mut iter = node.grapheme_iter(); let mut iter = node.grapheme_iter();
@ -1119,7 +1118,7 @@ mod tests {
assert!(node.line_count == 5); assert!(node.line_count == 5);
let line = node.pull_out_line_recursive(2).unwrap(); let line = node.pull_out_line_recursive(2).unwrap();
assert!(line.as_str() == " people "); assert!(line.to_string().as_slice() == " people ");
assert!(line.ending == LineEnding::LF); assert!(line.ending == LineEnding::LF);
let mut iter = node.grapheme_iter(); let mut iter = node.grapheme_iter();
@ -1163,7 +1162,7 @@ mod tests {
assert!(node.line_count == 5); assert!(node.line_count == 5);
let line = node.pull_out_line_recursive(4).unwrap(); let line = node.pull_out_line_recursive(4).unwrap();
assert!(line.as_str() == " world!"); assert!(line.to_string().as_slice() == " world!");
assert!(line.ending == LineEnding::None); assert!(line.ending == LineEnding::None);
let mut iter = node.grapheme_iter(); let mut iter = node.grapheme_iter();

View File

@ -1,9 +1,10 @@
use std::cmp::{min, max}; use std::cmp::{min, max};
use std::mem; use std::mem;
use std::str::Graphemes; use std::str::Graphemes;
use string_utils::{grapheme_count, insert_text_at_grapheme_index, remove_text_between_grapheme_indices, split_string_at_grapheme_index}; use std::ops::Index;
use string_utils::{grapheme_count, insert_text_at_grapheme_index, remove_text_between_grapheme_indices, split_string_at_grapheme_index, grapheme_pos_to_byte_pos};
const MIN_NODE_SIZE: usize = 1024; const MIN_NODE_SIZE: usize = 2048;
const MAX_NODE_SIZE: usize = MIN_NODE_SIZE * 2; const MAX_NODE_SIZE: usize = MIN_NODE_SIZE * 2;
@ -40,7 +41,7 @@ impl Rope {
tree_height: 1, tree_height: 1,
}; };
rope.split(); rope.split_if_too_large();
return rope; return rope;
} }
@ -55,7 +56,7 @@ impl Rope {
tree_height: 1, tree_height: 1,
}; };
rope.split(); rope.split_if_too_large();
return rope; return rope;
} }
@ -83,7 +84,7 @@ impl Rope {
} }
self.update_stats(); self.update_stats();
self.split(); self.split_if_too_large();
self.rebalance(); self.rebalance();
} }
@ -115,19 +116,53 @@ impl Rope {
} }
self.update_stats(); self.update_stats();
self.merge(); self.merge_if_too_small();
self.rebalance(); self.rebalance();
} }
/// Splits a rope into two pieces from the given grapheme index.
/// The first piece remains in this rope, the second piece is returned
/// as a new rope.
pub fn split(&mut self, pos: usize) -> Rope {
// TODO: make more efficient.
let s = self.to_string();
let gc = self.grapheme_count();
let bp = grapheme_pos_to_byte_pos(s.as_slice(), pos);
self.remove_text_between_grapheme_indices(pos, gc);
/// Creates an iterator at the first grapheme of the rope Rope::new_from_str(&s.as_slice()[bp..])
pub fn grapheme_iter<'a>(&'a self) -> RopeGraphemeIter<'a> { }
self.grapheme_iter_at_index(0)
/// Appends another rope to the end of this one, consuming the other rope.
pub fn append(&mut self, rope: Rope) {
// TODO: make more efficient. Converting to a string and then
// inserting is pretty slow...
let s = rope.to_string();
let gc = self.grapheme_count();
self.insert_text_at_grapheme_index(s.as_slice(), gc);
}
/// Makes a copy of the rope as a string
pub fn to_string(&self) -> String {
let mut s = String::new();
for chunk in self.chunk_iter() {
s.push_str(chunk);
}
return s;
} }
/// Creates an iterator at the given grapheme index /// Creates a chunk iterator for the rope
pub fn grapheme_iter_at_index<'a>(&'a self, index: usize) -> RopeGraphemeIter<'a> { pub fn chunk_iter<'a>(&'a self) -> RopeChunkIter<'a> {
self.chunk_iter_at_index(0).1
}
/// Creates a chunk iter starting at the chunk containing the given
/// grapheme index. Returns the chunk and its starting grapheme index.
pub fn chunk_iter_at_index<'a>(&'a self, index: usize) -> (usize, RopeChunkIter<'a>) {
let mut node_stack: Vec<&'a Rope> = Vec::new(); let mut node_stack: Vec<&'a Rope> = Vec::new();
let mut cur_node = self; let mut cur_node = self;
let mut grapheme_i = index; let mut grapheme_i = index;
@ -136,6 +171,7 @@ impl Rope {
loop { loop {
match cur_node.data { match cur_node.data {
RopeData::Leaf(_) => { RopeData::Leaf(_) => {
node_stack.push(cur_node);
break; break;
}, },
@ -152,8 +188,22 @@ impl Rope {
} }
} }
(index - grapheme_i, RopeChunkIter {node_stack: node_stack})
}
/// Creates an iterator at the first grapheme of the rope
pub fn grapheme_iter<'a>(&'a self) -> RopeGraphemeIter<'a> {
self.grapheme_iter_at_index(0)
}
/// Creates an iterator at the given grapheme index
pub fn grapheme_iter_at_index<'a>(&'a self, index: usize) -> RopeGraphemeIter<'a> {
let (grapheme_i, mut chunk_iter) = self.chunk_iter_at_index(index);
// Create the grapheme iter for the current node // Create the grapheme iter for the current node
let mut gi = if let RopeData::Leaf(ref text) = cur_node.data { let mut giter = if let Some(text) = chunk_iter.next() {
text.as_slice().graphemes(true) text.as_slice().graphemes(true)
} }
else { else {
@ -161,14 +211,14 @@ impl Rope {
}; };
// Get to the right spot in the iter // Get to the right spot in the iter
for _ in 0..grapheme_i { for _ in grapheme_i..index {
gi.next(); giter.next();
} }
// Create the rope grapheme iter // Create the rope grapheme iter
return RopeGraphemeIter { return RopeGraphemeIter {
node_stack: node_stack, chunk_iter: chunk_iter,
cur_chunk: gi, cur_chunk: giter,
}; };
} }
@ -208,7 +258,7 @@ impl Rope {
// if lots of splits need to happen. This version ends up re-scanning // if lots of splits need to happen. This version ends up re-scanning
// the text quite a lot, as well as doing quite a few unnecessary // the text quite a lot, as well as doing quite a few unnecessary
// allocations. // allocations.
fn split(&mut self) { fn split_if_too_large(&mut self) {
if self.grapheme_count_ > MAX_NODE_SIZE && self.is_leaf() { if self.grapheme_count_ > MAX_NODE_SIZE && self.is_leaf() {
// Calculate split position and how large the left and right // Calculate split position and how large the left and right
@ -229,8 +279,8 @@ impl Rope {
// Recursively split // Recursively split
nl.grapheme_count_ = new_gc_l; nl.grapheme_count_ = new_gc_l;
nr.grapheme_count_ = new_gc_r; nr.grapheme_count_ = new_gc_r;
nl.split(); nl.split_if_too_large();
nr.split(); nr.split_if_too_large();
// Update the new left and right node's stats // Update the new left and right node's stats
nl.update_stats(); nl.update_stats();
@ -244,14 +294,14 @@ impl Rope {
/// Merges a non-leaf node into a leaf node if it's too small /// Merges a non-leaf node into a leaf node if it's too small
fn merge(&mut self) { fn merge_if_too_small(&mut self) {
if self.grapheme_count_ < MIN_NODE_SIZE && !self.is_leaf() { if self.grapheme_count_ < MIN_NODE_SIZE && !self.is_leaf() {
let mut merged_text = String::new(); let mut merged_text = String::new();
if let RopeData::Branch(ref mut left, ref mut right) = self.data { if let RopeData::Branch(ref mut left, ref mut right) = self.data {
// First, recursively merge the children // First, recursively merge the children
left.merge(); left.merge_if_too_small();
right.merge(); right.merge_if_too_small();
// Then put their text into merged_text // Then put their text into merged_text
if let RopeData::Leaf(ref mut text) = left.data { if let RopeData::Leaf(ref mut text) = left.data {
@ -386,28 +436,62 @@ impl Rope {
} }
// Direct indexing to graphemes in the rope
impl Index<usize> for Rope {
type Output = str;
fn index<'a>(&'a self, index: &usize) -> &'a str {
if *index >= self.grapheme_count() {
panic!("Rope::Index: attempting to fetch grapheme that outside the bounds of the text.");
}
match self.data {
RopeData::Leaf(ref text) => {
let mut i: usize = 0;
for g in text.graphemes(true) {
if i == *index {
return &g;
}
i += 1;
}
unreachable!();
},
RopeData::Branch(ref left, ref right) => {
if *index < left.grapheme_count() {
return &left[*index];
}
else {
return &right[*index - left.grapheme_count()];
}
},
}
}
}
//============================================================= //=============================================================
// Rope iterators // Rope iterators
//============================================================= //=============================================================
/// An iterator over a text buffer's graphemes /// An iterator over a rope's string chunks
pub struct RopeGraphemeIter<'a> { pub struct RopeChunkIter<'a> {
node_stack: Vec<&'a Rope>, node_stack: Vec<&'a Rope>,
cur_chunk: Graphemes<'a>,
} }
impl<'a> Iterator for RopeChunkIter<'a> {
type Item = &'a str;
impl<'a> RopeGraphemeIter<'a> { fn next(&mut self) -> Option<&'a str> {
// Skips the iterator to the next chunk of the rope, if any. if let Some(next_chunk) = self.node_stack.pop() {
pub fn next_chunk(&mut self) -> bool {
loop { loop {
if let Option::Some(node) = self.node_stack.pop() { if let Option::Some(node) = self.node_stack.pop() {
match node.data { match node.data {
RopeData::Leaf(ref text) => { RopeData::Leaf(_) => {
self.cur_chunk = text.as_slice().graphemes(true); self.node_stack.push(node);
return true; break;
}, },
RopeData::Branch(ref left, ref right) => { RopeData::Branch(ref left, ref right) => {
@ -418,10 +502,29 @@ impl<'a> RopeGraphemeIter<'a> {
} }
} }
else { else {
return false; break;
} }
} }
if let RopeData::Leaf(ref text) = next_chunk.data {
return Some(text.as_slice());
} }
else {
unreachable!();
}
}
else {
return None;
}
}
}
/// An iterator over a rope's graphemes
pub struct RopeGraphemeIter<'a> {
chunk_iter: RopeChunkIter<'a>,
cur_chunk: Graphemes<'a>,
} }
@ -430,15 +533,16 @@ impl<'a> Iterator for RopeGraphemeIter<'a> {
fn next(&mut self) -> Option<&'a str> { fn next(&mut self) -> Option<&'a str> {
loop { loop {
if let Option::Some(g) = self.cur_chunk.next() { if let Some(g) = self.cur_chunk.next() {
return Option::Some(g); return Some(g);
} }
else { else {
if self.next_chunk() { if let Some(s) = self.chunk_iter.next() {
self.cur_chunk = s.graphemes(true);
continue; continue;
} }
else { else {
return Option::None; return None;
} }
} }
} }
@ -454,7 +558,7 @@ impl<'a> Iterator for RopeGraphemeIter<'a> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{Rope, RopeGraphemeIter}; use super::*;
#[test] #[test]
@ -509,6 +613,87 @@ mod tests {
} }
#[test]
fn index() {
    let rope = Rope::new_from_str("Hel世界lo world!");

    // Grapheme positions: H(0) e(1) l(2) 世(3) 界(4) l(5) ...
    assert_eq!("H", &rope[0]);
    assert_eq!("界", &rope[4]);
}
#[test]
fn to_string() {
    let rope = Rope::new_from_str("Hello there good people of the world!");

    // Converting the rope back to a String must reproduce the source text.
    let rendered = rope.to_string();
    let rendered_slice = rendered.as_slice();

    assert_eq!("Hello there good people of the world!", rendered_slice);
}
#[test]
fn split_1() {
    // Split in the interior of the text: the first 18 graphemes stay in
    // the original rope, the remainder ends up in the returned rope.
    let mut head = Rope::new_from_str("Hello there good people of the world!");
    let tail = head.split(18);

    let head_text = head.to_string();
    let tail_text = tail.to_string();

    assert_eq!("Hello there good p", head_text.as_slice());
    assert_eq!("eople of the world!", tail_text.as_slice());
}
#[test]
fn split_2() {
    // Split at the very end (37 is the full grapheme count of the text):
    // the original keeps everything and the returned rope is empty.
    let mut head = Rope::new_from_str("Hello there good people of the world!");
    let tail = head.split(37);

    let head_text = head.to_string();
    let tail_text = tail.to_string();

    assert_eq!("Hello there good people of the world!", head_text.as_slice());
    assert_eq!("", tail_text.as_slice());
}
#[test]
fn split_3() {
    // Split at the very start: the original is left empty and the
    // returned rope holds the whole text.
    let mut head = Rope::new_from_str("Hello there good people of the world!");
    let tail = head.split(0);

    let head_text = head.to_string();
    let tail_text = tail.to_string();

    assert_eq!("", head_text.as_slice());
    assert_eq!("Hello there good people of the world!", tail_text.as_slice());
}
#[test]
fn append_1() {
    // Appending one non-empty rope to another concatenates their text.
    let mut front = Rope::new_from_str("Hello there good p");
    let back = Rope::new_from_str("eople of the world!");

    front.append(back);

    let joined = front.to_string();
    assert_eq!("Hello there good people of the world!", joined.as_slice());
}
#[test]
fn append_2() {
    // Appending an empty rope must leave the text unchanged.
    let mut front = Rope::new_from_str("Hello there good people of the world!");
    let back = Rope::new_from_str("");

    front.append(back);

    let joined = front.to_string();
    assert_eq!("Hello there good people of the world!", joined.as_slice());
}
#[test]
fn append_3() {
    // Appending onto an empty rope must yield exactly the appended text.
    let mut front = Rope::new_from_str("");
    let back = Rope::new_from_str("Hello there good people of the world!");

    front.append(back);

    let joined = front.to_string();
    assert_eq!("Hello there good people of the world!", joined.as_slice());
}
#[test] #[test]
fn insert_text() { fn insert_text() {
let mut rope = Rope::new(); let mut rope = Rope::new();
@ -1017,4 +1202,5 @@ mod tests {
assert!(Some("9") == iter.next()); assert!(Some("9") == iter.next());
assert!(None == iter.next()); assert!(None == iter.next());
} }
} }