Lines now internally use ropes to store their text.

The rope uses a large leaf-node text length, so in the vast majority of
cases this ends up being the same as directly storing the string data.
But in the case that a line becomes extremely long, the rope will still
allow for reasonable performance.
This commit is contained in:
Nathan Vegdahl 2015-02-15 14:56:56 -08:00
parent a89e505fb0
commit e1c9a6955d
4 changed files with 444 additions and 312 deletions

View File

@ -1,14 +1,13 @@
#![allow(dead_code)]
use std::iter::repeat;
use std::mem;
use std::str::Graphemes;
use string_utils::{grapheme_count, grapheme_pos_to_byte_pos, is_line_ending};
use super::rope::{Rope, RopeGraphemeIter};
use string_utils::is_line_ending;
/// A single line of text
pub struct Line {
text: Vec<u8>, // The text data, stored as UTF8
text: Rope, // The text data, stored as UTF8
pub ending: LineEnding, // The type of line ending, if any
}
@ -17,7 +16,7 @@ impl Line {
/// Creates a new empty Line
pub fn new() -> Line {
Line {
text: Vec::new(),
text: Rope::new(),
ending: LineEnding::None,
}
}
@ -25,14 +24,11 @@ impl Line {
/// Creates a new Line from a str.
pub fn new_from_str(text: &str) -> Line {
// Initialize Line
let mut tl = Line {
text: Vec::with_capacity(text.len()),
ending: LineEnding::None,
};
let mut ending = LineEnding::None;
let mut end_pos = 0;
// Copy text data, stopping on a line ending if any is found
for g in text.graphemes(true) {
// Find the slice before the line ending, if any
for g in text.graphemes(true) {
match g {
//==============
// Line endings
@ -40,49 +36,49 @@ impl Line {
// CRLF
"\u{000D}\u{000A}" => {
tl.ending = LineEnding::CRLF;
ending = LineEnding::CRLF;
break;
},
// LF
"\u{000A}" => {
tl.ending = LineEnding::LF;
ending = LineEnding::LF;
break;
},
// VT
"\u{000B}" => {
tl.ending = LineEnding::VT;
ending = LineEnding::VT;
break;
},
// FF
"\u{000C}" => {
tl.ending = LineEnding::FF;
ending = LineEnding::FF;
break;
},
// CR
"\u{000D}" => {
tl.ending = LineEnding::CR;
ending = LineEnding::CR;
break;
},
// NEL
"\u{0085}" => {
tl.ending = LineEnding::NEL;
ending = LineEnding::NEL;
break;
},
// LS
"\u{2028}" => {
tl.ending = LineEnding::LS;
ending = LineEnding::LS;
break;
},
// PS
"\u{2029}" => {
tl.ending = LineEnding::PS;
ending = LineEnding::PS;
break;
},
@ -91,41 +87,38 @@ impl Line {
//==================
_ => {
for b in g.bytes() {
tl.text.push(b);
}
end_pos += g.len();
}
}
}
// Done!
return tl;
// Create and return Line
return Line {
text: Rope::new_from_str(&text[..end_pos]),
ending: ending,
};
}
pub fn new_from_str_unchecked(text: &str) -> Line {
// Initialize Line
let mut tl = Line {
text: Vec::new(),
ending: LineEnding::None,
};
let mut ending = LineEnding::None;
tl.text.push_all(text.as_bytes());
let bytes = text.as_bytes();
// Check for line ending
let mut le_size: usize = 0;
let text_size = tl.text.len();
if tl.text.len() >= 3 {
match unsafe{mem::transmute::<&[u8], &str>(&tl.text[(text_size-3)..])} {
let text_size = text.len();
if text.len() >= 3 {
match &text[(text_size-3)..] {
// LS
"\u{2028}" => {
tl.ending = LineEnding::LS;
ending = LineEnding::LS;
le_size = 3;
},
// PS
"\u{2029}" => {
tl.ending = LineEnding::PS;
ending = LineEnding::PS;
le_size = 3;
},
@ -133,11 +126,11 @@ impl Line {
}
}
if le_size == 0 && tl.text.len() >= 2 {
match unsafe{mem::transmute::<&[u8], &str>(&tl.text[(text_size-2)..])} {
if le_size == 0 && text.len() >= 2 {
match &text[(text_size-2)..] {
// CRLF
"\u{000D}\u{000A}" => {
tl.ending = LineEnding::CRLF;
ending = LineEnding::CRLF;
le_size = 2;
},
@ -145,35 +138,35 @@ impl Line {
}
}
if le_size == 0 && tl.text.len() >= 1 {
match unsafe{mem::transmute::<&[u8], &str>(&tl.text[(text_size-1)..])} {
if le_size == 0 && text.len() >= 1 {
match &text[(text_size-1)..] {
// LF
"\u{000A}" => {
tl.ending = LineEnding::LF;
ending = LineEnding::LF;
le_size = 1;
},
// VT
"\u{000B}" => {
tl.ending = LineEnding::VT;
ending = LineEnding::VT;
le_size = 1;
},
// FF
"\u{000C}" => {
tl.ending = LineEnding::FF;
ending = LineEnding::FF;
le_size = 1;
},
// CR
"\u{000D}" => {
tl.ending = LineEnding::CR;
ending = LineEnding::CR;
le_size = 1;
},
// NEL
"\u{0085}" => {
tl.ending = LineEnding::NEL;
ending = LineEnding::NEL;
le_size = 1;
},
@ -181,12 +174,11 @@ impl Line {
}
}
// Truncate off the line ending, if any
let trunc_size = text_size - le_size;
tl.text.truncate(trunc_size);
// Done!
return tl;
// Create and return Line
return Line {
text: Rope::new_from_str(&text[..(bytes.len()-le_size)]),
ending: ending,
};
}
@ -194,13 +186,16 @@ impl Line {
/// Does not check to see if the string has internal newlines.
/// This is primarily used for efficient loading of files.
pub fn new_from_string_unchecked(text: String) -> Line {
    // TODO: be smarter here. Once any trailing line ending has been
    // taken off, the String could be handed directly to the Rope
    // rather than being re-read through a slice.
    Line::new_from_str_unchecked(text.as_slice())
}
/// Returns the total number of unicode graphemes in the line
pub fn grapheme_count(&self) -> usize {
let mut count = grapheme_count(self.as_str());
let mut count = self.text.grapheme_count();
match self.ending {
LineEnding::None => {},
_ => {count += 1;}
@ -212,11 +207,13 @@ impl Line {
/// Returns the total number of unicode graphemes in the line,
/// not counting the line ending grapheme, if any.
pub fn grapheme_count_sans_line_ending(&self) -> usize {
grapheme_count(self.as_str())
self.text.grapheme_count()
}
pub fn grapheme_at_index<'a>(&'a self, index: usize) -> &'a str {
// TODO: we don't have to iterate over the entire line
// anymore because we're using a rope now. Update.
let mut i = 0;
for g in self.grapheme_iter() {
@ -233,45 +230,25 @@ impl Line {
}
/// Returns an immutable string slice into the text block's memory
pub fn as_str<'a>(&'a self) -> &'a str {
unsafe {
mem::transmute(&self.text[])
}
/// Builds an owned `String` holding a copy of the line's text.
pub fn to_string(&self) -> String {
    self.text.to_string()
}
/// Inserts `text` at grapheme index `pos`.
/// NOTE: panics if it encounters a line ending in the text.
pub fn insert_text(&mut self, text: &str, pos: usize) {
// Find insertion position in bytes
let byte_pos = grapheme_pos_to_byte_pos(self.as_str(), pos);
// Grow data size
self.text.extend(repeat(0).take(text.len()));
// Move old bytes forward
let mut from = self.text.len() - text.len();
let mut to = self.text.len();
while from > byte_pos {
from -= 1;
to -= 1;
self.text[to] = self.text[from];
}
// Copy new bytes in
let mut i = byte_pos;
// Check for line endings
for g in text.graphemes(true) {
if is_line_ending(g) {
panic!("Line::insert_text(): line ending in inserted text.");
}
for b in g.bytes() {
self.text[i] = b;
i += 1
}
}
// Insert text
self.text.insert_text_at_grapheme_index(text, pos);
}
@ -279,49 +256,22 @@ impl Line {
/// any).
/// NOTE: panics if it encounters a line ending in the text.
pub fn append_text(&mut self, text: &str) {
let mut i = self.text.len();
// Grow data size
self.text.extend(repeat(0).take(text.len()));
// Copy new bytes in
// Check for line endings
for g in text.graphemes(true) {
if is_line_ending(g) {
panic!("Line::append_text(): line ending in inserted text.");
}
for b in g.bytes() {
self.text[i] = b;
i += 1
}
}
// Append text
let gc = self.text.grapheme_count();
self.text.insert_text_at_grapheme_index(text, gc);
}
/// Remove the text between grapheme positions 'pos_a' and 'pos_b'.
pub fn remove_text(&mut self, pos_a: usize, pos_b: usize) {
// Bounds checks
if pos_a > pos_b {
panic!("Line::remove_text(): pos_a must be less than or equal to pos_b.");
}
// Find removal positions in bytes
let byte_pos_a = grapheme_pos_to_byte_pos(self.as_str(), pos_a);
let byte_pos_b = grapheme_pos_to_byte_pos(self.as_str(), pos_b);
// Move bytes to fill in the gap left by the removed bytes
let mut from = byte_pos_b;
let mut to = byte_pos_a;
while from < self.text.len() {
self.text[to] = self.text[from];
from += 1;
to += 1;
}
// Remove data from the end
let final_text_size = self.text.len() + byte_pos_a - byte_pos_b;
self.text.truncate(final_text_size);
self.text.remove_text_between_grapheme_indices(pos_a, pos_b);
}
@ -329,6 +279,7 @@ impl Line {
/// This line stays as the first part of the split. The second
/// part is returned.
pub fn split(&mut self, ending: LineEnding, pos: usize) -> Line {
// TODO: change code to use Rope
let mut other = Line::new();
// Inserting at very beginning: special cased for efficiency
@ -338,14 +289,8 @@ impl Line {
}
// Otherwise, general case
else {
// Find the byte index to split at
let byte_pos = grapheme_pos_to_byte_pos(self.as_str(), pos);
// Copy the elements after the split index to the second line
other.text.push_all(&self.text[byte_pos..]);
// Truncate the first line
self.text.truncate(byte_pos);
// Split the text
other.text = self.text.split(pos);
// Set the line endings appropriately
other.ending = self.ending;
@ -356,10 +301,20 @@ impl Line {
}
/// Joins `other` onto the end of this line, taking ownership of it.
///
/// The text of `other` is appended to this line's text, and this line's
/// ending is replaced by the ending of `other`, whatever that is.
pub fn append(&mut self, other: Line) {
    let other_ending = other.ending;
    self.text.append(other.text);
    self.ending = other_ending;
}
/// Returns an iterator over the graphemes of the line
pub fn grapheme_iter<'a>(&'a self) -> LineGraphemeIter<'a> {
LineGraphemeIter {
graphemes: self.as_str().graphemes(true),
graphemes: self.text.grapheme_iter(),
ending: self.ending,
done: false,
}
@ -368,19 +323,11 @@ impl Line {
/// Returns an iterator over the graphemes of the line
pub fn grapheme_iter_at_index<'a>(&'a self, index: usize) -> LineGraphemeIter<'a> {
let temp: &str = unsafe{mem::transmute(&self.text[])};
let mut iter = LineGraphemeIter {
graphemes: temp.graphemes(true),
LineGraphemeIter {
graphemes: self.text.grapheme_iter_at_index(index),
ending: self.ending,
done: false,
};
for _ in range(0, index) {
iter.next();
}
return iter;
}
}
@ -473,7 +420,7 @@ pub const LINE_ENDINGS: [&'static str; 9] = ["",
/// An iterator over the graphemes of a Line
pub struct LineGraphemeIter<'a> {
graphemes: Graphemes<'a>,
graphemes: RopeGraphemeIter<'a>,
ending: LineEnding,
done: bool,
}
@ -531,7 +478,7 @@ mod tests {
fn new_text_line() {
let tl = Line::new();
assert!(tl.text.len() == 0);
assert_eq!(tl.text.grapheme_count(), 0);
assert!(tl.ending == LineEnding::None);
}
@ -539,13 +486,13 @@ mod tests {
fn new_text_line_from_str() {
let tl = Line::new_from_str("Hello!");
assert!(tl.text.len() == 6);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 6);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::None);
}
@ -553,7 +500,7 @@ mod tests {
fn new_text_line_from_empty_str() {
let tl = Line::new_from_str("");
assert!(tl.text.len() == 0);
assert_eq!(tl.text.grapheme_count(), 0);
assert!(tl.ending == LineEnding::None);
}
@ -561,13 +508,13 @@ mod tests {
fn new_text_line_from_str_with_lf() {
let tl = Line::new_from_str("Hello!\n");
assert!(tl.text.len() == 6);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 6);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::LF);
}
@ -575,13 +522,13 @@ mod tests {
fn new_text_line_from_str_with_crlf() {
let tl = Line::new_from_str("Hello!\r\n");
assert!(tl.text.len() == 6);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 6);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::CRLF);
}
@ -589,13 +536,13 @@ mod tests {
fn new_text_line_from_str_with_crlf_and_too_long() {
let tl = Line::new_from_str("Hello!\r\nLa la la la");
assert!(tl.text.len() == 6);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 6);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::CRLF);
}
@ -605,13 +552,13 @@ mod tests {
let tl = Line::new_from_string_unchecked(s);
assert!(tl.text.len() == 6);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 6);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::None);
}
@ -621,13 +568,13 @@ mod tests {
let tl = Line::new_from_string_unchecked(s);
assert!(tl.text.len() == 6);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 6);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::LF);
}
@ -637,13 +584,13 @@ mod tests {
let tl = Line::new_from_string_unchecked(s);
assert!(tl.text.len() == 6);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 6);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::CRLF);
}
@ -653,13 +600,13 @@ mod tests {
let tl = Line::new_from_string_unchecked(s);
assert!(tl.text.len() == 6);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 6);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::LS);
}
@ -669,19 +616,19 @@ mod tests {
tl.insert_text(" world", 5);
assert!(tl.text.len() == 12);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == (' ' as u8));
assert!(tl.text[6] == ('w' as u8));
assert!(tl.text[7] == ('o' as u8));
assert!(tl.text[8] == ('r' as u8));
assert!(tl.text[9] == ('l' as u8));
assert!(tl.text[10] == ('d' as u8));
assert!(tl.text[11] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 12);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == " ");
assert!(&tl.text[6] == "w");
assert!(&tl.text[7] == "o");
assert!(&tl.text[8] == "r");
assert!(&tl.text[9] == "l");
assert!(&tl.text[10] == "d");
assert!(&tl.text[11] == "!");
assert!(tl.ending == LineEnding::CRLF);
}
@ -691,19 +638,19 @@ mod tests {
tl.append_text(" world!");
assert!(tl.text.len() == 12);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == (' ' as u8));
assert!(tl.text[6] == ('w' as u8));
assert!(tl.text[7] == ('o' as u8));
assert!(tl.text[8] == ('r' as u8));
assert!(tl.text[9] == ('l' as u8));
assert!(tl.text[10] == ('d' as u8));
assert!(tl.text[11] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 12);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == " ");
assert!(&tl.text[6] == "w");
assert!(&tl.text[7] == "o");
assert!(&tl.text[8] == "r");
assert!(&tl.text[9] == "l");
assert!(&tl.text[10] == "d");
assert!(&tl.text[11] == "!");
assert!(tl.ending == LineEnding::CRLF);
}
@ -713,13 +660,13 @@ mod tests {
tl.remove_text(5, 11);
assert!(tl.text.len() == 6);
assert!(tl.text[0] == ('H' as u8));
assert!(tl.text[1] == ('e' as u8));
assert!(tl.text[2] == ('l' as u8));
assert!(tl.text[3] == ('l' as u8));
assert!(tl.text[4] == ('o' as u8));
assert!(tl.text[5] == ('!' as u8));
assert_eq!(tl.text.grapheme_count(), 6);
assert!(&tl.text[0] == "H");
assert!(&tl.text[1] == "e");
assert!(&tl.text[2] == "l");
assert!(&tl.text[3] == "l");
assert!(&tl.text[4] == "o");
assert!(&tl.text[5] == "!");
assert!(tl.ending == LineEnding::CRLF);
}
@ -729,22 +676,22 @@ mod tests {
let tl2 = tl1.split(LineEnding::LF, 5);
assert!(tl1.text.len() == 5);
assert!(tl1.text[0] == ('H' as u8));
assert!(tl1.text[1] == ('e' as u8));
assert!(tl1.text[2] == ('l' as u8));
assert!(tl1.text[3] == ('l' as u8));
assert!(tl1.text[4] == ('o' as u8));
assert_eq!(tl1.text.grapheme_count(), 5);
assert!(&tl1.text[0] == "H");
assert!(&tl1.text[1] == "e");
assert!(&tl1.text[2] == "l");
assert!(&tl1.text[3] == "l");
assert!(&tl1.text[4] == "o");
assert!(tl1.ending == LineEnding::LF);
assert!(tl2.text.len() == 7);
assert!(tl2.text[0] == (' ' as u8));
assert!(tl2.text[1] == ('w' as u8));
assert!(tl2.text[2] == ('o' as u8));
assert!(tl2.text[3] == ('r' as u8));
assert!(tl2.text[4] == ('l' as u8));
assert!(tl2.text[5] == ('d' as u8));
assert!(tl2.text[6] == ('!' as u8));
assert_eq!(tl2.text.grapheme_count(), 7);
assert!(&tl2.text[0] == " ");
assert!(&tl2.text[1] == "w");
assert!(&tl2.text[2] == "o");
assert!(&tl2.text[3] == "r");
assert!(&tl2.text[4] == "l");
assert!(&tl2.text[5] == "d");
assert!(&tl2.text[6] == "!");
assert!(tl2.ending == LineEnding::CRLF);
}
@ -754,16 +701,16 @@ mod tests {
let tl2 = tl1.split(LineEnding::LF, 0);
assert!(tl1.text.len() == 0);
assert_eq!(tl1.text.grapheme_count(), 0);
assert!(tl1.ending == LineEnding::LF);
assert!(tl2.text.len() == 6);
assert!(tl2.text[0] == ('H' as u8));
assert!(tl2.text[1] == ('e' as u8));
assert!(tl2.text[2] == ('l' as u8));
assert!(tl2.text[3] == ('l' as u8));
assert!(tl2.text[4] == ('o' as u8));
assert!(tl2.text[5] == ('!' as u8));
assert_eq!(tl2.text.grapheme_count(), 6);
assert!(&tl2.text[0] == "H");
assert!(&tl2.text[1] == "e");
assert!(&tl2.text[2] == "l");
assert!(&tl2.text[3] == "l");
assert!(&tl2.text[4] == "o");
assert!(&tl2.text[5] == "!");
assert!(tl2.ending == LineEnding::CRLF);
}

View File

@ -5,7 +5,7 @@ use std::old_path::Path;
use std::old_io::fs::File;
use std::old_io::{IoResult, BufferedReader, BufferedWriter};
use self::line::{Line, line_ending_to_str};
use self::line::Line;
use self::node::{BufferNode, BufferNodeGraphemeIter, BufferNodeLineIter};
use self::undo_stack::{UndoStack};
use self::undo_stack::Operation::*;
@ -89,11 +89,11 @@ impl Buffer {
pub fn save_to_file(&self, path: &Path) -> IoResult<()> {
// TODO: make more efficient
let mut f = BufferedWriter::new(try!(File::create(path)));
for l in self.line_iter() {
let _ = f.write_str(l.as_str());
let _ = f.write_str(line_ending_to_str(l.ending));
for g in self.grapheme_iter() {
let _ = f.write_str(g);
}
return Ok(());

View File

@ -582,11 +582,11 @@ impl BufferNode {
}
pub fn merge_line_with_next_recursive(&mut self, line_number: usize, fetched_line: Option<&Line>) {
pub fn merge_line_with_next_recursive(&mut self, line_number: usize, fetched_line: Option<Line>) {
match fetched_line {
None => {
let line: Option<Line> = self.pull_out_line_recursive(line_number + 1);
if let Some(ref l) = line {
if let Some(l) = line {
self.merge_line_with_next_recursive(line_number, Some(l));
}
},
@ -603,8 +603,7 @@ impl BufferNode {
},
BufferNodeData::Leaf(ref mut line2) => {
line2.append_text(line.as_str());
line2.ending = line.ending;
line2.append(line);
}
}
}
@ -1069,7 +1068,7 @@ mod tests {
assert!(node.line_count == 5);
let line = node.pull_out_line_recursive(0).unwrap();
assert!(line.as_str() == "Hi");
assert!(line.to_string().as_slice() == "Hi");
assert!(line.ending == LineEnding::LF);
let mut iter = node.grapheme_iter();
@ -1119,7 +1118,7 @@ mod tests {
assert!(node.line_count == 5);
let line = node.pull_out_line_recursive(2).unwrap();
assert!(line.as_str() == " people ");
assert!(line.to_string().as_slice() == " people ");
assert!(line.ending == LineEnding::LF);
let mut iter = node.grapheme_iter();
@ -1163,7 +1162,7 @@ mod tests {
assert!(node.line_count == 5);
let line = node.pull_out_line_recursive(4).unwrap();
assert!(line.as_str() == " world!");
assert!(line.to_string().as_slice() == " world!");
assert!(line.ending == LineEnding::None);
let mut iter = node.grapheme_iter();

View File

@ -1,9 +1,10 @@
use std::cmp::{min, max};
use std::mem;
use std::str::Graphemes;
use string_utils::{grapheme_count, insert_text_at_grapheme_index, remove_text_between_grapheme_indices, split_string_at_grapheme_index};
use std::ops::Index;
use string_utils::{grapheme_count, insert_text_at_grapheme_index, remove_text_between_grapheme_indices, split_string_at_grapheme_index, grapheme_pos_to_byte_pos};
const MIN_NODE_SIZE: usize = 1024;
const MIN_NODE_SIZE: usize = 2048;
const MAX_NODE_SIZE: usize = MIN_NODE_SIZE * 2;
@ -40,7 +41,7 @@ impl Rope {
tree_height: 1,
};
rope.split();
rope.split_if_too_large();
return rope;
}
@ -55,7 +56,7 @@ impl Rope {
tree_height: 1,
};
rope.split();
rope.split_if_too_large();
return rope;
}
@ -83,7 +84,7 @@ impl Rope {
}
self.update_stats();
self.split();
self.split_if_too_large();
self.rebalance();
}
@ -115,19 +116,53 @@ impl Rope {
}
self.update_stats();
self.merge();
self.merge_if_too_small();
self.rebalance();
}
/// Cuts the rope in two at grapheme index `pos`.
///
/// Everything before `pos` stays in this rope; everything from `pos`
/// onward is returned as a newly built rope.
pub fn split(&mut self, pos: usize) -> Rope {
    // TODO: make more efficient.
    let total_graphemes = self.grapheme_count();
    let full_text = self.to_string();
    let split_byte = grapheme_pos_to_byte_pos(full_text.as_slice(), pos);

    // Drop the tail from this rope, then rebuild it from the string copy.
    self.remove_text_between_grapheme_indices(pos, total_graphemes);
    Rope::new_from_str(&full_text.as_slice()[split_byte..])
}
/// Consumes `rope` and adds its text to the end of this rope.
pub fn append(&mut self, rope: Rope) {
    // TODO: make more efficient. Flattening the other rope into a
    // string and then inserting that string is pretty slow...
    let end_index = self.grapheme_count();
    let appended = rope.to_string();
    self.insert_text_at_grapheme_index(appended.as_slice(), end_index);
}
/// Creates an iterator at the first grapheme of the rope
pub fn grapheme_iter<'a>(&'a self) -> RopeGraphemeIter<'a> {
self.grapheme_iter_at_index(0)
/// Copies the rope's entire text into a newly allocated `String`.
pub fn to_string(&self) -> String {
    // Concatenate the chunks in the order the chunk iterator yields them.
    self.chunk_iter().fold(String::new(), |mut joined, chunk| {
        joined.push_str(chunk);
        joined
    })
}
/// Creates an iterator at the given grapheme index
pub fn grapheme_iter_at_index<'a>(&'a self, index: usize) -> RopeGraphemeIter<'a> {
/// Creates an iterator over the rope's chunks, starting from grapheme
/// index 0.
pub fn chunk_iter<'a>(&'a self) -> RopeChunkIter<'a> {
    // The grapheme index returned alongside the iterator is not needed here.
    let (_, chunks) = self.chunk_iter_at_index(0);
    chunks
}
/// Creates a chunk iter starting at the chunk containing the given
/// grapheme index. Returns the chunk and its starting grapheme index.
pub fn chunk_iter_at_index<'a>(&'a self, index: usize) -> (usize, RopeChunkIter<'a>) {
let mut node_stack: Vec<&'a Rope> = Vec::new();
let mut cur_node = self;
let mut grapheme_i = index;
@ -136,6 +171,7 @@ impl Rope {
loop {
match cur_node.data {
RopeData::Leaf(_) => {
node_stack.push(cur_node);
break;
},
@ -152,8 +188,22 @@ impl Rope {
}
}
(index - grapheme_i, RopeChunkIter {node_stack: node_stack})
}
/// Creates an iterator at the first grapheme of the rope.
///
/// Shorthand for `grapheme_iter_at_index(0)`.
pub fn grapheme_iter<'a>(&'a self) -> RopeGraphemeIter<'a> {
self.grapheme_iter_at_index(0)
}
/// Creates an iterator at the given grapheme index
pub fn grapheme_iter_at_index<'a>(&'a self, index: usize) -> RopeGraphemeIter<'a> {
let (grapheme_i, mut chunk_iter) = self.chunk_iter_at_index(index);
// Create the grapheme iter for the current node
let mut gi = if let RopeData::Leaf(ref text) = cur_node.data {
let mut giter = if let Some(text) = chunk_iter.next() {
text.as_slice().graphemes(true)
}
else {
@ -161,14 +211,14 @@ impl Rope {
};
// Get to the right spot in the iter
for _ in 0..grapheme_i {
gi.next();
for _ in grapheme_i..index {
giter.next();
}
// Create the rope grapheme iter
return RopeGraphemeIter {
node_stack: node_stack,
cur_chunk: gi,
chunk_iter: chunk_iter,
cur_chunk: giter,
};
}
@ -208,7 +258,7 @@ impl Rope {
// if lots of splits need to happen. This version ends up re-scanning
// the text quite a lot, as well as doing quite a few unnecessary
// allocations.
fn split(&mut self) {
fn split_if_too_large(&mut self) {
if self.grapheme_count_ > MAX_NODE_SIZE && self.is_leaf() {
// Calculate split position and how large the left and right
@ -229,8 +279,8 @@ impl Rope {
// Recursively split
nl.grapheme_count_ = new_gc_l;
nr.grapheme_count_ = new_gc_r;
nl.split();
nr.split();
nl.split_if_too_large();
nr.split_if_too_large();
// Update the new left and right node's stats
nl.update_stats();
@ -244,14 +294,14 @@ impl Rope {
/// Merges a non-leaf node into a leaf node if it's too small
fn merge(&mut self) {
fn merge_if_too_small(&mut self) {
if self.grapheme_count_ < MIN_NODE_SIZE && !self.is_leaf() {
let mut merged_text = String::new();
if let RopeData::Branch(ref mut left, ref mut right) = self.data {
// First, recursively merge the children
left.merge();
right.merge();
left.merge_if_too_small();
right.merge_if_too_small();
// Then put their text into merged_text
if let RopeData::Leaf(ref mut text) = left.data {
@ -386,42 +436,95 @@ impl Rope {
}
// Direct indexing to graphemes in the rope
impl Index<usize> for Rope {
type Output = str;
fn index<'a>(&'a self, index: &usize) -> &'a str {
if *index >= self.grapheme_count() {
panic!("Rope::Index: attempting to fetch grapheme that outside the bounds of the text.");
}
match self.data {
RopeData::Leaf(ref text) => {
let mut i: usize = 0;
for g in text.graphemes(true) {
if i == *index {
return &g;
}
i += 1;
}
unreachable!();
},
RopeData::Branch(ref left, ref right) => {
if *index < left.grapheme_count() {
return &left[*index];
}
else {
return &right[*index - left.grapheme_count()];
}
},
}
}
}
//=============================================================
// Rope iterators
//=============================================================
/// An iterator over a text buffer's graphemes
pub struct RopeGraphemeIter<'a> {
/// An iterator over a rope's string chunks
pub struct RopeChunkIter<'a> {
node_stack: Vec<&'a Rope>,
cur_chunk: Graphemes<'a>,
}
// Iterates over a rope's text chunks, using `node_stack` as an explicit
// traversal stack.
impl<'a> Iterator for RopeChunkIter<'a> {
type Item = &'a str;
// Pops the node on top of the stack and returns its text, or returns
// `None` when the stack is empty. The popped node is expected to be a
// leaf; anything else hits `unreachable!()`.
fn next(&mut self) -> Option<&'a str> {
// The node popped here is the one whose text is returned below.
if let Some(next_chunk) = self.node_stack.pop() {
// Prepare the stack for the following call: keep expanding the top
// node until a leaf is on top or the stack runs out.
loop {
if let Option::Some(node) = self.node_stack.pop() {
match node.data {
RopeData::Leaf(_) => {
// A leaf is on top: put it back and stop.
self.node_stack.push(node);
break;
},
RopeData::Branch(ref left, ref right) => {
// Push right first so that left is popped (visited) first.
self.node_stack.push(&(**right));
self.node_stack.push(&(**left));
continue;
}
}
}
else {
// Stack exhausted: the chunk returned below is the last one.
break;
}
}
if let RopeData::Leaf(ref text) = next_chunk.data {
return Some(text.as_slice());
}
else {
unreachable!();
}
}
else {
return None;
}
}
}
impl<'a> RopeGraphemeIter<'a> {
// Skips the iterator to the next chunk of the rope, if any.
pub fn next_chunk(&mut self) -> bool {
loop {
if let Option::Some(node) = self.node_stack.pop() {
match node.data {
RopeData::Leaf(ref text) => {
self.cur_chunk = text.as_slice().graphemes(true);
return true;
},
RopeData::Branch(ref left, ref right) => {
self.node_stack.push(&(**right));
self.node_stack.push(&(**left));
continue;
}
}
}
else {
return false;
}
}
}
/// An iterator over a rope's graphemes
pub struct RopeGraphemeIter<'a> {
// Source of the rope's text chunks.
chunk_iter: RopeChunkIter<'a>,
// Grapheme iterator over a single chunk of text.
cur_chunk: Graphemes<'a>,
}
@ -430,15 +533,16 @@ impl<'a> Iterator for RopeGraphemeIter<'a> {
fn next(&mut self) -> Option<&'a str> {
loop {
if let Option::Some(g) = self.cur_chunk.next() {
return Option::Some(g);
if let Some(g) = self.cur_chunk.next() {
return Some(g);
}
else {
if self.next_chunk() {
if let Some(s) = self.chunk_iter.next() {
self.cur_chunk = s.graphemes(true);
continue;
}
else {
return Option::None;
return None;
}
}
}
@ -454,7 +558,7 @@ impl<'a> Iterator for RopeGraphemeIter<'a> {
#[cfg(test)]
mod tests {
use super::{Rope, RopeGraphemeIter};
use super::*;
#[test]
@ -509,6 +613,87 @@ mod tests {
}
#[test]
fn index() {
    // Graphemes by index: H(0) e(1) l(2) 世(3) 界(4) l(5) o(6) ...
    let rope = Rope::new_from_str("Hel世界lo world!");

    assert_eq!("H", &rope[0]);
    // Index 4 is the second CJK character, not an empty string.
    assert_eq!("界", &rope[4]);
}
#[test]
fn to_string() {
    // Round trip: the text used to build the rope must come back unchanged.
    let text = "Hello there good people of the world!";
    let rope = Rope::new_from_str(text);

    assert_eq!(text, rope.to_string().as_slice());
}
#[test]
fn split_1() {
    // Split in the middle of the text.
    let mut head = Rope::new_from_str("Hello there good people of the world!");
    let tail = head.split(18);

    assert_eq!("Hello there good p", head.to_string().as_slice());
    assert_eq!("eople of the world!", tail.to_string().as_slice());
}
#[test]
fn split_2() {
    // Split at the very end: the returned rope must be empty.
    let mut head = Rope::new_from_str("Hello there good people of the world!");
    let tail = head.split(37);

    assert_eq!("Hello there good people of the world!", head.to_string().as_slice());
    assert_eq!("", tail.to_string().as_slice());
}
#[test]
fn split_3() {
    // Split at the very beginning: the original rope must end up empty.
    let mut head = Rope::new_from_str("Hello there good people of the world!");
    let tail = head.split(0);

    assert_eq!("", head.to_string().as_slice());
    assert_eq!("Hello there good people of the world!", tail.to_string().as_slice());
}
#[test]
fn append_1() {
    // Two non-empty ropes joined end to end.
    let mut front = Rope::new_from_str("Hello there good p");
    let back = Rope::new_from_str("eople of the world!");

    front.append(back);

    assert_eq!("Hello there good people of the world!", front.to_string().as_slice());
}
#[test]
fn append_2() {
    // Appending an empty rope must leave the text unchanged.
    let mut front = Rope::new_from_str("Hello there good people of the world!");
    let back = Rope::new_from_str("");

    front.append(back);

    assert_eq!("Hello there good people of the world!", front.to_string().as_slice());
}
#[test]
fn append_3() {
    // Appending onto an empty rope must yield the appended text.
    let mut front = Rope::new_from_str("");
    let back = Rope::new_from_str("Hello there good people of the world!");

    front.append(back);

    assert_eq!("Hello there good people of the world!", front.to_string().as_slice());
}
#[test]
fn insert_text() {
let mut rope = Rope::new();
@ -1017,4 +1202,5 @@ mod tests {
assert!(Some("9") == iter.next());
assert!(None == iter.next());
}
}