diff --git a/src/buffer/line.rs b/src/buffer/line.rs index 79abdb0..d136056 100644 --- a/src/buffer/line.rs +++ b/src/buffer/line.rs @@ -1,14 +1,13 @@ #![allow(dead_code)] -use std::iter::repeat; use std::mem; -use std::str::Graphemes; -use string_utils::{grapheme_count, grapheme_pos_to_byte_pos, is_line_ending}; +use super::rope::{Rope, RopeGraphemeIter}; +use string_utils::is_line_ending; /// A single line of text pub struct Line { - text: Vec, // The text data, stored as UTF8 + text: Rope, // The text data, stored as UTF8 pub ending: LineEnding, // The type of line ending, if any } @@ -17,7 +16,7 @@ impl Line { /// Creates a new empty Line pub fn new() -> Line { Line { - text: Vec::new(), + text: Rope::new(), ending: LineEnding::None, } } @@ -25,14 +24,11 @@ impl Line { /// Creates a new Line from a str. pub fn new_from_str(text: &str) -> Line { - // Initialize Line - let mut tl = Line { - text: Vec::with_capacity(text.len()), - ending: LineEnding::None, - }; + let mut ending = LineEnding::None; + let mut end_pos = 0; - // Copy text data, stopping on a line ending if any is found - for g in text.graphemes(true) { + // Find the slice before the line ending, if any + for g in text.graphemes(true) { match g { //============== // Line endings @@ -40,49 +36,49 @@ impl Line { // CRLF "\u{000D}\u{000A}" => { - tl.ending = LineEnding::CRLF; + ending = LineEnding::CRLF; break; }, // LF "\u{000A}" => { - tl.ending = LineEnding::LF; + ending = LineEnding::LF; break; }, // VT "\u{000B}" => { - tl.ending = LineEnding::VT; + ending = LineEnding::VT; break; }, // FF "\u{000C}" => { - tl.ending = LineEnding::FF; + ending = LineEnding::FF; break; }, // CR "\u{000D}" => { - tl.ending = LineEnding::CR; + ending = LineEnding::CR; break; }, // NEL "\u{0085}" => { - tl.ending = LineEnding::NEL; + ending = LineEnding::NEL; break; }, // LS "\u{2028}" => { - tl.ending = LineEnding::LS; + ending = LineEnding::LS; break; }, // PS "\u{2029}" => { - tl.ending = LineEnding::PS; + ending = LineEnding::PS; break; }, @@ -91,41 +87,38 @@ impl Line { //================== _ => { - for b in g.bytes() { - tl.text.push(b); - } + end_pos += g.len(); } } } - // Done! - return tl; + // Create and return Line + return Line { + text: Rope::new_from_str(&text[..end_pos]), + ending: ending, + }; } pub fn new_from_str_unchecked(text: &str) -> Line { - // Initialize Line - let mut tl = Line { - text: Vec::new(), - ending: LineEnding::None, - }; + let mut ending = LineEnding::None; - tl.text.push_all(text.as_bytes()); + let bytes = text.as_bytes(); // Check for line ending let mut le_size: usize = 0; - let text_size = tl.text.len(); - if tl.text.len() >= 3 { - match unsafe{mem::transmute::<&[u8], &str>(&tl.text[(text_size-3)..])} { + let text_size = text.len(); + if text.len() >= 3 { + match &text[(text_size-3)..] { // LS "\u{2028}" => { - tl.ending = LineEnding::LS; + ending = LineEnding::LS; le_size = 3; }, // PS "\u{2029}" => { - tl.ending = LineEnding::PS; + ending = LineEnding::PS; le_size = 3; }, @@ -133,11 +126,11 @@ impl Line { } } - if le_size == 0 && tl.text.len() >= 2 { - match unsafe{mem::transmute::<&[u8], &str>(&tl.text[(text_size-2)..])} { + if le_size == 0 && text.len() >= 2 { + match &text[(text_size-2)..] { // CRLF "\u{000D}\u{000A}" => { - tl.ending = LineEnding::CRLF; + ending = LineEnding::CRLF; le_size = 2; }, @@ -145,35 +138,35 @@ impl Line { } } - if le_size == 0 && tl.text.len() >= 1 { - match unsafe{mem::transmute::<&[u8], &str>(&tl.text[(text_size-1)..])} { + if le_size == 0 && text.len() >= 1 { + match &text[(text_size-1)..] { // LF "\u{000A}" => { - tl.ending = LineEnding::LF; + ending = LineEnding::LF; le_size = 1; }, // VT "\u{000B}" => { - tl.ending = LineEnding::VT; + ending = LineEnding::VT; le_size = 1; }, // FF "\u{000C}" => { - tl.ending = LineEnding::FF; + ending = LineEnding::FF; le_size = 1; }, // CR "\u{000D}" => { - tl.ending = LineEnding::CR; + ending = LineEnding::CR; le_size = 1; }, // NEL "\u{0085}" => { - tl.ending = LineEnding::NEL; + ending = LineEnding::NEL; le_size = 1; }, @@ -181,12 +174,11 @@ impl Line { } } - // Truncate off the line ending, if any - let trunc_size = text_size - le_size; - tl.text.truncate(trunc_size); - - // Done! - return tl; + // Create and return Line + return Line { + text: Rope::new_from_str(&text[..(bytes.len()-le_size)]), + ending: ending, + }; } @@ -194,13 +186,16 @@ impl Line { /// Does not check to see if the string has internal newlines. /// This is primarily used for efficient loading of files. pub fn new_from_string_unchecked(text: String) -> Line { + // TODO: this can be smarter, and can pass the string + // directly to the Rope after taking off any line + // endings. return Line::new_from_str_unchecked(text.as_slice()); } /// Returns the total number of unicode graphemes in the line pub fn grapheme_count(&self) -> usize { - let mut count = grapheme_count(self.as_str()); + let mut count = self.text.grapheme_count(); match self.ending { LineEnding::None => {}, _ => {count += 1;} @@ -212,11 +207,13 @@ impl Line { /// Returns the total number of unicode graphemes in the line, /// not counting the line ending grapheme, if any. pub fn grapheme_count_sans_line_ending(&self) -> usize { - grapheme_count(self.as_str()) + self.text.grapheme_count() } pub fn grapheme_at_index<'a>(&'a self, index: usize) -> &'a str { + // TODO: we don't have to iterate over the entire line + // anymore because we're using a rope now. Update. let mut i = 0; for g in self.grapheme_iter() { @@ -233,45 +230,25 @@ impl Line { } - /// Returns an immutable string slice into the text block's memory - pub fn as_str<'a>(&'a self) -> &'a str { - unsafe { - mem::transmute(&self.text[]) - } + /// Returns a string containing the line's text + pub fn to_string(&self) -> String { + let s = self.text.to_string(); + return s; } /// Inserts `text` at grapheme index `pos`. /// NOTE: panics if it encounters a line ending in the text. pub fn insert_text(&mut self, text: &str, pos: usize) { - // Find insertion position in bytes - let byte_pos = grapheme_pos_to_byte_pos(self.as_str(), pos); - - // Grow data size - self.text.extend(repeat(0).take(text.len())); - - // Move old bytes forward - let mut from = self.text.len() - text.len(); - let mut to = self.text.len(); - while from > byte_pos { - from -= 1; - to -= 1; - - self.text[to] = self.text[from]; - } - - // Copy new bytes in - let mut i = byte_pos; + // Check for line endings for g in text.graphemes(true) { if is_line_ending(g) { panic!("Line::insert_text(): line ending in inserted text."); } - - for b in g.bytes() { - self.text[i] = b; - i += 1 - } } + + // Insert text + self.text.insert_text_at_grapheme_index(text, pos); } @@ -279,49 +256,22 @@ impl Line { /// any). /// NOTE: panics if it encounters a line ending in the text. pub fn append_text(&mut self, text: &str) { - let mut i = self.text.len(); - - // Grow data size - self.text.extend(repeat(0).take(text.len())); - - // Copy new bytes in + // Check for line endings for g in text.graphemes(true) { if is_line_ending(g) { panic!("Line::append_text(): line ending in inserted text."); } - - for b in g.bytes() { - self.text[i] = b; - i += 1 - } } + + // Append text + let gc = self.text.grapheme_count(); + self.text.insert_text_at_grapheme_index(text, gc); } /// Remove the text between grapheme positions 'pos_a' and 'pos_b'. pub fn remove_text(&mut self, pos_a: usize, pos_b: usize) { - // Bounds checks - if pos_a > pos_b { - panic!("Line::remove_text(): pos_a must be less than or equal to pos_b."); - } - - // Find removal positions in bytes - let byte_pos_a = grapheme_pos_to_byte_pos(self.as_str(), pos_a); - let byte_pos_b = grapheme_pos_to_byte_pos(self.as_str(), pos_b); - - // Move bytes to fill in the gap left by the removed bytes - let mut from = byte_pos_b; - let mut to = byte_pos_a; - while from < self.text.len() { - self.text[to] = self.text[from]; - - from += 1; - to += 1; - } - - // Remove data from the end - let final_text_size = self.text.len() + byte_pos_a - byte_pos_b; - self.text.truncate(final_text_size); + self.text.remove_text_between_grapheme_indices(pos_a, pos_b); } @@ -329,6 +279,7 @@ impl Line { /// This line stays as the first part of the split. The second /// part is returned. pub fn split(&mut self, ending: LineEnding, pos: usize) -> Line { + // TODO: change code to use Rope let mut other = Line::new(); // Inserting at very beginning: special cased for efficiency @@ -338,14 +289,8 @@ impl Line { } // Otherwise, general case else { - // Find the byte index to split at - let byte_pos = grapheme_pos_to_byte_pos(self.as_str(), pos); - - // Copy the elements after the split index to the second line - other.text.push_all(&self.text[byte_pos..]); - - // Truncate the first line - self.text.truncate(byte_pos); + // Split the text + other.text = self.text.split(pos); // Set the line endings appropriately other.ending = self.ending; @@ -356,10 +301,20 @@ impl Line { } + /// Appends another line to the end of this one, consuming the other + /// line. + /// Note that the resulting line ending is the ending of the other + /// line, if any. + pub fn append(&mut self, other: Line) { + self.ending = other.ending; + self.text.append(other.text); + } + + /// Returns an iterator over the graphemes of the line pub fn grapheme_iter<'a>(&'a self) -> LineGraphemeIter<'a> { LineGraphemeIter { - graphemes: self.as_str().graphemes(true), + graphemes: self.text.grapheme_iter(), ending: self.ending, done: false, } @@ -368,19 +323,11 @@ impl Line { /// Returns an iterator over the graphemes of the line pub fn grapheme_iter_at_index<'a>(&'a self, index: usize) -> LineGraphemeIter<'a> { - let temp: &str = unsafe{mem::transmute(&self.text[])}; - - let mut iter = LineGraphemeIter { - graphemes: temp.graphemes(true), + LineGraphemeIter { + graphemes: self.text.grapheme_iter_at_index(index), ending: self.ending, done: false, - }; - - for _ in range(0, index) { - iter.next(); } - - return iter; } } @@ -473,7 +420,7 @@ pub const LINE_ENDINGS: [&'static str; 9] = ["", /// An iterator over the graphemes of a Line pub struct LineGraphemeIter<'a> { - graphemes: Graphemes<'a>, + graphemes: RopeGraphemeIter<'a>, ending: LineEnding, done: bool, } @@ -531,7 +478,7 @@ mod tests { fn new_text_line() { let tl = Line::new(); - assert!(tl.text.len() == 0); + assert_eq!(tl.text.grapheme_count(), 0); assert!(tl.ending == LineEnding::None); } @@ -539,13 +486,13 @@ mod tests { fn new_text_line_from_str() { let tl = Line::new_from_str("Hello!"); - assert!(tl.text.len() == 6); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 6); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == "!"); assert!(tl.ending == LineEnding::None); } @@ -553,7 +500,7 @@ mod tests { fn new_text_line_from_empty_str() { let tl = Line::new_from_str(""); - assert!(tl.text.len() == 0); + assert_eq!(tl.text.grapheme_count(), 0); assert!(tl.ending == LineEnding::None); } @@ -561,13 +508,13 @@ mod tests { fn new_text_line_from_str_with_lf() { let tl = Line::new_from_str("Hello!\n"); - assert!(tl.text.len() == 6); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 6); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == "!"); assert!(tl.ending == LineEnding::LF); } @@ -575,13 +522,13 @@ mod tests { fn new_text_line_from_str_with_crlf() { let tl = Line::new_from_str("Hello!\r\n"); - assert!(tl.text.len() == 6); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 6); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == "!"); assert!(tl.ending == LineEnding::CRLF); } @@ -589,13 +536,13 @@ mod tests { fn new_text_line_from_str_with_crlf_and_too_long() { let tl = Line::new_from_str("Hello!\r\nLa la la la"); - assert!(tl.text.len() == 6); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 6); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == "!"); assert!(tl.ending == LineEnding::CRLF); } @@ -605,13 +552,13 @@ mod tests { let tl = Line::new_from_string_unchecked(s); - assert!(tl.text.len() == 6); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 6); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == "!"); assert!(tl.ending == LineEnding::None); } @@ -621,13 +568,13 @@ mod tests { let tl = Line::new_from_string_unchecked(s); - assert!(tl.text.len() == 6); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 6); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == "!"); assert!(tl.ending == LineEnding::LF); } @@ -637,13 +584,13 @@ mod tests { let tl = Line::new_from_string_unchecked(s); - assert!(tl.text.len() == 6); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 6); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == "!"); assert!(tl.ending == LineEnding::CRLF); } @@ -653,13 +600,13 @@ mod tests { let tl = Line::new_from_string_unchecked(s); - assert!(tl.text.len() == 6); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 6); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == "!"); assert!(tl.ending == LineEnding::LS); } @@ -669,19 +616,19 @@ mod tests { tl.insert_text(" world", 5); - assert!(tl.text.len() == 12); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == (' ' as u8)); - assert!(tl.text[6] == ('w' as u8)); - assert!(tl.text[7] == ('o' as u8)); - assert!(tl.text[8] == ('r' as u8)); - assert!(tl.text[9] == ('l' as u8)); - assert!(tl.text[10] == ('d' as u8)); - assert!(tl.text[11] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 12); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == " "); + assert!(&tl.text[6] == "w"); + assert!(&tl.text[7] == "o"); + assert!(&tl.text[8] == "r"); + assert!(&tl.text[9] == "l"); + assert!(&tl.text[10] == "d"); + assert!(&tl.text[11] == "!"); assert!(tl.ending == LineEnding::CRLF); } @@ -691,19 +638,19 @@ mod tests { tl.append_text(" world!"); - assert!(tl.text.len() == 12); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == (' ' as u8)); - assert!(tl.text[6] == ('w' as u8)); - assert!(tl.text[7] == ('o' as u8)); - assert!(tl.text[8] == ('r' as u8)); - assert!(tl.text[9] == ('l' as u8)); - assert!(tl.text[10] == ('d' as u8)); - assert!(tl.text[11] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 12); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == " "); + assert!(&tl.text[6] == "w"); + assert!(&tl.text[7] == "o"); + assert!(&tl.text[8] == "r"); + assert!(&tl.text[9] == "l"); + assert!(&tl.text[10] == "d"); + assert!(&tl.text[11] == "!"); assert!(tl.ending == LineEnding::CRLF); } @@ -713,13 +660,13 @@ mod tests { tl.remove_text(5, 11); - assert!(tl.text.len() == 6); - assert!(tl.text[0] == ('H' as u8)); - assert!(tl.text[1] == ('e' as u8)); - assert!(tl.text[2] == ('l' as u8)); - assert!(tl.text[3] == ('l' as u8)); - assert!(tl.text[4] == ('o' as u8)); - assert!(tl.text[5] == ('!' as u8)); + assert_eq!(tl.text.grapheme_count(), 6); + assert!(&tl.text[0] == "H"); + assert!(&tl.text[1] == "e"); + assert!(&tl.text[2] == "l"); + assert!(&tl.text[3] == "l"); + assert!(&tl.text[4] == "o"); + assert!(&tl.text[5] == "!"); assert!(tl.ending == LineEnding::CRLF); } @@ -729,22 +676,22 @@ mod tests { let tl2 = tl1.split(LineEnding::LF, 5); - assert!(tl1.text.len() == 5); - assert!(tl1.text[0] == ('H' as u8)); - assert!(tl1.text[1] == ('e' as u8)); - assert!(tl1.text[2] == ('l' as u8)); - assert!(tl1.text[3] == ('l' as u8)); - assert!(tl1.text[4] == ('o' as u8)); + assert_eq!(tl1.text.grapheme_count(), 5); + assert!(&tl1.text[0] == "H"); + assert!(&tl1.text[1] == "e"); + assert!(&tl1.text[2] == "l"); + assert!(&tl1.text[3] == "l"); + assert!(&tl1.text[4] == "o"); assert!(tl1.ending == LineEnding::LF); - assert!(tl2.text.len() == 7); - assert!(tl2.text[0] == (' ' as u8)); - assert!(tl2.text[1] == ('w' as u8)); - assert!(tl2.text[2] == ('o' as u8)); - assert!(tl2.text[3] == ('r' as u8)); - assert!(tl2.text[4] == ('l' as u8)); - assert!(tl2.text[5] == ('d' as u8)); - assert!(tl2.text[6] == ('!' as u8)); + assert_eq!(tl2.text.grapheme_count(), 7); + assert!(&tl2.text[0] == " "); + assert!(&tl2.text[1] == "w"); + assert!(&tl2.text[2] == "o"); + assert!(&tl2.text[3] == "r"); + assert!(&tl2.text[4] == "l"); + assert!(&tl2.text[5] == "d"); + assert!(&tl2.text[6] == "!"); assert!(tl2.ending == LineEnding::CRLF); } @@ -754,16 +701,16 @@ mod tests { let tl2 = tl1.split(LineEnding::LF, 0); - assert!(tl1.text.len() == 0); + assert_eq!(tl1.text.grapheme_count(), 0); assert!(tl1.ending == LineEnding::LF); - assert!(tl2.text.len() == 6); - assert!(tl2.text[0] == ('H' as u8)); - assert!(tl2.text[1] == ('e' as u8)); - assert!(tl2.text[2] == ('l' as u8)); - assert!(tl2.text[3] == ('l' as u8)); - assert!(tl2.text[4] == ('o' as u8)); - assert!(tl2.text[5] == ('!' as u8)); + assert_eq!(tl2.text.grapheme_count(), 6); + assert!(&tl2.text[0] == "H"); + assert!(&tl2.text[1] == "e"); + assert!(&tl2.text[2] == "l"); + assert!(&tl2.text[3] == "l"); + assert!(&tl2.text[4] == "o"); + assert!(&tl2.text[5] == "!"); assert!(tl2.ending == LineEnding::CRLF); } diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs index 8947584..a6d09d0 100644 --- a/src/buffer/mod.rs +++ b/src/buffer/mod.rs @@ -5,7 +5,7 @@ use std::old_path::Path; use std::old_io::fs::File; use std::old_io::{IoResult, BufferedReader, BufferedWriter}; -use self::line::{Line, line_ending_to_str}; +use self::line::Line; use self::node::{BufferNode, BufferNodeGraphemeIter, BufferNodeLineIter}; use self::undo_stack::{UndoStack}; use self::undo_stack::Operation::*; @@ -89,11 +89,11 @@ impl Buffer { pub fn save_to_file(&self, path: &Path) -> IoResult<()> { + // TODO: make more efficient let mut f = BufferedWriter::new(try!(File::create(path))); - for l in self.line_iter() { - let _ = f.write_str(l.as_str()); - let _ = f.write_str(line_ending_to_str(l.ending)); + for g in self.grapheme_iter() { + let _ = f.write_str(g); } return Ok(()); diff --git a/src/buffer/node.rs b/src/buffer/node.rs index d5cede9..1e3590d 100644 --- a/src/buffer/node.rs +++ b/src/buffer/node.rs @@ -582,11 +582,11 @@ impl BufferNode { } - pub fn merge_line_with_next_recursive(&mut self, line_number: usize, fetched_line: Option<&Line>) { + pub fn merge_line_with_next_recursive(&mut self, line_number: usize, fetched_line: Option) { match fetched_line { None => { let line: Option = self.pull_out_line_recursive(line_number + 1); - if let Some(ref l) = line { + if let Some(l) = line { self.merge_line_with_next_recursive(line_number, Some(l)); } }, @@ -603,8 +603,7 @@ impl BufferNode { }, BufferNodeData::Leaf(ref mut line2) => { - line2.append_text(line.as_str()); - line2.ending = line.ending; + line2.append(line); } } } @@ -1069,7 +1068,7 @@ mod tests { assert!(node.line_count == 5); let line = node.pull_out_line_recursive(0).unwrap(); - assert!(line.as_str() == "Hi"); + assert!(line.to_string().as_slice() == "Hi"); assert!(line.ending == LineEnding::LF); let mut iter = node.grapheme_iter(); @@ -1119,7 +1118,7 @@ mod tests { assert!(node.line_count == 5); let line = node.pull_out_line_recursive(2).unwrap(); - assert!(line.as_str() == " people "); + assert!(line.to_string().as_slice() == " people "); assert!(line.ending == LineEnding::LF); let mut iter = node.grapheme_iter(); @@ -1163,7 +1162,7 @@ mod tests { assert!(node.line_count == 5); let line = node.pull_out_line_recursive(4).unwrap(); - assert!(line.as_str() == " world!"); + assert!(line.to_string().as_slice() == " world!"); assert!(line.ending == LineEnding::None); let mut iter = node.grapheme_iter(); diff --git a/src/buffer/rope.rs b/src/buffer/rope.rs index b96043a..673f1f7 100644 --- a/src/buffer/rope.rs +++ b/src/buffer/rope.rs @@ -1,9 +1,10 @@ use std::cmp::{min, max}; use std::mem; use std::str::Graphemes; -use string_utils::{grapheme_count, insert_text_at_grapheme_index, remove_text_between_grapheme_indices, split_string_at_grapheme_index}; +use std::ops::Index; +use string_utils::{grapheme_count, insert_text_at_grapheme_index, remove_text_between_grapheme_indices, split_string_at_grapheme_index, grapheme_pos_to_byte_pos}; -const MIN_NODE_SIZE: usize = 1024; +const MIN_NODE_SIZE: usize = 2048; const MAX_NODE_SIZE: usize = MIN_NODE_SIZE * 2; @@ -40,7 +41,7 @@ impl Rope { tree_height: 1, }; - rope.split(); + rope.split_if_too_large(); return rope; } @@ -55,7 +56,7 @@ impl Rope { tree_height: 1, }; - rope.split(); + rope.split_if_too_large(); return rope; } @@ -83,7 +84,7 @@ impl Rope { } self.update_stats(); - self.split(); + self.split_if_too_large(); self.rebalance(); } @@ -115,19 +116,53 @@ impl Rope { } self.update_stats(); - self.merge(); + self.merge_if_too_small(); self.rebalance(); } + /// Splits a rope into two pieces from the given grapheme index. + /// The first piece remains in this rope, the second piece is returned + /// as a new rope. + pub fn split(&mut self, pos: usize) -> Rope { + // TODO: make more efficient. + let s = self.to_string(); + let gc = self.grapheme_count(); + let bp = grapheme_pos_to_byte_pos(s.as_slice(), pos); + self.remove_text_between_grapheme_indices(pos, gc); + + Rope::new_from_str(&s.as_slice()[bp..]) + } + + /// Appends another rope to the end of this one, consuming the other rope. + pub fn append(&mut self, rope: Rope) { + // TODO: make more efficient. Converting to a string and then + // inserting is pretty slow... + let s = rope.to_string(); + let gc = self.grapheme_count(); + self.insert_text_at_grapheme_index(s.as_slice(), gc); + } - /// Creates an iterator at the first grapheme of the rope - pub fn grapheme_iter<'a>(&'a self) -> RopeGraphemeIter<'a> { - self.grapheme_iter_at_index(0) + /// Makes a copy of the rope as a string + pub fn to_string(&self) -> String { + let mut s = String::new(); + + for chunk in self.chunk_iter() { + s.push_str(chunk); + } + + return s; } - /// Creates an iterator at the given grapheme index - pub fn grapheme_iter_at_index<'a>(&'a self, index: usize) -> RopeGraphemeIter<'a> { + /// Creates a chunk iterator for the rope + pub fn chunk_iter<'a>(&'a self) -> RopeChunkIter<'a> { + self.chunk_iter_at_index(0).1 + } + + + /// Creates a chunk iter starting at the chunk containing the given + /// grapheme index. Returns the chunk and its starting grapheme index. + pub fn chunk_iter_at_index<'a>(&'a self, index: usize) -> (usize, RopeChunkIter<'a>) { let mut node_stack: Vec<&'a Rope> = Vec::new(); let mut cur_node = self; let mut grapheme_i = index; @@ -136,6 +171,7 @@ impl Rope { loop { match cur_node.data { RopeData::Leaf(_) => { + node_stack.push(cur_node); break; }, @@ -152,8 +188,22 @@ impl Rope { } } + (index - grapheme_i, RopeChunkIter {node_stack: node_stack}) + } + + + /// Creates an iterator at the first grapheme of the rope + pub fn grapheme_iter<'a>(&'a self) -> RopeGraphemeIter<'a> { + self.grapheme_iter_at_index(0) + } + + + /// Creates an iterator at the given grapheme index + pub fn grapheme_iter_at_index<'a>(&'a self, index: usize) -> RopeGraphemeIter<'a> { + let (grapheme_i, mut chunk_iter) = self.chunk_iter_at_index(index); + // Create the grapheme iter for the current node - let mut gi = if let RopeData::Leaf(ref text) = cur_node.data { + let mut giter = if let Some(text) = chunk_iter.next() { text.as_slice().graphemes(true) } else { @@ -161,14 +211,14 @@ impl Rope { }; // Get to the right spot in the iter - for _ in 0..grapheme_i { - gi.next(); + for _ in grapheme_i..index { + giter.next(); } // Create the rope grapheme iter return RopeGraphemeIter { - node_stack: node_stack, - cur_chunk: gi, + chunk_iter: chunk_iter, + cur_chunk: giter, }; } @@ -208,7 +258,7 @@ impl Rope { // if lots of splits need to happen. This version ends up re-scanning // the text quite a lot, as well as doing quite a few unnecessary // allocations. - fn split(&mut self) { + fn split_if_too_large(&mut self) { if self.grapheme_count_ > MAX_NODE_SIZE && self.is_leaf() { // Calculate split position and how large the left and right @@ -229,8 +279,8 @@ impl Rope { // Recursively split nl.grapheme_count_ = new_gc_l; nr.grapheme_count_ = new_gc_r; - nl.split(); - nr.split(); + nl.split_if_too_large(); + nr.split_if_too_large(); // Update the new left and right node's stats nl.update_stats(); @@ -244,14 +294,14 @@ impl Rope { /// Merges a non-leaf node into a leaf node if it's too small - fn merge(&mut self) { + fn merge_if_too_small(&mut self) { if self.grapheme_count_ < MIN_NODE_SIZE && !self.is_leaf() { let mut merged_text = String::new(); if let RopeData::Branch(ref mut left, ref mut right) = self.data { // First, recursively merge the children - left.merge(); - right.merge(); + left.merge_if_too_small(); + right.merge_if_too_small(); // Then put their text into merged_text if let RopeData::Leaf(ref mut text) = left.data { @@ -386,42 +436,95 @@ impl Rope { } +// Direct indexing to graphemes in the rope +impl Index for Rope { + type Output = str; + + fn index<'a>(&'a self, index: &usize) -> &'a str { + if *index >= self.grapheme_count() { + panic!("Rope::Index: attempting to fetch grapheme that outside the bounds of the text."); + } + + match self.data { + RopeData::Leaf(ref text) => { + let mut i: usize = 0; + for g in text.graphemes(true) { + if i == *index { + return &g; + } + i += 1; + } + unreachable!(); + }, + + RopeData::Branch(ref left, ref right) => { + if *index < left.grapheme_count() { + return &left[*index]; + } + else { + return &right[*index - left.grapheme_count()]; + } + }, + } + } +} + + //============================================================= // Rope iterators //============================================================= -/// An iterator over a text buffer's graphemes -pub struct RopeGraphemeIter<'a> { +/// An iterator over a rope's string chunks +pub struct RopeChunkIter<'a> { node_stack: Vec<&'a Rope>, - cur_chunk: Graphemes<'a>, +} + +impl<'a> Iterator for RopeChunkIter<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option<&'a str> { + if let Some(next_chunk) = self.node_stack.pop() { + loop { + if let Option::Some(node) = self.node_stack.pop() { + match node.data { + RopeData::Leaf(_) => { + self.node_stack.push(node); + break; + }, + + RopeData::Branch(ref left, ref right) => { + self.node_stack.push(&(**right)); + self.node_stack.push(&(**left)); + continue; + } + } + } + else { + break; + } + } + + if let RopeData::Leaf(ref text) = next_chunk.data { + return Some(text.as_slice()); + } + else { + unreachable!(); + } + } + else { + return None; + } + } } -impl<'a> RopeGraphemeIter<'a> { - // Skips the iterator to the next chunk of the rope, if any. - pub fn next_chunk(&mut self) -> bool { - loop { - if let Option::Some(node) = self.node_stack.pop() { - match node.data { - RopeData::Leaf(ref text) => { - self.cur_chunk = text.as_slice().graphemes(true); - return true; - }, - - RopeData::Branch(ref left, ref right) => { - self.node_stack.push(&(**right)); - self.node_stack.push(&(**left)); - continue; - } - } - } - else { - return false; - } - } - } + +/// An iterator over a rope's graphemes +pub struct RopeGraphemeIter<'a> { + chunk_iter: RopeChunkIter<'a>, + cur_chunk: Graphemes<'a>, } @@ -430,15 +533,16 @@ impl<'a> Iterator for RopeGraphemeIter<'a> { fn next(&mut self) -> Option<&'a str> { loop { - if let Option::Some(g) = self.cur_chunk.next() { - return Option::Some(g); + if let Some(g) = self.cur_chunk.next() { + return Some(g); } else { - if self.next_chunk() { + if let Some(s) = self.chunk_iter.next() { + self.cur_chunk = s.graphemes(true); continue; } else { - return Option::None; + return None; } } } @@ -454,7 +558,7 @@ impl<'a> Iterator for RopeGraphemeIter<'a> { #[cfg(test)] mod tests { - use super::{Rope, RopeGraphemeIter}; + use super::*; #[test] @@ -509,6 +613,87 @@ mod tests { } + #[test] + fn index() { + let rope = Rope::new_from_str("Hel世界lo world!"); + + assert_eq!("H", &rope[0]); + assert_eq!("界", &rope[4]); + } + + + #[test] + fn to_string() { + let rope = Rope::new_from_str("Hello there good people of the world!"); + let s = rope.to_string(); + + assert_eq!("Hello there good people of the world!", s.as_slice()); + } + + + #[test] + fn split_1() { + let mut rope1 = Rope::new_from_str("Hello there good people of the world!"); + let rope2 = rope1.split(18); + + assert_eq!("Hello there good p", rope1.to_string().as_slice()); + assert_eq!("eople of the world!", rope2.to_string().as_slice()); + } + + + #[test] + fn split_2() { + let mut rope1 = Rope::new_from_str("Hello there good people of the world!"); + let rope2 = rope1.split(37); + + assert_eq!("Hello there good people of the world!", rope1.to_string().as_slice()); + assert_eq!("", rope2.to_string().as_slice()); + } + + + #[test] + fn split_3() { + let mut rope1 = Rope::new_from_str("Hello there good people of the world!"); + let rope2 = rope1.split(0); + + assert_eq!("", rope1.to_string().as_slice()); + assert_eq!("Hello there good people of the world!", rope2.to_string().as_slice()); + } + + + #[test] + fn append_1() { + let mut rope1 = Rope::new_from_str("Hello there good p"); + let rope2 = Rope::new_from_str("eople of the world!"); + + rope1.append(rope2); + + assert_eq!("Hello there good people of the world!", rope1.to_string().as_slice()); + } + + + #[test] + fn append_2() { + let mut rope1 = Rope::new_from_str("Hello there good people of the world!"); + let rope2 = Rope::new_from_str(""); + + rope1.append(rope2); + + assert_eq!("Hello there good people of the world!", rope1.to_string().as_slice()); + } + + + #[test] + fn append_3() { + let mut rope1 = Rope::new_from_str(""); + let rope2 = Rope::new_from_str("Hello there good people of the world!"); + + rope1.append(rope2); + + assert_eq!("Hello there good people of the world!", rope1.to_string().as_slice()); + } + + #[test] fn insert_text() { let mut rope = Rope::new(); @@ -1017,4 +1202,5 @@ mod tests { assert!(Some("9") == iter.next()); assert!(None == iter.next()); } + } \ No newline at end of file