diff --git a/src/buffer/line.rs b/src/buffer/line.rs index 459c122..d2946fe 100644 --- a/src/buffer/line.rs +++ b/src/buffer/line.rs @@ -189,6 +189,37 @@ impl Line { } + /// Insert a line break into the line, splitting it into two. + /// This line stays as the first part of the split. The second + /// part is returned. + pub fn split(&mut self, ending: LineEnding, pos: uint) -> Line { + let mut other = Line::new(); + + // Inserting at very beginning: special cased for efficiency + if pos == 0 { + mem::swap(self, &mut other); + self.ending = ending; + } + // Otherwise, general case + else { + // Find the byte index to split at + let byte_pos = grapheme_pos_to_byte_pos(self.as_str(), pos); + + // Copy the elements after the split index to the second line + other.text.push_all(self.text.slice_from_or_fail(&byte_pos)); + + // Truncate the first line + self.text.truncate(byte_pos); + + // Set the line endings appropriately + other.ending = self.ending; + self.ending = ending; + } + + return other; + } + + /// Returns an iterator over the graphemes of the line pub fn grapheme_iter<'a>(&'a self) -> LineGraphemeIter<'a> { LineGraphemeIter { @@ -233,6 +264,59 @@ pub enum LineEnding { PS = 8, // U+2029 -- ParagraphSeparator } +pub fn str_to_line_ending(g: &str) -> LineEnding { + match g { + //============== + // Line endings + //============== + + // CRLF + "\u{000D}\u{000A}" => { + return LineEnding::CRLF; + }, + + // LF + "\u{000A}" => { + return LineEnding::LF; + }, + + // VT + "\u{000B}" => { + return LineEnding::VT; + }, + + // FF + "\u{000C}" => { + return LineEnding::FF; + }, + + // CR + "\u{000D}" => { + return LineEnding::CR; + }, + + // NEL + "\u{0085}" => { + return LineEnding::NEL; + }, + + // LS + "\u{2028}" => { + return LineEnding::LS; + }, + + // PS + "\u{2029}" => { + return LineEnding::PS; + }, + + // Not a line ending + _ => { + return LineEnding::None; + } + } +} + /// An array of string literals corresponding to the possible /// unicode line endings. pub const LINE_ENDINGS: [&'static str, ..9] = ["", @@ -395,6 +479,50 @@ fn text_line_remove_text() { assert!(tl.ending == LineEnding::CRLF); } +#[test] +fn text_line_split() { + let mut tl1 = Line::new_from_str("Hello world!\r\n"); + + let tl2 = tl1.split(LineEnding::LF, 5); + + assert!(tl1.text.len() == 5); + assert!(tl1.text[0] == ('H' as u8)); + assert!(tl1.text[1] == ('e' as u8)); + assert!(tl1.text[2] == ('l' as u8)); + assert!(tl1.text[3] == ('l' as u8)); + assert!(tl1.text[4] == ('o' as u8)); + assert!(tl1.ending == LineEnding::LF); + + assert!(tl2.text.len() == 7); + assert!(tl2.text[0] == (' ' as u8)); + assert!(tl2.text[1] == ('w' as u8)); + assert!(tl2.text[2] == ('o' as u8)); + assert!(tl2.text[3] == ('r' as u8)); + assert!(tl2.text[4] == ('l' as u8)); + assert!(tl2.text[5] == ('d' as u8)); + assert!(tl2.text[6] == ('!' as u8)); + assert!(tl2.ending == LineEnding::CRLF); +} + +#[test] +fn text_line_split_beginning() { + let mut tl1 = Line::new_from_str("Hello!\r\n"); + + let tl2 = tl1.split(LineEnding::LF, 0); + + assert!(tl1.text.len() == 0); + assert!(tl1.ending == LineEnding::LF); + + assert!(tl2.text.len() == 6); + assert!(tl2.text[0] == ('H' as u8)); + assert!(tl2.text[1] == ('e' as u8)); + assert!(tl2.text[2] == ('l' as u8)); + assert!(tl2.text[3] == ('l' as u8)); + assert!(tl2.text[4] == ('o' as u8)); + assert!(tl2.text[5] == ('!' as u8)); + assert!(tl2.ending == LineEnding::CRLF); +} + //========================================================================= // LineGraphemeIter tests diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs index 0ad65a9..a81b0f4 100644 --- a/src/buffer/mod.rs +++ b/src/buffer/mod.rs @@ -5,21 +5,22 @@ use std::fmt; use std::mem; use std::cmp::{min, max}; +use string_utils::is_line_ending; use self::node::{BufferNode, BufferNodeData}; -use self::line::{Line, LineGraphemeIter}; +use self::line::{Line, LineGraphemeIter, str_to_line_ending}; -//mod text_block; -//mod text_node; mod line; mod node; - +//============================================================= +// Buffer +//============================================================= /// A text buffer pub struct Buffer { root: BufferNode, -}// +} impl Buffer { @@ -88,10 +89,43 @@ impl Buffer { } - // /// Insert 'text' at char position 'pos'. - // pub fn insert_text(&mut self, text: &str, pos: uint) { - // self.root.insert_text(text, pos); - // } + /// Insert 'text' at char position 'pos'. + pub fn insert_text(&mut self, text: &str, pos: uint) { + // Byte indices + let mut b1: uint = 0; + let mut b2: uint = 0; + + // Grapheme indices + let mut g1: uint = 0; + let mut g2: uint = 0; + + // Iterate through graphemes + for grapheme in text.grapheme_indices(true) { + if is_line_ending(grapheme.1) { + if g1 < g2 { + self.root.insert_text_recursive(text.slice(b1, b2), pos + g1); + } + + b1 = b2; + g1 = g2; + b2 += grapheme.1.len(); + g2 += 1; + + self.root.insert_line_break_recursive(str_to_line_ending(grapheme.1), pos + g1); + + b1 = b2; + g1 = g2; + } + else { + b2 += grapheme.1.len(); + g2 += 1; + } + } + + if g1 < g2 { + self.root.insert_text_recursive(text.slice(b1, b2), pos + g1); + } + } // // /// Remove the text between char positions 'pos_a' and 'pos_b'. @@ -101,37 +135,37 @@ impl Buffer { /// Creates an iterator at the first character - // pub fn root_iter<'a>(&'a self) -> TextBufferIter<'a> { - // let mut node_stack: Vec<&'a TextNode> = Vec::new(); - // let mut cur_node = &self.root; - // - // loop { - // match cur_node.data { - // TextNodeData::Leaf(_) => { - // break; - // }, - // - // TextNodeData::Branch(ref left, ref right) => { - // node_stack.push(&(**right)); - // cur_node = &(**left); - // } - // } - // } - // - // TextBufferIter { - // node_stack: node_stack, - // cur_block: match cur_node.data { - // TextNodeData::Leaf(ref tb) => tb.as_str().chars(), - // _ => panic!("This should never happen.") - // } - // } - // } - // - // - // /// Creates an iterator starting at the specified character index. + pub fn grapheme_iter<'a>(&'a self) -> BufferGraphemeIter<'a> { + let mut node_stack: Vec<&'a BufferNode> = Vec::new(); + let mut cur_node = &self.root; + + loop { + match cur_node.data { + BufferNodeData::Leaf(_) => { + break; + }, + + BufferNodeData::Branch(ref left, ref right) => { + node_stack.push(&(**right)); + cur_node = &(**left); + } + } + } + + BufferGraphemeIter { + node_stack: node_stack, + cur_line: match cur_node.data { + BufferNodeData::Leaf(ref line) => line.grapheme_iter(), + _ => panic!("This should never happen.") + } + } + } + + + // /// Creates an iterator starting at the specified grapheme index. // /// If the index is past the end of the text, then the iterator will // /// return None on next(). - // pub fn iter_at_char<'a>(&'a self, index: uint) -> TextBufferIter<'a> { + // pub fn grapheme_iter_at_index<'a>(&'a self, index: uint) -> BufferGraphemeIter<'a> { // let mut node_stack: Vec<&'a TextNode> = Vec::new(); // let mut cur_node = &self.root; // let mut char_i = index; @@ -182,97 +216,134 @@ impl Buffer { - +//============================================================= +// Buffer iterators +//============================================================= /// An iterator over a text buffer's graphemes pub struct BufferGraphemeIter<'a> { node_stack: Vec<&'a BufferNode>, - cur_line: &'a Line, + cur_line: LineGraphemeIter<'a>, } -// impl<'a> TextBufferIter<'a> { -// // Puts the iterator on the next line -// pub fn next_line(&mut self) -> Option { -// // TODO: more efficient implementation, taking advantage of rope -// // structure. -// for c in *self { -// if c == '\n' { -// return Option::Some(c); -// } -// } -// -// return Option::None; -// } -// -// -// // Skips the iterator n characters ahead -// pub fn skip_chars(&mut self, n: uint) { -// // TODO: more efficient implementation, taking advantage of rope -// // structure. -// for _ in range(0, n) { -// if let Option::None = self.next() { -// break; -// } -// } -// } -// -// -// // Skips the iterator n characters ahead, unless it hits a newline -// // character. If it hits a newline character, returns true, otherwise, -// // false. -// pub fn skip_non_newline_chars(&mut self, n: uint) -> bool { -// // TODO: more efficient implementation, taking advantage of rope -// // structure. -// for _ in range(0, n) { -// match self.next() { -// Option::Some(c) => { -// if c == '\n' { -// return true; -// } -// }, -// -// Option::None => { -// break; -// } -// } -// } -// -// return false; -// } -// } -// -// -// impl<'a> Iterator for TextBufferIter<'a> { -// fn next(&mut self) -> Option { -// if let Option::Some(c) = self.cur_block.next() { -// return Option::Some(c); -// } -// -// loop { -// if let Option::Some(node) = self.node_stack.pop() { -// match node.data { -// TextNodeData::Leaf(ref tb) => { -// self.cur_block = tb.as_str().chars(); -// -// if let Option::Some(c) = self.cur_block.next() { -// return Option::Some(c); -// } -// else { -// continue; -// } -// }, -// -// TextNodeData::Branch(ref left, ref right) => { -// self.node_stack.push(&(**right)); -// self.node_stack.push(&(**left)); -// continue; -// } -// } -// } -// else { -// return Option::None; -// } -// } -// } -// } \ No newline at end of file +impl<'a> BufferGraphemeIter<'a> { + // Puts the iterator on the next line. + // Returns true if there was a next line, + // false if there wasn't. + pub fn next_line(&mut self) -> bool { + loop { + if let Option::Some(node) = self.node_stack.pop() { + match node.data { + BufferNodeData::Leaf(ref line) => { + self.cur_line = line.grapheme_iter(); + return true; + }, + + BufferNodeData::Branch(ref left, ref right) => { + self.node_stack.push(&(**right)); + self.node_stack.push(&(**left)); + continue; + } + } + } + else { + return false; + } + } + } + + + // Skips the iterator n graphemes ahead. + // If it runs out of graphemes before reaching the desired skip count, + // returns false. Otherwise returns true. + pub fn skip_graphemes(&mut self, n: uint) -> bool { + // TODO: more efficient implementation + for _ in range(0, n) { + if let Option::None = self.next() { + return false; + } + } + + return true; + } + + +} + + +impl<'a> Iterator<&'a str> for BufferGraphemeIter<'a> { + fn next(&mut self) -> Option<&'a str> { + loop { + if let Option::Some(g) = self.cur_line.next() { + return Option::Some(g); + } + + if self.next_line() { + continue; + } + else { + return Option::None; + } + } + } + + +} + + + +//================================================================ +// TESTS +//================================================================ + +#[test] +fn insert_text() { + let mut buf = Buffer::new(); + + buf.insert_text("Hello world!", 0); + + let mut iter = buf.grapheme_iter(); + + assert!(buf.root.line_count == 1); + assert!(Some("H") == iter.next()); + assert!(Some("e") == iter.next()); + assert!(Some("l") == iter.next()); + assert!(Some("l") == iter.next()); + assert!(Some("o") == iter.next()); + assert!(Some(" ") == iter.next()); + assert!(Some("w") == iter.next()); + assert!(Some("o") == iter.next()); + assert!(Some("r") == iter.next()); + assert!(Some("l") == iter.next()); + assert!(Some("d") == iter.next()); + assert!(Some("!") == iter.next()); + assert!(None == iter.next()); +} + + +#[test] +fn insert_text_with_newlines() { + let mut buf = Buffer::new(); + + buf.insert_text("Hello\n world\r\n!", 0); + + let mut iter = buf.grapheme_iter(); + + assert!(buf.root.line_count == 3); + assert!(Some("H") == iter.next()); + assert!(Some("e") == iter.next()); + assert!(Some("l") == iter.next()); + assert!(Some("l") == iter.next()); + assert!(Some("o") == iter.next()); + assert!(Some("\n") == iter.next()); + assert!(Some(" ") == iter.next()); + assert!(Some("w") == iter.next()); + assert!(Some("o") == iter.next()); + assert!(Some("r") == iter.next()); + assert!(Some("l") == iter.next()); + assert!(Some("d") == iter.next()); + assert!(Some("\r\n") == iter.next()); + assert!(Some("!") == iter.next()); + assert!(None == iter.next()); +} \ No newline at end of file diff --git a/src/buffer/node.rs b/src/buffer/node.rs index 6f35a2a..6742ec9 100644 --- a/src/buffer/node.rs +++ b/src/buffer/node.rs @@ -3,7 +3,7 @@ use std::fmt; use std::mem; use std::cmp::{min, max}; -use super::line::{Line, LineGraphemeIter}; +use super::line::{Line, LineEnding, LineGraphemeIter}; pub enum BufferNodeData { Leaf(Line), @@ -29,6 +29,18 @@ impl BufferNode { } + pub fn new_from_line(line: Line) -> BufferNode { + let gc = line.grapheme_count(); + + BufferNode { + data: BufferNodeData::Leaf(line), + tree_height: 1, + grapheme_count: gc, + line_count: 1, + } + } + + fn update_height(&mut self) { match self.data { BufferNodeData::Leaf(_) => { @@ -233,6 +245,77 @@ impl BufferNode { } } + + /// Inserts the given text string at the given grapheme position. + /// Note: this assumes the given text has no newline graphemes. + pub fn insert_text_recursive(&mut self, text: &str, pos: uint) { + match self.data { + // Find node for text to be inserted into + BufferNodeData::Branch(ref mut left, ref mut right) => { + if pos < left.grapheme_count { + left.insert_text_recursive(text, pos); + } + else { + right.insert_text_recursive(text, pos - left.grapheme_count); + } + + }, + + // Insert the text + BufferNodeData::Leaf(ref mut line) => { + line.insert_text(text, pos); + }, + } + + self.update_stats(); + } + + + /// Inserts a line break at the given grapheme position + pub fn insert_line_break_recursive(&mut self, ending: LineEnding, pos: uint) { + if ending == LineEnding::None { + return; + } + + let mut old_line = Line::new(); + let mut do_split: bool; + + match self.data { + // Find node for the line break to be inserted into + BufferNodeData::Branch(ref mut left, ref mut right) => { + if pos < left.grapheme_count { + left.insert_line_break_recursive(ending, pos); + } + else { + right.insert_line_break_recursive(ending, pos - left.grapheme_count); + } + do_split = false; + }, + + // We need to insert the line break, so get the data we + // need for that (can't do it here because of borrow checker). + BufferNodeData::Leaf(ref mut line) => { + mem::swap(&mut old_line, line); + do_split = true; + }, + } + + if do_split { + // Insert line break + let new_line = old_line.split(ending, pos); + let new_node_a = box BufferNode::new_from_line(old_line); + let new_node_b = box BufferNode::new_from_line(new_line); + + self.data = BufferNodeData::Branch(new_node_a, new_node_b); + + self.update_stats(); + } + else { + self.update_stats(); + self.rebalance(); + } + } + pub fn remove_lines_recursive(&mut self, line_a: uint, line_b: uint) { let mut remove_left = false;