#![allow(dead_code)] use std::mem; use std::str::Graphemes; use string_utils::{grapheme_count, grapheme_pos_to_byte_pos, is_line_ending}; /// A single line of text pub struct Line { text: Vec, // The text data, stored as UTF8 pub ending: LineEnding, // The type of line ending, if any } impl Line { /// Creates a new empty Line pub fn new() -> Line { Line { text: Vec::new(), ending: LineEnding::None, } } /// Creates a new Line from a str. pub fn new_from_str(text: &str) -> Line { // Initialize Line let mut tl = Line { text: Vec::with_capacity(text.len()), ending: LineEnding::None, }; // Copy text data, stopping on a line ending if any is found for g in text.graphemes(true) { match g { //============== // Line endings //============== // CRLF "\u{000D}\u{000A}" => { tl.ending = LineEnding::CRLF; break; }, // LF "\u{000A}" => { tl.ending = LineEnding::LF; break; }, // VT "\u{000B}" => { tl.ending = LineEnding::VT; break; }, // FF "\u{000C}" => { tl.ending = LineEnding::FF; break; }, // CR "\u{000D}" => { tl.ending = LineEnding::CR; break; }, // NEL "\u{0085}" => { tl.ending = LineEnding::NEL; break; }, // LS "\u{2028}" => { tl.ending = LineEnding::LS; break; }, // PS "\u{2029}" => { tl.ending = LineEnding::PS; break; }, //================== // Other characters //================== _ => { for b in g.bytes() { tl.text.push(b); } } } } // Done! return tl; } /// Returns the total number of unicode graphemes in the line pub fn grapheme_count(&self) -> uint { let mut count = grapheme_count(self.as_str()); match self.ending { LineEnding::None => {}, _ => {count += 1;} } return count; } /// Returns the total number of unicode graphemes in the line, /// not counting the line ending grapheme, if any. pub fn grapheme_count_sans_line_ending(&self) -> uint { grapheme_count(self.as_str()) } /// Returns an immutable string slice into the text block's memory pub fn as_str<'a>(&'a self) -> &'a str { unsafe { mem::transmute(self.text.as_slice()) } } /// Inserts `text` at grapheme index `pos`. /// NOTE: panics if it encounters a line ending in the text. pub fn insert_text(&mut self, text: &str, pos: uint) { // Find insertion position in bytes let byte_pos = grapheme_pos_to_byte_pos(self.as_str(), pos); // Grow data size self.text.grow(text.len(), 0); // Move old bytes forward let mut from = self.text.len() - text.len(); let mut to = self.text.len(); while from > byte_pos { from -= 1; to -= 1; self.text[to] = self.text[from]; } // Copy new bytes in let mut i = byte_pos; for g in text.graphemes(true) { if is_line_ending(g) { panic!("Line::insert_text(): line ending in inserted text."); } for b in g.bytes() { self.text[i] = b; i += 1 } } } /// Remove the text between grapheme positions 'pos_a' and 'pos_b'. pub fn remove_text(&mut self, pos_a: uint, pos_b: uint) { // Bounds checks if pos_a > pos_b { panic!("Line::remove_text(): pos_a must be less than or equal to pos_b."); } // Find removal positions in bytes let byte_pos_a = grapheme_pos_to_byte_pos(self.as_str(), pos_a); let byte_pos_b = grapheme_pos_to_byte_pos(self.as_str(), pos_b); // Move bytes to fill in the gap left by the removed bytes let mut from = byte_pos_b; let mut to = byte_pos_a; while from < self.text.len() { self.text[to] = self.text[from]; from += 1; to += 1; } // Remove data from the end let final_text_size = self.text.len() + byte_pos_a - byte_pos_b; self.text.truncate(final_text_size); } /// Insert a line break into the line, splitting it into two. /// This line stays as the first part of the split. The second /// part is returned. pub fn split(&mut self, ending: LineEnding, pos: uint) -> Line { let mut other = Line::new(); // Inserting at very beginning: special cased for efficiency if pos == 0 { mem::swap(self, &mut other); self.ending = ending; } // Otherwise, general case else { // Find the byte index to split at let byte_pos = grapheme_pos_to_byte_pos(self.as_str(), pos); // Copy the elements after the split index to the second line other.text.push_all(self.text.slice_from_or_fail(&byte_pos)); // Truncate the first line self.text.truncate(byte_pos); // Set the line endings appropriately other.ending = self.ending; self.ending = ending; } return other; } /// Returns an iterator over the graphemes of the line pub fn grapheme_iter<'a>(&'a self) -> LineGraphemeIter<'a> { LineGraphemeIter { graphemes: self.as_str().graphemes(true), ending: self.ending, done: false, } } /// Returns an iterator over the graphemes of the line pub fn grapheme_iter_at_index<'a>(&'a self, index: uint) -> LineGraphemeIter<'a> { let temp: &str = unsafe{mem::transmute(self.text.as_slice())}; let mut iter = LineGraphemeIter { graphemes: temp.graphemes(true), ending: self.ending, done: false, }; for _ in range(0, index) { iter.next(); } return iter; } } /// Represents one of the valid Unicode line endings. /// Also acts as an index into `LINE_ENDINGS`. #[deriving(PartialEq, Copy)] pub enum LineEnding { None = 0, // No line ending CRLF = 1, // CarriageReturn followed by LineFeed LF = 2, // U+000A -- LineFeed VT = 3, // U+000B -- VerticalTab FF = 4, // U+000C -- FormFeed CR = 5, // U+000D -- CarriageReturn NEL = 6, // U+0085 -- NextLine LS = 7, // U+2028 -- Line Separator PS = 8, // U+2029 -- ParagraphSeparator } pub fn str_to_line_ending(g: &str) -> LineEnding { match g { //============== // Line endings //============== // CRLF "\u{000D}\u{000A}" => { return LineEnding::CRLF; }, // LF "\u{000A}" => { return LineEnding::LF; }, // VT "\u{000B}" => { return LineEnding::VT; }, // FF "\u{000C}" => { return LineEnding::FF; }, // CR "\u{000D}" => { return LineEnding::CR; }, // NEL "\u{0085}" => { return LineEnding::NEL; }, // LS "\u{2028}" => { return LineEnding::LS; }, // PS "\u{2029}" => { return LineEnding::PS; }, // Not a line ending _ => { return LineEnding::None; } } } /// An array of string literals corresponding to the possible /// unicode line endings. pub const LINE_ENDINGS: [&'static str, ..9] = ["", "\u{000D}\u{000A}", "\u{000A}", "\u{000B}", "\u{000C}", "\u{000D}", "\u{0085}", "\u{2028}", "\u{2029}" ]; /// An iterator over the graphemes of a Line pub struct LineGraphemeIter<'a> { graphemes: Graphemes<'a>, ending: LineEnding, done: bool, } impl<'a> Iterator<&'a str> for LineGraphemeIter<'a> { fn next(&mut self) -> Option<&'a str> { if self.done { return None; } else { let g = self.graphemes.next(); if let Some(_) = g { return g; } else { self.done = true; if self.ending == LineEnding::None { return None; } else { return Some(LINE_ENDINGS[self.ending as uint]); } } } } } //========================================================================= // Line tests //========================================================================= #[test] fn new_text_line() { let tl = Line::new(); assert!(tl.text.len() == 0); assert!(tl.ending == LineEnding::None); } #[test] fn new_text_line_from_str() { let tl = Line::new_from_str("Hello!"); assert!(tl.text.len() == 6); assert!(tl.text[0] == ('H' as u8)); assert!(tl.text[1] == ('e' as u8)); assert!(tl.text[2] == ('l' as u8)); assert!(tl.text[3] == ('l' as u8)); assert!(tl.text[4] == ('o' as u8)); assert!(tl.text[5] == ('!' as u8)); assert!(tl.ending == LineEnding::None); } #[test] fn new_text_line_from_empty_str() { let tl = Line::new_from_str(""); assert!(tl.text.len() == 0); assert!(tl.ending == LineEnding::None); } #[test] fn new_text_line_from_str_with_lf() { let tl = Line::new_from_str("Hello!\n"); assert!(tl.text.len() == 6); assert!(tl.text[0] == ('H' as u8)); assert!(tl.text[1] == ('e' as u8)); assert!(tl.text[2] == ('l' as u8)); assert!(tl.text[3] == ('l' as u8)); assert!(tl.text[4] == ('o' as u8)); assert!(tl.text[5] == ('!' as u8)); assert!(tl.ending == LineEnding::LF); } #[test] fn new_text_line_from_str_with_crlf() { let tl = Line::new_from_str("Hello!\r\n"); assert!(tl.text.len() == 6); assert!(tl.text[0] == ('H' as u8)); assert!(tl.text[1] == ('e' as u8)); assert!(tl.text[2] == ('l' as u8)); assert!(tl.text[3] == ('l' as u8)); assert!(tl.text[4] == ('o' as u8)); assert!(tl.text[5] == ('!' as u8)); assert!(tl.ending == LineEnding::CRLF); } #[test] fn new_text_line_from_str_with_crlf_and_too_long() { let tl = Line::new_from_str("Hello!\r\nLa la la la"); assert!(tl.text.len() == 6); assert!(tl.text[0] == ('H' as u8)); assert!(tl.text[1] == ('e' as u8)); assert!(tl.text[2] == ('l' as u8)); assert!(tl.text[3] == ('l' as u8)); assert!(tl.text[4] == ('o' as u8)); assert!(tl.text[5] == ('!' as u8)); assert!(tl.ending == LineEnding::CRLF); } #[test] fn text_line_insert_text() { let mut tl = Line::new_from_str("Hello!\r\n"); tl.insert_text(" world", 5); assert!(tl.text.len() == 12); assert!(tl.text[0] == ('H' as u8)); assert!(tl.text[1] == ('e' as u8)); assert!(tl.text[2] == ('l' as u8)); assert!(tl.text[3] == ('l' as u8)); assert!(tl.text[4] == ('o' as u8)); assert!(tl.text[5] == (' ' as u8)); assert!(tl.text[6] == ('w' as u8)); assert!(tl.text[7] == ('o' as u8)); assert!(tl.text[8] == ('r' as u8)); assert!(tl.text[9] == ('l' as u8)); assert!(tl.text[10] == ('d' as u8)); assert!(tl.text[11] == ('!' as u8)); assert!(tl.ending == LineEnding::CRLF); } #[test] fn text_line_remove_text() { let mut tl = Line::new_from_str("Hello world!\r\n"); tl.remove_text(5, 11); assert!(tl.text.len() == 6); assert!(tl.text[0] == ('H' as u8)); assert!(tl.text[1] == ('e' as u8)); assert!(tl.text[2] == ('l' as u8)); assert!(tl.text[3] == ('l' as u8)); assert!(tl.text[4] == ('o' as u8)); assert!(tl.text[5] == ('!' as u8)); assert!(tl.ending == LineEnding::CRLF); } #[test] fn text_line_split() { let mut tl1 = Line::new_from_str("Hello world!\r\n"); let tl2 = tl1.split(LineEnding::LF, 5); assert!(tl1.text.len() == 5); assert!(tl1.text[0] == ('H' as u8)); assert!(tl1.text[1] == ('e' as u8)); assert!(tl1.text[2] == ('l' as u8)); assert!(tl1.text[3] == ('l' as u8)); assert!(tl1.text[4] == ('o' as u8)); assert!(tl1.ending == LineEnding::LF); assert!(tl2.text.len() == 7); assert!(tl2.text[0] == (' ' as u8)); assert!(tl2.text[1] == ('w' as u8)); assert!(tl2.text[2] == ('o' as u8)); assert!(tl2.text[3] == ('r' as u8)); assert!(tl2.text[4] == ('l' as u8)); assert!(tl2.text[5] == ('d' as u8)); assert!(tl2.text[6] == ('!' as u8)); assert!(tl2.ending == LineEnding::CRLF); } #[test] fn text_line_split_beginning() { let mut tl1 = Line::new_from_str("Hello!\r\n"); let tl2 = tl1.split(LineEnding::LF, 0); assert!(tl1.text.len() == 0); assert!(tl1.ending == LineEnding::LF); assert!(tl2.text.len() == 6); assert!(tl2.text[0] == ('H' as u8)); assert!(tl2.text[1] == ('e' as u8)); assert!(tl2.text[2] == ('l' as u8)); assert!(tl2.text[3] == ('l' as u8)); assert!(tl2.text[4] == ('o' as u8)); assert!(tl2.text[5] == ('!' as u8)); assert!(tl2.ending == LineEnding::CRLF); } //========================================================================= // LineGraphemeIter tests //========================================================================= #[test] fn text_line_grapheme_iter() { let tl = Line::new_from_str("Hello!"); let mut iter = tl.grapheme_iter(); assert!(iter.next() == Some("H")); assert!(iter.next() == Some("e")); assert!(iter.next() == Some("l")); assert!(iter.next() == Some("l")); assert!(iter.next() == Some("o")); assert!(iter.next() == Some("!")); assert!(iter.next() == None); } #[test] fn text_line_grapheme_iter_with_lf() { let tl = Line::new_from_str("Hello!\n"); let mut iter = tl.grapheme_iter(); assert!(iter.next() == Some("H")); assert!(iter.next() == Some("e")); assert!(iter.next() == Some("l")); assert!(iter.next() == Some("l")); assert!(iter.next() == Some("o")); assert!(iter.next() == Some("!")); assert!(iter.next() == Some("\n")); assert!(iter.next() == None); } #[test] fn text_line_grapheme_iter_with_crlf() { let tl = Line::new_from_str("Hello!\r\n"); let mut iter = tl.grapheme_iter(); assert!(iter.next() == Some("H")); assert!(iter.next() == Some("e")); assert!(iter.next() == Some("l")); assert!(iter.next() == Some("l")); assert!(iter.next() == Some("o")); assert!(iter.next() == Some("!")); assert!(iter.next() == Some("\r\n")); assert!(iter.next() == None); } #[test] fn text_line_grapheme_iter_at_index() { let tl = Line::new_from_str("Hello!"); let mut iter = tl.grapheme_iter_at_index(2); assert!(iter.next() == Some("l")); assert!(iter.next() == Some("l")); assert!(iter.next() == Some("o")); assert!(iter.next() == Some("!")); assert!(iter.next() == None); } #[test] fn text_line_grapheme_iter_at_index_past_end() { let tl = Line::new_from_str("Hello!"); let mut iter = tl.grapheme_iter_at_index(10); assert!(iter.next() == None); } #[test] fn text_line_grapheme_iter_at_index_at_lf() { let tl = Line::new_from_str("Hello!\n"); let mut iter = tl.grapheme_iter_at_index(6); assert!(iter.next() == Some("\n")); assert!(iter.next() == None); }