#![allow(dead_code)] use std::iter::Iterator; use std::result::Result; use std::slice; #[derive(Debug, Eq, PartialEq)] pub enum DataTree<'a> { Internal { type_name: &'a str, ident: Option<&'a str>, children: Vec>, byte_offset: usize, }, Leaf { type_name: &'a str, contents: &'a str, byte_offset: usize, }, } impl<'a> DataTree<'a> { pub fn from_str(source_text: &'a str) -> Result, ParseError> { let mut items = Vec::new(); let mut remaining_text = (0, source_text); while let Some((item, text)) = parse_node(remaining_text)? { remaining_text = text; items.push(item); } remaining_text = skip_ws_and_comments(remaining_text); if remaining_text.1.len() == 0 { return Ok(DataTree::Internal { type_name: "ROOT", ident: None, children: items, byte_offset: 0, }); } else { // If the whole text wasn't parsed, something went wrong. return Err(ParseError::Other((0, "Failed to parse the entire string."))); } } pub fn type_name(&'a self) -> &'a str { match self { &DataTree::Internal { type_name, .. } => type_name, &DataTree::Leaf { type_name, .. } => type_name, } } pub fn byte_offset(&'a self) -> usize { match self { &DataTree::Internal { byte_offset, .. } => byte_offset, &DataTree::Leaf { byte_offset, .. } => byte_offset, } } pub fn is_internal(&self) -> bool { match self { &DataTree::Internal { .. } => true, &DataTree::Leaf { .. } => false, } } pub fn is_leaf(&self) -> bool { match self { &DataTree::Internal { .. } => false, &DataTree::Leaf { .. } => true, } } pub fn leaf_contents(&'a self) -> Option<&'a str> { match self { &DataTree::Internal { .. } => None, &DataTree::Leaf { contents, .. } => Some(contents), } } pub fn iter_children(&'a self) -> slice::Iter<'a, DataTree<'a>> { if let &DataTree::Internal { ref children, .. } = self { children.iter() } else { [].iter() } } pub fn iter_children_with_type(&'a self, type_name: &'static str) -> DataTreeFilterIter<'a> { if let &DataTree::Internal { ref children, .. } = self { DataTreeFilterIter { type_name: type_name, iter: children.iter(), } } else { DataTreeFilterIter { type_name: type_name, iter: [].iter(), } } } pub fn iter_internal_children_with_type(&'a self, type_name: &'static str) -> DataTreeFilterInternalIter<'a> { if let &DataTree::Internal { ref children, .. } = self { DataTreeFilterInternalIter { type_name: type_name, iter: children.iter(), } } else { DataTreeFilterInternalIter { type_name: type_name, iter: [].iter(), } } } pub fn iter_leaf_children_with_type(&'a self, type_name: &'static str) -> DataTreeFilterLeafIter<'a> { if let &DataTree::Internal { ref children, .. } = self { DataTreeFilterLeafIter { type_name: type_name, iter: children.iter(), } } else { DataTreeFilterLeafIter { type_name: type_name, iter: [].iter(), } } } // For unit tests fn internal_data_or_panic(&'a self) -> (&'a str, Option<&'a str>, &'a Vec>) { if let DataTree::Internal { type_name, ident, ref children, byte_offset: _ } = *self { (type_name, ident, children) } else { panic!("Expected DataTree::Internal, found DataTree::Leaf") } } fn leaf_data_or_panic(&'a self) -> (&'a str, &'a str) { if let DataTree::Leaf { type_name, contents, byte_offset: _ } = *self { (type_name, contents) } else { panic!("Expected DataTree::Leaf, found DataTree::Internal") } } } /// An iterator over the children of a DataTree node that filters out the /// children not matching a specified type name. pub struct DataTreeFilterIter<'a> { type_name: &'a str, iter: slice::Iter<'a, DataTree<'a>>, } impl<'a> Iterator for DataTreeFilterIter<'a> { type Item = &'a DataTree<'a>; fn next(&mut self) -> Option<&'a DataTree<'a>> { loop { if let Some(dt) = self.iter.next() { if dt.type_name() == self.type_name { return Some(dt); } else { continue; } } else { return None; } } } } /// An iterator over the children of a DataTree node that filters out the /// children that aren't internal nodes and that don't match a specified /// type name. pub struct DataTreeFilterInternalIter<'a> { type_name: &'a str, iter: slice::Iter<'a, DataTree<'a>>, } impl<'a> Iterator for DataTreeFilterInternalIter<'a> { type Item = (&'a str, Option<&'a str>, &'a Vec>, usize); fn next(&mut self) -> Option<(&'a str, Option<&'a str>, &'a Vec>, usize)> { loop { match self.iter.next() { Some(&DataTree::Internal { type_name, ident, ref children, byte_offset }) => { if type_name == self.type_name { return Some((type_name, ident, children, byte_offset)); } else { continue; } } Some(&DataTree::Leaf { .. }) => { continue; } None => { return None; } } } } } /// An iterator over the children of a DataTree node that filters out the /// children that aren't internal nodes and that don't match a specified /// type name. pub struct DataTreeFilterLeafIter<'a> { type_name: &'a str, iter: slice::Iter<'a, DataTree<'a>>, } impl<'a> Iterator for DataTreeFilterLeafIter<'a> { type Item = (&'a str, &'a str, usize); fn next(&mut self) -> Option<(&'a str, &'a str, usize)> { loop { match self.iter.next() { Some(&DataTree::Internal { .. }) => { continue; } Some(&DataTree::Leaf { type_name, contents, byte_offset }) => { if type_name == self.type_name { return Some((type_name, contents, byte_offset)); } else { continue; } } None => { return None; } } } } } #[derive(Copy, Clone, Eq, PartialEq, Debug)] pub enum ParseError { MissingOpener(usize), MissingOpenInternal(usize), MissingCloseInternal(usize), MissingOpenLeaf(usize), MissingCloseLeaf(usize), MissingTypeName(usize), UnexpectedIdent(usize), UnknownToken(usize), Other((usize, &'static str)), } // ================================================================ #[derive(Debug, PartialEq, Eq)] enum Token<'a> { OpenInner, CloseInner, OpenLeaf, CloseLeaf, TypeName(&'a str), Ident(&'a str), End, Unknown, } type ParseResult<'a> = Result, (usize, &'a str))>, ParseError>; fn parse_node<'a>(source_text: (usize, &'a str)) -> ParseResult<'a> { let (token, text1) = next_token(source_text); if let Token::TypeName(type_name) = token { match next_token(text1) { // Internal with name (Token::Ident(n), text2) => { if let (Token::OpenInner, text3) = next_token(text2) { let mut children = Vec::new(); let mut text_remaining = text3; while let Some((node, text4)) = parse_node(text_remaining)? { text_remaining = text4; children.push(node); } if let (Token::CloseInner, text4) = next_token(text_remaining) { return Ok(Some((DataTree::Internal { type_name: type_name, ident: Some(n), children: children, byte_offset: text1.0, }, text4))); } else { return Err(ParseError::MissingCloseInternal(text_remaining.0)); } } else { return Err(ParseError::MissingOpenInternal(text2.0)); } } // Internal without name (Token::OpenInner, text2) => { let mut children = Vec::new(); let mut text_remaining = text2; while let Some((node, text3)) = parse_node(text_remaining)? { text_remaining = text3; children.push(node); } if let (Token::CloseInner, text3) = next_token(text_remaining) { return Ok(Some((DataTree::Internal { type_name: type_name, ident: None, children: children, byte_offset: text1.0, }, text3))); } else { return Err(ParseError::MissingCloseInternal(text_remaining.0)); } } // Leaf (Token::OpenLeaf, text2) => { let (contents, text3) = parse_leaf_content(text2); if let (Token::CloseLeaf, text4) = next_token(text3) { return Ok(Some((DataTree::Leaf { type_name: type_name, contents: contents, byte_offset: text1.0, }, text4))); } else { return Err(ParseError::MissingCloseLeaf(text3.0)); } } // Other _ => { return Err(ParseError::MissingOpener(text1.0)); } } } else { return Ok(None); } } fn parse_leaf_content<'a>(source_text: (usize, &'a str)) -> (&'a str, (usize, &'a str)) { let mut si = 1; let mut escaped = false; let mut reached_end = true; for (i, c) in source_text.1.char_indices() { si = i; if escaped { escaped = false; } else if c == '\\' { escaped = true; } else if c == ']' { reached_end = false; break; } } if reached_end { si = source_text.1.len(); } return (&source_text.1[0..si], (source_text.0 + si, &source_text.1[si..])); } fn next_token<'a>(source_text: (usize, &'a str)) -> (Token<'a>, (usize, &'a str)) { let text1 = skip_ws_and_comments(source_text); if let Some(c) = text1.1.chars().nth(0) { let text2 = (text1.0 + c.len_utf8(), &text1.1[c.len_utf8()..]); match c { '{' => { return (Token::OpenInner, text2); } '}' => { return (Token::CloseInner, text2); } '[' => { return (Token::OpenLeaf, text2); } ']' => { return (Token::CloseLeaf, text2); } '$' => { // Parse name let mut si = 1; let mut escaped = false; let mut reached_end = true; for (i, c) in text1.1.char_indices().skip(1) { si = i; if escaped { escaped = false; } else if c == '\\' { escaped = true; } else if !is_ident_char(c) { reached_end = false; break; } } if reached_end { si = text1.1.len(); } return (Token::Ident(&text1.1[0..si]), (text1.0 + si, &text1.1[si..])); } _ => { if is_ident_char(c) { // Parse type let mut si = 0; let mut reached_end = true; for (i, c) in text1.1.char_indices() { si = i; if !is_ident_char(c) { reached_end = false; break; } } if reached_end { si = text1.1.len(); } return (Token::TypeName(&text1.1[0..si]), (text1.0 + si, &text1.1[si..])); } } } } else { return (Token::End, text1); } return (Token::Unknown, text1); } fn is_ws(c: char) -> bool { match c { '\n' | '\r' | '\t' | ' ' => true, _ => false, } } fn is_nl(c: char) -> bool { match c { '\n' | '\r' => true, _ => false, } } fn is_reserved_char(c: char) -> bool { match c { '{' | '}' | '[' | ']' | '$' | '#' | '\\' => true, _ => false, } } fn is_ident_char(c: char) -> bool { // Anything that isn't whitespace or a reserved character !is_ws(c) && !is_reserved_char(c) } fn skip_ws<'a>(text: &'a str) -> &'a str { let mut si = 0; let mut reached_end = true; for (i, c) in text.char_indices() { si = i; if !is_ws(c) { reached_end = false; break; } } if reached_end { si = text.len(); } return &text[si..]; } fn skip_comment<'a>(text: &'a str) -> &'a str { let mut si = 0; if Some('#') == text.chars().nth(0) { let mut reached_end = true; for (i, c) in text.char_indices() { si = i; if is_nl(c) { reached_end = false; break; } } if reached_end { si = text.len(); } } return &text[si..]; } fn skip_ws_and_comments<'a>(text: (usize, &'a str)) -> (usize, &'a str) { let mut remaining_text = text.1; loop { let tmp = skip_comment(skip_ws(remaining_text)); if tmp.len() == remaining_text.len() { break; } else { remaining_text = tmp; } } let offset = text.0 + text.1.len() - remaining_text.len(); return (offset, remaining_text); } // ================================================================ #[cfg(test)] mod tests { use super::*; use super::{next_token, Token}; #[test] fn tokenize_1() { let input = (0, "Thing"); assert_eq!(next_token(input), (Token::TypeName("Thing"), (5, ""))); } #[test] fn tokenize_2() { let input = (0, " \n# gdfgdf gfdg dggdf\\sg dfgsd \n Thing"); assert_eq!(next_token(input), (Token::TypeName("Thing"), (41, ""))); } #[test] fn tokenize_3() { let input1 = (0, " Thing { }"); let (token1, input2) = next_token(input1); let (token2, input3) = next_token(input2); let (token3, input4) = next_token(input3); assert_eq!((token1, input2.1), (Token::TypeName("Thing"), " { }")); assert_eq!((token2, input3.1), (Token::OpenInner, " }")); assert_eq!((token3, input4.1), (Token::CloseInner, "")); } #[test] fn tokenize_4() { let input = (0, " $hi_there "); assert_eq!(next_token(input), (Token::Ident("$hi_there"), (10, " "))); } #[test] fn tokenize_5() { let input = (0, " $hi\\ t\\#he\\[re "); assert_eq!(next_token(input), (Token::Ident("$hi\\ t\\#he\\[re"), (15, " "))); } #[test] fn tokenize_6() { let input1 = (0, " $hi the[re"); let (token1, input2) = next_token(input1); let (token2, input3) = next_token(input2); let (token3, input4) = next_token(input3); let (token4, input5) = next_token(input4); let (token5, input6) = next_token(input5); assert_eq!((token1, input2), (Token::Ident("$hi"), (4, " the[re"))); assert_eq!((token2, input3), (Token::TypeName("the"), (8, "[re"))); assert_eq!((token3, input4), (Token::OpenLeaf, (9, "re"))); assert_eq!((token4, input5), (Token::TypeName("re"), (11, ""))); assert_eq!((token5, input6), (Token::End, (11, ""))); } #[test] fn tokenize_7() { let input1 = (0, "Thing $yar { # A comment\n\tThing2 []\n}"); let (token1, input2) = next_token(input1); let (token2, input3) = next_token(input2); let (token3, input4) = next_token(input3); let (token4, input5) = next_token(input4); let (token5, input6) = next_token(input5); let (token6, input7) = next_token(input6); let (token7, input8) = next_token(input7); let (token8, input9) = next_token(input8); assert_eq!((token1, input2), (Token::TypeName("Thing"), (5, " $yar { # A comment\n\tThing2 []\n}"))); assert_eq!((token2, input3), (Token::Ident("$yar"), (10, " { # A comment\n\tThing2 []\n}"))); assert_eq!((token3, input4), (Token::OpenInner, (12, " # A comment\n\tThing2 []\n}"))); assert_eq!((token4, input5), (Token::TypeName("Thing2"), (32, " []\n}"))); assert_eq!((token5, input6), (Token::OpenLeaf, (34, "]\n}"))); assert_eq!((token6, input7), (Token::CloseLeaf, (35, "\n}"))); assert_eq!((token7, input8), (Token::CloseInner, (37, ""))); assert_eq!((token8, input9), (Token::End, (37, ""))); } #[test] fn parse_1() { let input = r#" Thing {} "#; let dt = DataTree::from_str(input).unwrap(); // Root let (t, i, c) = dt.internal_data_or_panic(); assert_eq!(t, "ROOT"); assert_eq!(i, None); assert_eq!(c.len(), 1); // First (and only) child let (t, i, c) = c[0].internal_data_or_panic(); assert_eq!(t, "Thing"); assert_eq!(i, None); assert_eq!(c.len(), 0); } #[test] fn iter_1() { let dt = DataTree::from_str(r#" A {} B {} A [] A {} B {} "#) .unwrap(); let i = dt.iter_children_with_type("A"); assert_eq!(i.count(), 3); } #[test] fn iter_2() { let dt = DataTree::from_str(r#" A {} B {} A [] A {} B {} "#) .unwrap(); let i = dt.iter_internal_children_with_type("A"); assert_eq!(i.count(), 2); } #[test] fn iter_3() { let dt = DataTree::from_str(r#" A [] B {} A {} A [] B {} "#) .unwrap(); let i = dt.iter_leaf_children_with_type("A"); assert_eq!(i.count(), 2); } }