psychopath/src/parse/data_tree.rs
Nathan Vegdahl 6623443e2e Improved .psy file parsing error messages.
Biggest improvement: it gives you line numbers.  But also progress
on better descriptions.
2017-04-10 14:03:01 -07:00

701 lines
20 KiB
Rust

#![allow(dead_code)]
use std::iter::Iterator;
use std::result::Result;
use std::slice;
/// A node in the tree produced by parsing source text with
/// `DataTree::from_str`.
///
/// All string fields are slices borrowed from the parsed source text
/// (lifetime `'a`); nothing is copied.
#[derive(Debug, Eq, PartialEq)]
pub enum DataTree<'a> {
/// A node written as `TypeName [$ident] { ...children... }`.
Internal {
/// The type-name token that introduced the node. The synthetic root
/// built by `from_str` uses `"ROOT"`.
type_name: &'a str,
/// The optional identifier token following the type name. The slice
/// includes the leading `$` and any backslash escapes, unprocessed.
ident: Option<&'a str>,
/// The child nodes, in source order.
children: Vec<DataTree<'a>>,
/// Byte offset into the source text recorded by the parser: the
/// position just past the node's type name (0 for the root).
byte_offset: usize,
},
/// A node written as `TypeName [contents]`.
Leaf {
/// The type-name token that introduced the node.
type_name: &'a str,
/// The raw text between `[` and the first unescaped `]`, with
/// backslash escapes left unprocessed.
contents: &'a str,
/// Byte offset into the source text recorded by the parser: the
/// position just past the node's type name.
byte_offset: usize,
},
}
impl<'a> DataTree<'a> {
pub fn from_str(source_text: &'a str) -> Result<DataTree<'a>, ParseError> {
let mut items = Vec::new();
let mut remaining_text = (0, source_text);
while let Some((item, text)) = parse_node(remaining_text)? {
remaining_text = text;
items.push(item);
}
remaining_text = skip_ws_and_comments(remaining_text);
if remaining_text.1.len() == 0 {
return Ok(DataTree::Internal {
type_name: "ROOT",
ident: None,
children: items,
byte_offset: 0,
});
} else {
// If the whole text wasn't parsed, something went wrong.
return Err(ParseError::Other((0, "Failed to parse the entire string.")));
}
}
pub fn type_name(&'a self) -> &'a str {
match self {
&DataTree::Internal { type_name, .. } => type_name,
&DataTree::Leaf { type_name, .. } => type_name,
}
}
pub fn byte_offset(&'a self) -> usize {
match self {
&DataTree::Internal { byte_offset, .. } => byte_offset,
&DataTree::Leaf { byte_offset, .. } => byte_offset,
}
}
pub fn is_internal(&self) -> bool {
match self {
&DataTree::Internal { .. } => true,
&DataTree::Leaf { .. } => false,
}
}
pub fn is_leaf(&self) -> bool {
match self {
&DataTree::Internal { .. } => false,
&DataTree::Leaf { .. } => true,
}
}
pub fn leaf_contents(&'a self) -> Option<&'a str> {
match self {
&DataTree::Internal { .. } => None,
&DataTree::Leaf { contents, .. } => Some(contents),
}
}
pub fn iter_children(&'a self) -> slice::Iter<'a, DataTree<'a>> {
if let &DataTree::Internal { ref children, .. } = self {
children.iter()
} else {
[].iter()
}
}
pub fn iter_children_with_type(&'a self, type_name: &'static str) -> DataTreeFilterIter<'a> {
if let &DataTree::Internal { ref children, .. } = self {
DataTreeFilterIter {
type_name: type_name,
iter: children.iter(),
}
} else {
DataTreeFilterIter {
type_name: type_name,
iter: [].iter(),
}
}
}
pub fn iter_internal_children_with_type(&'a self,
type_name: &'static str)
-> DataTreeFilterInternalIter<'a> {
if let &DataTree::Internal { ref children, .. } = self {
DataTreeFilterInternalIter {
type_name: type_name,
iter: children.iter(),
}
} else {
DataTreeFilterInternalIter {
type_name: type_name,
iter: [].iter(),
}
}
}
pub fn iter_leaf_children_with_type(&'a self,
type_name: &'static str)
-> DataTreeFilterLeafIter<'a> {
if let &DataTree::Internal { ref children, .. } = self {
DataTreeFilterLeafIter {
type_name: type_name,
iter: children.iter(),
}
} else {
DataTreeFilterLeafIter {
type_name: type_name,
iter: [].iter(),
}
}
}
// For unit tests
fn internal_data_or_panic(&'a self) -> (&'a str, Option<&'a str>, &'a Vec<DataTree<'a>>) {
if let DataTree::Internal { type_name, ident, ref children, byte_offset: _ } = *self {
(type_name, ident, children)
} else {
panic!("Expected DataTree::Internal, found DataTree::Leaf")
}
}
fn leaf_data_or_panic(&'a self) -> (&'a str, &'a str) {
if let DataTree::Leaf { type_name, contents, byte_offset: _ } = *self {
(type_name, contents)
} else {
panic!("Expected DataTree::Leaf, found DataTree::Internal")
}
}
}
/// An iterator over the children of a DataTree node that filters out the
/// children not matching a specified type name.
///
/// Both internal and leaf children are yielded, as `&DataTree` references.
pub struct DataTreeFilterIter<'a> {
// Type name a child must have to be yielded.
type_name: &'a str,
// Underlying iterator over all of the node's children.
iter: slice::Iter<'a, DataTree<'a>>,
}
impl<'a> Iterator for DataTreeFilterIter<'a> {
    type Item = &'a DataTree<'a>;

    /// Advances the underlying child iterator to the next child whose type
    /// name equals the filter's type name and returns it, or returns `None`
    /// once the children are exhausted.
    fn next(&mut self) -> Option<&'a DataTree<'a>> {
        let wanted = self.type_name;
        self.iter.find(|node| node.type_name() == wanted)
    }
}
/// An iterator over the children of a DataTree node that filters out the
/// children that aren't internal nodes and that don't match a specified
/// type name.
///
/// Each item is the matching node's fields as a tuple:
/// `(type_name, ident, children, byte_offset)`.
pub struct DataTreeFilterInternalIter<'a> {
// Type name an internal child must have to be yielded.
type_name: &'a str,
// Underlying iterator over all of the node's children.
iter: slice::Iter<'a, DataTree<'a>>,
}
impl<'a> Iterator for DataTreeFilterInternalIter<'a> {
    type Item = (&'a str, Option<&'a str>, &'a Vec<DataTree<'a>>, usize);

    /// Returns the fields of the next internal child whose type name equals
    /// the filter's type name, skipping leaves and non-matching internal
    /// nodes.  Returns `None` once the children are exhausted.
    fn next(&mut self) -> Option<(&'a str, Option<&'a str>, &'a Vec<DataTree<'a>>, usize)> {
        let wanted = self.type_name;
        for node in &mut self.iter {
            if let DataTree::Internal { type_name, ident, ref children, byte_offset } = *node {
                if type_name == wanted {
                    return Some((type_name, ident, children, byte_offset));
                }
            }
        }
        None
    }
}
/// An iterator over the children of a DataTree node that filters out the
/// children that aren't leaf nodes and that don't match a specified
/// type name.
///
/// Each item is the matching leaf's fields as a tuple:
/// `(type_name, contents, byte_offset)`.
pub struct DataTreeFilterLeafIter<'a> {
// Type name a leaf child must have to be yielded.
type_name: &'a str,
// Underlying iterator over all of the node's children.
iter: slice::Iter<'a, DataTree<'a>>,
}
impl<'a> Iterator for DataTreeFilterLeafIter<'a> {
    type Item = (&'a str, &'a str, usize);

    /// Returns the fields of the next leaf child whose type name equals the
    /// filter's type name, skipping internal nodes and non-matching leaves.
    /// Returns `None` once the children are exhausted.
    fn next(&mut self) -> Option<(&'a str, &'a str, usize)> {
        let wanted = self.type_name;
        self.iter
            .by_ref()
            .filter_map(|node| match *node {
                DataTree::Leaf { type_name, contents, byte_offset } if type_name == wanted => {
                    Some((type_name, contents, byte_offset))
                }
                _ => None,
            })
            .next()
    }
}
/// Errors reported while parsing source text into a `DataTree`.
///
/// Every variant carries a byte offset into the source text that indicates
/// where the problem was detected.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum ParseError {
/// A type name was not followed by an ident, `{`, or `[`.
MissingOpener(usize),
/// A type name and ident were not followed by `{`.
MissingOpenInternal(usize),
/// An internal node's children were not followed by `}`.
MissingCloseInternal(usize),
/// Not produced by the parser in this file; presumably a missing `[`.
MissingOpenLeaf(usize),
/// A leaf's contents were not followed by `]`.
MissingCloseLeaf(usize),
/// Not produced by the parser in this file; presumably a node without a
/// type name.
MissingTypeName(usize),
/// Not produced by the parser in this file; presumably an ident where
/// none is allowed.
UnexpectedIdent(usize),
/// Not produced by the parser in this file; presumably an unrecognized
/// token.
UnknownToken(usize),
/// Any other error: a byte offset paired with a static description.
Other((usize, &'static str)),
}
// ================================================================
/// A lexical token produced by `next_token`.
#[derive(Debug, PartialEq, Eq)]
enum Token<'a> {
/// `{` -- opens the body of an internal node.
OpenInner,
/// `}` -- closes the body of an internal node.
CloseInner,
/// `[` -- opens the contents of a leaf node.
OpenLeaf,
/// `]` -- closes the contents of a leaf node.
CloseLeaf,
/// A run of identifier characters (see `is_ident_char`).
TypeName(&'a str),
/// A `$`-prefixed identifier.  The slice includes the leading `$` and
/// any backslash escapes, unprocessed.
Ident(&'a str),
/// No text remained after skipping whitespace and comments.
End,
/// The next character could not start any known token.
Unknown,
}
/// Result of attempting to parse one node: `Ok(Some((node, rest)))` on
/// success, where `rest` is the remaining `(byte_offset, text)` pair;
/// `Ok(None)` when the text does not begin with a node; `Err` on malformed
/// input.
type ParseResult<'a> = Result<Option<(DataTree<'a>, (usize, &'a str))>, ParseError>;
/// Attempts to parse a single node from the front of `source_text`.
///
/// `source_text` is a `(byte_offset, text)` pair, where `byte_offset` is the
/// position of `text` within the complete source.
///
/// Returns `Ok(None)` when the next token is not a type name (i.e. no node
/// starts here), `Ok(Some((node, rest)))` when a node was parsed, and an
/// error when a node starts but is malformed.  The `byte_offset` stored in
/// the resulting node is the offset just past its type name.
fn parse_node<'a>(source_text: (usize, &'a str)) -> ParseResult<'a> {
    // Every node starts with a type name; anything else means "no node".
    let (type_name, after_type) = match next_token(source_text) {
        (Token::TypeName(name), rest) => (name, rest),
        _ => return Ok(None),
    };

    match next_token(after_type) {
        // Internal with name
        (Token::Ident(ident), after_ident) => {
            match next_token(after_ident) {
                (Token::OpenInner, body) => {
                    parse_internal_body(type_name, Some(ident), after_type.0, body)
                }
                _ => Err(ParseError::MissingOpenInternal(after_ident.0)),
            }
        }

        // Internal without name
        (Token::OpenInner, body) => parse_internal_body(type_name, None, after_type.0, body),

        // Leaf
        (Token::OpenLeaf, body) => {
            let (contents, after_contents) = parse_leaf_content(body);
            match next_token(after_contents) {
                (Token::CloseLeaf, rest) => {
                    let leaf = DataTree::Leaf {
                        type_name: type_name,
                        contents: contents,
                        byte_offset: after_type.0,
                    };
                    Ok(Some((leaf, rest)))
                }
                _ => Err(ParseError::MissingCloseLeaf(after_contents.0)),
            }
        }

        // Other
        _ => Err(ParseError::MissingOpener(after_type.0)),
    }
}

/// Parses the children and closing `}` of an internal node whose opening
/// `{` has already been consumed, then assembles the `DataTree::Internal`.
///
/// `node_offset` is the byte offset to store in the node; `body` is the
/// text immediately following the `{`.
fn parse_internal_body<'a>(type_name: &'a str,
                           ident: Option<&'a str>,
                           node_offset: usize,
                           body: (usize, &'a str))
                           -> ParseResult<'a> {
    let mut children = Vec::new();
    let mut cursor = body;
    while let Some((child, rest)) = parse_node(cursor)? {
        cursor = rest;
        children.push(child);
    }

    match next_token(cursor) {
        (Token::CloseInner, rest) => {
            let node = DataTree::Internal {
                type_name: type_name,
                ident: ident,
                children: children,
                byte_offset: node_offset,
            };
            Ok(Some((node, rest)))
        }
        _ => Err(ParseError::MissingCloseInternal(cursor.0)),
    }
}
/// Splits `source_text` at the first unescaped `]`.
///
/// `source_text` is a `(byte_offset, text)` pair.  A backslash escapes the
/// character that follows it, so `\]` does not end the contents.  Returns
/// the contents (everything before the terminating `]`, escapes left
/// unprocessed) together with the remaining `(byte_offset, text)` pair,
/// which still begins with the `]`.  If no unescaped `]` exists, the whole
/// text is the contents and the remainder is empty.
fn parse_leaf_content<'a>(source_text: (usize, &'a str)) -> (&'a str, (usize, &'a str)) {
    let (base_offset, text) = source_text;

    // Defaults to the end of the text in case no terminator is found.
    let mut end = text.len();
    let mut escaped = false;
    for (idx, ch) in text.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' => escaped = true,
            ']' => {
                end = idx;
                break;
            }
            _ => {}
        }
    }

    let (contents, rest) = text.split_at(end);
    (contents, (base_offset + end, rest))
}
/// Reads the next token from `source_text`, a `(byte_offset, text)` pair.
///
/// Leading whitespace and `#` comments are skipped first.  Returns the
/// token and the `(byte_offset, text)` pair that follows it.  For
/// `Token::End` and `Token::Unknown` nothing beyond the skipped
/// whitespace/comments is consumed.
fn next_token<'a>(source_text: (usize, &'a str)) -> (Token<'a>, (usize, &'a str)) {
    let (offset, text) = skip_ws_and_comments(source_text);

    let first = match text.chars().next() {
        Some(c) => c,
        None => return (Token::End, (offset, text)),
    };

    // Remainder after consuming just the first character.
    let first_len = first.len_utf8();
    let after_first = (offset + first_len, &text[first_len..]);

    match first {
        '{' => (Token::OpenInner, after_first),
        '}' => (Token::CloseInner, after_first),
        '[' => (Token::OpenLeaf, after_first),
        ']' => (Token::CloseLeaf, after_first),

        '$' => {
            // Parse name: runs until the first unescaped non-identifier
            // character.  A backslash escapes the character after it.
            let mut end = text.len();
            let mut escaped = false;
            for (idx, ch) in text.char_indices().skip(1) {
                if escaped {
                    escaped = false;
                } else if ch == '\\' {
                    escaped = true;
                } else if !is_ident_char(ch) {
                    end = idx;
                    break;
                }
            }
            (Token::Ident(&text[..end]), (offset + end, &text[end..]))
        }

        c if is_ident_char(c) => {
            // Parse type: runs until the first non-identifier character.
            let end = text.find(|ch: char| !is_ident_char(ch)).unwrap_or(text.len());
            (Token::TypeName(&text[..end]), (offset + end, &text[end..]))
        }

        _ => (Token::Unknown, (offset, text)),
    }
}
/// Returns `true` for the characters treated as whitespace: newline,
/// carriage return, tab, and space.
fn is_ws(c: char) -> bool {
    c == '\n' || c == '\r' || c == '\t' || c == ' '
}
/// Returns `true` for the line-ending characters: newline and carriage
/// return.
fn is_nl(c: char) -> bool {
    c == '\n' || c == '\r'
}
/// Returns `true` for the characters that have syntactic meaning and so
/// cannot appear unescaped in names: `{`, `}`, `[`, `]`, `$`, `#`, and `\`.
fn is_reserved_char(c: char) -> bool {
    const RESERVED: [char; 7] = ['{', '}', '[', ']', '$', '#', '\\'];
    RESERVED.iter().any(|&r| r == c)
}
/// Returns `true` if `c` may appear in a type name or identifier, i.e. it
/// is neither whitespace nor a reserved character.
fn is_ident_char(c: char) -> bool {
    !(is_ws(c) || is_reserved_char(c))
}
/// Returns the suffix of `text` that starts at its first non-whitespace
/// character (as judged by `is_ws`), or the empty suffix at the end of
/// `text` if it is all whitespace.
fn skip_ws<'a>(text: &'a str) -> &'a str {
    let start = text.find(|c: char| !is_ws(c)).unwrap_or(text.len());
    &text[start..]
}
fn skip_comment<'a>(text: &'a str) -> &'a str {
let mut si = 0;
if Some('#') == text.chars().nth(0) {
let mut reached_end = true;
for (i, c) in text.char_indices() {
si = i;
if is_nl(c) {
reached_end = false;
break;
}
}
if reached_end {
si = text.len();
}
}
return &text[si..];
}
/// Skips any mix of leading whitespace and `#` comments in `text`, a
/// `(byte_offset, text)` pair, and returns the pair for what remains, with
/// the offset advanced by the number of bytes skipped.
fn skip_ws_and_comments<'a>(text: (usize, &'a str)) -> (usize, &'a str) {
    let (start_offset, original) = text;

    // Alternate between the two skippers until neither makes progress.
    let mut rest = original;
    loop {
        let trimmed = skip_comment(skip_ws(rest));
        if trimmed.len() == rest.len() {
            break;
        }
        rest = trimmed;
    }

    (start_offset + original.len() - rest.len(), rest)
}
// ================================================================
#[cfg(test)]
mod tests {
    //! Unit tests for the tokenizer (`next_token`) and for tree
    //! construction and child iteration on `DataTree`.

    use super::*;
    use super::{next_token, Token};

    /// A bare type name is a single token that consumes the whole input.
    #[test]
    fn tokenize_1() {
        let input = (0, "Thing");

        assert_eq!(next_token(input), (Token::TypeName("Thing"), (5, "")));
    }

    /// Leading whitespace and a comment line are skipped, and the returned
    /// offset accounts for the skipped bytes.
    #[test]
    fn tokenize_2() {
        let input = (0, " \n# gdfgdf gfdg dggdf\\sg dfgsd \n Thing");

        // The input is 38 bytes long and is consumed entirely.
        assert_eq!(next_token(input), (Token::TypeName("Thing"), (38, "")));
    }

    /// Type name followed by an empty internal body.
    #[test]
    fn tokenize_3() {
        let input1 = (0, " Thing { }");

        let (token1, input2) = next_token(input1);
        let (token2, input3) = next_token(input2);
        let (token3, input4) = next_token(input3);

        assert_eq!((token1, input2.1), (Token::TypeName("Thing"), " { }"));
        assert_eq!((token2, input3.1), (Token::OpenInner, " }"));
        assert_eq!((token3, input4.1), (Token::CloseInner, ""));
    }

    /// Identifiers keep their leading `$`.
    #[test]
    fn tokenize_4() {
        let input = (0, " $hi_there ");

        assert_eq!(next_token(input), (Token::Ident("$hi_there"), (10, " ")));
    }

    /// Backslash-escaped whitespace and reserved characters stay inside an
    /// identifier.
    #[test]
    fn tokenize_5() {
        let input = (0, " $hi\\ t\\#he\\[re ");

        assert_eq!(next_token(input),
                   (Token::Ident("$hi\\ t\\#he\\[re"), (15, " ")));
    }

    /// Unescaped whitespace and `[` terminate identifiers and type names.
    #[test]
    fn tokenize_6() {
        let input1 = (0, " $hi the[re");

        let (token1, input2) = next_token(input1);
        let (token2, input3) = next_token(input2);
        let (token3, input4) = next_token(input3);
        let (token4, input5) = next_token(input4);
        let (token5, input6) = next_token(input5);

        assert_eq!((token1, input2), (Token::Ident("$hi"), (4, " the[re")));
        assert_eq!((token2, input3), (Token::TypeName("the"), (8, "[re")));
        assert_eq!((token3, input4), (Token::OpenLeaf, (9, "re")));
        assert_eq!((token4, input5), (Token::TypeName("re"), (11, "")));
        assert_eq!((token5, input6), (Token::End, (11, "")));
    }

    /// A complete small document, including a trailing comment on a line.
    #[test]
    fn tokenize_7() {
        let input1 = (0, "Thing $yar { # A comment\n\tThing2 []\n}");

        let (token1, input2) = next_token(input1);
        let (token2, input3) = next_token(input2);
        let (token3, input4) = next_token(input3);
        let (token4, input5) = next_token(input4);
        let (token5, input6) = next_token(input5);
        let (token6, input7) = next_token(input6);
        let (token7, input8) = next_token(input7);
        let (token8, input9) = next_token(input8);

        assert_eq!((token1, input2),
                   (Token::TypeName("Thing"), (5, " $yar { # A comment\n\tThing2 []\n}")));
        assert_eq!((token2, input3),
                   (Token::Ident("$yar"), (10, " { # A comment\n\tThing2 []\n}")));
        assert_eq!((token3, input4),
                   (Token::OpenInner, (12, " # A comment\n\tThing2 []\n}")));
        assert_eq!((token4, input5),
                   (Token::TypeName("Thing2"), (32, " []\n}")));
        assert_eq!((token5, input6), (Token::OpenLeaf, (34, "]\n}")));
        assert_eq!((token6, input7), (Token::CloseLeaf, (35, "\n}")));
        assert_eq!((token7, input8), (Token::CloseInner, (37, "")));
        assert_eq!((token8, input9), (Token::End, (37, "")));
    }

    /// A single empty internal node becomes the only child of the root.
    #[test]
    fn parse_1() {
        let input = r#"
Thing {}
"#;

        let dt = DataTree::from_str(input).unwrap();

        // Root
        let (t, i, c) = dt.internal_data_or_panic();
        assert_eq!(t, "ROOT");
        assert_eq!(i, None);
        assert_eq!(c.len(), 1);

        // First (and only) child
        let (t, i, c) = c[0].internal_data_or_panic();
        assert_eq!(t, "Thing");
        assert_eq!(i, None);
        assert_eq!(c.len(), 0);
    }

    /// Filtering by type name counts both internal and leaf children.
    #[test]
    fn iter_1() {
        let dt = DataTree::from_str(r#"
A {}
B {}
A []
A {}
B {}
"#)
            .unwrap();

        let i = dt.iter_children_with_type("A");
        assert_eq!(i.count(), 3);
    }

    /// The internal-only filter skips leaves of the same type name.
    #[test]
    fn iter_2() {
        let dt = DataTree::from_str(r#"
A {}
B {}
A []
A {}
B {}
"#)
            .unwrap();

        let i = dt.iter_internal_children_with_type("A");
        assert_eq!(i.count(), 2);
    }

    /// The leaf-only filter skips internal nodes of the same type name.
    #[test]
    fn iter_3() {
        let dt = DataTree::from_str(r#"
A []
B {}
A {}
A []
B {}
"#)
            .unwrap();

        let i = dt.iter_leaf_children_with_type("A");
        assert_eq!(i.count(), 2);
    }
}