Added basic "data tree" parsing.

The code here is a bit messy right now.  Just did enough to get
it working.  But it needs to be cleaned up and report parse
errors in a human-readable way, among other things.
This commit is contained in:
Nathan Vegdahl 2015-12-28 00:49:15 -08:00
parent 81dc3d14a8
commit 37f0eb33dd
2 changed files with 402 additions and 0 deletions

385
src/data_tree.rs Normal file
View File

@ -0,0 +1,385 @@
#![allow(dead_code)]
use std::result;
use std::cmp::Eq;
#[derive(Debug)]
pub enum DataTree<'a> {
Internal {
type_: &'a str,
name: Option<&'a str>,
children: Vec<DataTree<'a>>,
},
Leaf {
type_: &'a str,
contents: &'a str,
},
}
impl<'a> DataTree<'a> {
pub fn from_str(source_text: &'a str) -> Option<Vec<DataTree<'a>>> {
let mut items = Vec::new();
let mut remaining_text = source_text;
while let Ok((item, text)) = parse(remaining_text) {
remaining_text = text;
items.push(item);
}
remaining_text = skip_ws_and_comments(remaining_text);
if remaining_text.len() > 0 {
return None;
} else {
return Some(items);
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub enum Token<'a> {
OpenInner,
CloseInner,
OpenLeaf,
CloseLeaf,
Type(&'a str),
Name(&'a str),
End,
Unknown,
}
type ParseResult<'a> = result::Result<(DataTree<'a>, &'a str), ()>;
fn parse<'a>(source_text: &'a str) -> ParseResult<'a> {
let (token, text1) = next_token(source_text);
if let Token::Type(t) = token {
match next_token(text1) {
// Inner with name
(Token::Name(n), text2) => {
if let (Token::OpenInner, text3) = next_token(text2) {
let mut children = Vec::new();
let mut text_remaining = text3;
while let Ok((node, text4)) = parse(text_remaining) {
text_remaining = text4;
children.push(node);
}
if let (Token::CloseInner, text4) = next_token(text_remaining) {
return Ok((DataTree::Internal {
type_: t,
name: Some(n),
children: children,
},
text4));
} else {
return Err(());
}
} else {
return Err(());
}
}
// Inner without name
(Token::OpenInner, text2) => {
let mut children = Vec::new();
let mut text_remaining = text2;
while let Ok((node, text3)) = parse(text_remaining) {
text_remaining = text3;
children.push(node);
}
if let (Token::CloseInner, text3) = next_token(text2) {
return Ok((DataTree::Internal {
type_: t,
name: None,
children: children,
},
text3));
} else {
return Err(());
}
}
// Leaf
(Token::OpenLeaf, text2) => {
if let Ok((lc, text3)) = parse_leaf_content(text2) {
if let (Token::CloseLeaf, text4) = next_token(text3) {
return Ok((DataTree::Leaf {
type_: t,
contents: lc,
},
text4));
} else {
return Err(());
}
} else {
return Err(());
}
}
// Other
_ => {
return Err(());
}
}
} else {
return Err(());
}
}
fn parse_leaf_content<'a>(source_text: &'a str) -> result::Result<(&'a str, &'a str), ()> {
let mut escape = false;
for (i, c) in source_text.char_indices() {
if escape {
escape = false;
continue;
}
if c == ']' {
return Ok((&source_text[0..i], &source_text[i..]));
} else if c == '\\' {
escape = true;
}
}
return Err(());
}
pub fn next_token<'a>(source_text: &'a str) -> (Token<'a>, &'a str) {
let text1 = skip_ws_and_comments(source_text);
if let Some(c) = text1.chars().nth(0) {
let text2 = &text1[c.len_utf8()..];
match c {
'{' => {
return (Token::OpenInner, text2);
}
'}' => {
return (Token::CloseInner, text2);
}
'[' => {
return (Token::OpenLeaf, text2);
}
']' => {
return (Token::CloseLeaf, text2);
}
'$' => {
// Parse name
let mut si = 0;
let mut escape = false;
let mut broke = false;
for (i, c) in text2.char_indices() {
if c == '\\' {
escape = true;
} else if (is_reserved_char(c) || is_ws(c)) && !escape {
si = i;
broke = true;
break;
} else {
escape = false;
}
}
if broke {
return (Token::Name(&text1[0..si + 1]), &text1[si + 1..]);
} else {
return (Token::Name(text1), "");
}
}
_ => {
// Parse type
let mut si = 0;
let mut broke = false;
for (i, c) in text1.char_indices() {
if (is_reserved_char(c) || is_ws(c)) && c != '\\' {
si = i;
broke = true;
break;
}
}
if broke {
return (Token::Type(&text1[0..si]), &text1[si..]);
} else {
return (Token::Type(text1), "");
}
}
}
} else {
return (Token::End, text1);
}
return (Token::Unknown, text1);
}
fn is_ws(c: char) -> bool {
match c {
'\n' | '\r' | '\t' | ' ' => true,
_ => false,
}
}
fn is_reserved_char(c: char) -> bool {
match c {
'{' | '}' | '[' | ']' | '$' | '\\' => true,
_ => false,
}
}
fn skip_ws<'a>(text: &'a str) -> Option<&'a str> {
for (i, c) in text.char_indices() {
if !is_ws(c) {
if i > 0 {
return Some(&text[i..]);
} else {
return None;
}
}
}
if text.len() > 0 {
return Some("");
} else {
return None;
}
}
fn skip_comment<'a>(text: &'a str) -> Option<&'a str> {
let mut tci = text.char_indices();
if let Some((_, '#')) = tci.next() {
for (i, c) in tci {
match c {
'\n' | '\r' => {
return Some(&text[i..]);
}
_ => {}
}
}
return Some("");
} else {
return None;
}
}
fn skip_ws_and_comments<'a>(text: &'a str) -> &'a str {
let mut remaining_text = text;
loop {
let mut ws = 0;
let mut comment = 0;
while let Some(t) = skip_ws(remaining_text) {
remaining_text = t;
ws += 1;
}
while let Some(t) = skip_comment(remaining_text) {
remaining_text = t;
comment += 1;
}
if ws == 0 && comment == 0 {
break;
}
}
return remaining_text;
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_tokenize_1() {
let input = "Thing";
assert_eq!(next_token(input), (Token::Type("Thing"), ""));
}
#[test]
fn test_tokenize_2() {
let input = " \n# gdfgdf gfdg dggdf\\sg dfgsd \n Thing";
assert_eq!(next_token(input), (Token::Type("Thing"), ""));
}
#[test]
fn test_tokenize_3() {
let input1 = " Thing { }";
let (token1, input2) = next_token(input1);
let (token2, input3) = next_token(input2);
let (token3, input4) = next_token(input3);
assert_eq!((token1, input2), (Token::Type("Thing"), " { }"));
assert_eq!((token2, input3), (Token::OpenInner, " }"));
assert_eq!((token3, input4), (Token::CloseInner, ""));
}
#[test]
fn test_tokenize_4() {
let input = " $hi_there ";
assert_eq!(next_token(input), (Token::Name("$hi_there"), " "));
}
#[test]
fn test_tokenize_5() {
let input = " $hi\\ t\\#he\\[re ";
assert_eq!(next_token(input), (Token::Name("$hi\\ t\\#he\\[re"), " "));
}
#[test]
fn test_tokenize_6() {
let input1 = " $hi the[re";
let (token1, input2) = next_token(input1);
let (token2, input3) = next_token(input2);
let (token3, input4) = next_token(input3);
let (token4, input5) = next_token(input4);
assert_eq!((token1, input2), (Token::Name("$hi"), " the[re"));
assert_eq!((token2, input3), (Token::Type("the"), "[re"));
assert_eq!((token3, input4), (Token::OpenLeaf, "re"));
assert_eq!((token4, input5), (Token::Type("re"), ""));
}
#[test]
fn test_tokenize_7() {
let input1 = "Thing $yar { # A comment\n\tThing2 []\n}";
let (token1, input2) = next_token(input1);
let (token2, input3) = next_token(input2);
let (token3, input4) = next_token(input3);
let (token4, input5) = next_token(input4);
let (token5, input6) = next_token(input5);
let (token6, input7) = next_token(input6);
assert_eq!((token1, input2),
(Token::Type("Thing"), " $yar { # A comment\n\tThing2 []\n}"));
assert_eq!((token2, input3),
(Token::Name("$yar"), " { # A comment\n\tThing2 []\n}"));
assert_eq!((token3, input4),
(Token::OpenInner, " # A comment\n\tThing2 []\n}"));
assert_eq!((token4, input5), (Token::Type("Thing2"), " []\n}"));
assert_eq!((token5, input6), (Token::OpenLeaf, "]\n}"));
assert_eq!((token6, input7), (Token::CloseLeaf, "\n}"));
}
}

View File

@ -6,6 +6,7 @@ mod lerp;
mod float4;
mod ray;
mod bbox;
mod data_tree;
mod image;
use std::path::Path;
@ -13,6 +14,7 @@ use std::path::Path;
use docopt::Docopt;
use image::Image;
use data_tree::DataTree;
// ----------------------------------------------------------------
@ -56,4 +58,19 @@ fn main() {
let mut img = Image::new(512, 512);
img.set(256, 256, (255, 255, 255));
let _ = img.write_binary_ppm(Path::new(&args.arg_imgpath));
let test_string = r##"
Thing $yar { # A comment
Obj [Things and stuff\]]
}
Thing $yar { # A comment
Obj [23]
Obj [42]
Obj ["The meaning of life!"]
}
"##;
let tree = DataTree::from_str(test_string);
println!("{:#?}", tree);
}