From 2bb45a9876a824bef197a8b5549ed210873e2300 Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl
Date: Thu, 4 Aug 2022 13:50:13 -0700
Subject: [PATCH] Add streaming data tree parser sub-crate.

Not used yet.
---
 Cargo.lock                        |   5 +
 Cargo.toml                        |   4 +
 sub_crates/data_tree/Cargo.toml   |  10 +
 sub_crates/data_tree/src/lib.rs   | 199 ++++++++
 sub_crates/data_tree/src/parse.rs | 762 ++++++++++++++++++++++++++++++
 5 files changed, 980 insertions(+)
 create mode 100644 sub_crates/data_tree/Cargo.toml
 create mode 100644 sub_crates/data_tree/src/lib.rs
 create mode 100644 sub_crates/data_tree/src/parse.rs

diff --git a/Cargo.lock b/Cargo.lock
index f7a2479..a03e509 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -157,6 +157,10 @@ version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "24ce9782d4d5c53674646a6a4c1863a21a8fc0cb649b3c94dfc16e45071dea19"
 
+[[package]]
+name = "data_tree"
+version = "0.1.0"
+
 [[package]]
 name = "fastapprox"
 version = "0.3.0"
@@ -342,6 +346,7 @@ dependencies = [
  "compact",
  "copy_in_place",
  "crossbeam",
+ "data_tree",
  "fastapprox",
  "half",
  "halton",
diff --git a/Cargo.toml b/Cargo.toml
index 521ed36..bdc1f14 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,6 +3,7 @@ members = [
     "sub_crates/bvh_order",
     "sub_crates/color",
     "sub_crates/compact",
+    "sub_crates/data_tree",
     "sub_crates/halton",
     "sub_crates/rmath",
     "sub_crates/rrand",
@@ -50,6 +51,9 @@ path = "sub_crates/color"
 
 [dependencies.compact]
 path = "sub_crates/compact"
 
+[dependencies.data_tree]
+path = "sub_crates/data_tree"
+
 [dependencies.halton]
 path = "sub_crates/halton"
diff --git a/sub_crates/data_tree/Cargo.toml b/sub_crates/data_tree/Cargo.toml
new file mode 100644
index 0000000..8aa8679
--- /dev/null
+++ b/sub_crates/data_tree/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "data_tree"
+version = "0.1.0"
+authors = ["Nathan Vegdahl "]
+edition = "2018"
+license = "MIT"
+
+[lib]
+name = "data_tree"
+path = "src/lib.rs"
diff --git a/sub_crates/data_tree/src/lib.rs b/sub_crates/data_tree/src/lib.rs
new file mode 100644
index 0000000..a42f590
--- /dev/null
+++ b/sub_crates/data_tree/src/lib.rs
@@ -0,0 +1,199 @@
+#![allow(clippy::redundant_field_names)]
+#![allow(clippy::needless_lifetimes)]
+
+mod parse;
+
+use parse::{ParseError, ParseEvent, Parser};
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Event<'a> {
+    InnerOpen {
+        type_name: &'a str,
+        byte_offset: usize,
+    },
+    InnerClose {
+        byte_offset: usize,
+    },
+    Leaf {
+        type_name: &'a str,
+        contents: &'a str,
+        byte_offset: usize,
+    },
+    EOF,
+}
+
+//----------------------------------------------------------------------------
+
+#[derive(Debug)]
+pub enum Error {
+    ExpectedNameOrClose(usize),
+    ExpectedOpen(usize),
+    UnexpectedClose(usize),
+    UnexpectedEOF,
+    IO(std::io::Error),
+}
+
+impl std::error::Error for Error {}
+
+impl std::fmt::Display for Error {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        write!(f, "{:?}", self)
+    }
+}
+
+impl From<ParseError> for Error {
+    fn from(e: ParseError) -> Self {
+        match e {
+            ParseError::ExpectedNameOrClose(byte_offset) => Error::ExpectedNameOrClose(byte_offset),
+            ParseError::ExpectedOpen(byte_offset) => Error::ExpectedOpen(byte_offset),
+            ParseError::UnexpectedClose(byte_offset) => Error::UnexpectedClose(byte_offset),
+        }
+    }
+}
+
+impl From<std::io::Error> for Error {
+    fn from(e: std::io::Error) -> Self {
+        Error::IO(e)
+    }
+}
+
+//-------------------------------------------------------------
+
+#[derive(Debug)]
+pub struct DataTreeReader<R: std::io::BufRead> {
+    parser: Parser,
+    reader: R,
+    buf: String,
+    eof: bool,
+}
+
+impl<R: std::io::BufRead> DataTreeReader<R> {
+    pub fn new(reader: R) -> Self {
+        Self {
+            parser: Parser::new(),
+            reader: reader,
+            buf: String::new(),
+            eof: false,
+        }
+    }
+
+    pub fn next_event<'a>(&'a mut self) -> Result<Event<'a>, Error> {
+        loop {
+            let valid_end = match self.parser.next_event()? {
+                ParseEvent::ValidEnd => true,
+                ParseEvent::NeedMoreInput => false,
+
+                // The transmutes below are because the borrow checker is
+                // over-conservative about this. It thinks
+                // the lifetime isn't valid, but since we aren't
+                // mutating self after returning (and in fact
+                // can't because of the borrow) there's no way for
+                // the references in this to become invalid.
+                ParseEvent::InnerOpen {
+                    type_name,
+                    byte_offset,
+                } => {
+                    return Ok(unsafe {
+                        std::mem::transmute::<Event, Event>(Event::InnerOpen {
+                            type_name,
+                            byte_offset,
+                        })
+                    });
+                }
+                ParseEvent::InnerClose { byte_offset } => {
+                    return Ok(unsafe {
+                        std::mem::transmute::<Event, Event>(Event::InnerClose { byte_offset })
+                    });
+                }
+                ParseEvent::Leaf {
+                    type_name,
+                    contents,
+                    byte_offset,
+                } => {
+                    return Ok(unsafe {
+                        std::mem::transmute::<Event, Event>(Event::Leaf {
+                            type_name,
+                            contents,
+                            byte_offset,
+                        })
+                    });
+                }
+            };
+
+            if !self.eof {
+                self.buf.clear();
+                let read = self.reader.read_line(&mut self.buf)?;
+                self.parser.push_data(&self.buf);
+                if read == 0 {
+                    self.eof = true;
+                }
+            } else if !valid_end {
+                return Err(Error::UnexpectedEOF);
+            } else {
+                return Ok(Event::EOF);
+            }
+        }
+    }
+
+    pub fn peek_event<'a>(&'a mut self) -> Result<Event<'a>, Error> {
+        loop {
+            let valid_end = match self.parser.peek_event()? {
+                ParseEvent::ValidEnd => true,
+                ParseEvent::NeedMoreInput => false,
+
+                // The transmutes below are because the borrow checker is
+                // over-conservative about this. It thinks
+                // the lifetime isn't valid, but since we aren't
+                // mutating self after returning (and in fact
+                // can't because of the borrow) there's no way for
+                // the references in this to become invalid.
+                ParseEvent::InnerOpen {
+                    type_name,
+                    byte_offset,
+                } => {
+                    return Ok(unsafe {
+                        std::mem::transmute::<Event, Event>(Event::InnerOpen {
+                            type_name,
+                            byte_offset,
+                        })
+                    });
+                }
+                ParseEvent::InnerClose { byte_offset } => {
+                    return Ok(unsafe {
+                        std::mem::transmute::<Event, Event>(Event::InnerClose { byte_offset })
+                    });
+                }
+                ParseEvent::Leaf {
+                    type_name,
+                    contents,
+                    byte_offset,
+                } => {
+                    return Ok(unsafe {
+                        std::mem::transmute::<Event, Event>(Event::Leaf {
+                            type_name,
+                            contents,
+                            byte_offset,
+                        })
+                    });
+                }
+            };
+
+            if !self.eof {
+                self.buf.clear();
+                let read = self.reader.read_line(&mut self.buf)?;
+                self.parser.push_data(&self.buf);
+                if read == 0 {
+                    self.eof = true;
+                }
+            } else if !valid_end {
+                return Err(Error::UnexpectedEOF);
+            } else {
+                return Ok(Event::EOF);
+            }
+        }
+    }
+
+    pub fn byte_offset(&self) -> usize {
+        self.parser.byte_offset()
+    }
+}
diff --git a/sub_crates/data_tree/src/parse.rs b/sub_crates/data_tree/src/parse.rs
new file mode 100644
index 0000000..489192d
--- /dev/null
+++ b/sub_crates/data_tree/src/parse.rs
@@ -0,0 +1,762 @@
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum ParseError {
+    ExpectedNameOrClose(usize),
+    ExpectedOpen(usize),
+    UnexpectedClose(usize),
+}
+
+impl std::error::Error for ParseError {}
+
+impl std::fmt::Display for ParseError {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        write!(f, "{:?}", self)
+    }
+}
+
+//---------------------------------------------------------------------
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum ParseEvent<'a> {
+    InnerOpen {
+        type_name: &'a str,
+        byte_offset: usize,
+    },
+    InnerClose {
+        byte_offset: usize,
+    },
+    Leaf {
+        type_name: &'a str,
+        contents: &'a str,
+        byte_offset: usize,
+    },
+    NeedMoreInput,
+    ValidEnd, // All data so far is consumed, and this is a
+              // valid place to finish the parse.
+}
+
+impl<'a> ParseEvent<'a> {
+    fn add_to_byte_offset(&self, offset: usize) -> ParseEvent<'a> {
+        match *self {
+            ParseEvent::InnerOpen {
+                type_name,
+                byte_offset,
+            } => ParseEvent::InnerOpen {
+                type_name: type_name,
+                byte_offset: byte_offset + offset,
+            },
+            ParseEvent::InnerClose { byte_offset } => ParseEvent::InnerClose {
+                byte_offset: byte_offset + offset,
+            },
+            ParseEvent::Leaf {
+                type_name,
+                contents,
+                byte_offset,
+            } => ParseEvent::Leaf {
+                type_name: type_name,
+                contents: contents,
+                byte_offset: byte_offset + offset,
+            },
+            ParseEvent::NeedMoreInput => *self,
+            ParseEvent::ValidEnd => *self,
+        }
+    }
+}
+
+//---------------------------------------------------------------------
+
+#[derive(Debug)]
+pub struct Parser {
+    buffer: String,
+    buf_consumed_idx: usize,
+    total_bytes_processed: usize,
+    inner_opens: usize,
+}
+
+impl Parser {
+    pub fn new() -> Parser {
+        Parser {
+            buffer: String::with_capacity(1024),
+            buf_consumed_idx: 0,
+            total_bytes_processed: 0,
+            inner_opens: 0,
+        }
+    }
+
+    pub fn push_data(&mut self, text: &str) {
+        // Remove any consumed data.
+        if self.buf_consumed_idx > 0 {
+            self.buffer.replace_range(..self.buf_consumed_idx, "");
+            self.buf_consumed_idx = 0;
+        }
+
+        // Add the new data.
+        self.buffer.push_str(text);
+    }
+
+    pub fn next_event<'a>(&'a mut self) -> Result<ParseEvent<'a>, ParseError> {
+        // Remove any consumed data.
+        if self.buf_consumed_idx > 0 {
+            self.buffer.replace_range(..self.buf_consumed_idx, "");
+            self.buf_consumed_idx = 0;
+        }
+
+        // Try to parse an event from the valid prefix.
+        match try_parse_event(&self.buffer) {
+            ParseEventParse::Ok(event, bytes_consumed) => {
+                // Update internal state.
+                if let ParseEvent::InnerOpen { .. } = event {
+                    self.inner_opens += 1;
+                } else if let ParseEvent::InnerClose { byte_offset, .. } = event {
+                    if self.inner_opens == 0 {
+                        return Err(ParseError::UnexpectedClose(
+                            byte_offset + self.total_bytes_processed,
+                        ));
+                    } else {
+                        self.inner_opens -= 1;
+                    }
+                }
+                self.buf_consumed_idx += bytes_consumed;
+                self.total_bytes_processed += bytes_consumed;
+
+                Ok(event.add_to_byte_offset(self.total_bytes_processed - self.buf_consumed_idx))
+            }
+            ParseEventParse::ReachedEnd => {
+                // If we consumed all data, then if all nodes are properly
+                // closed we're done. Otherwise we need more input.
+                if self.inner_opens == 0 {
+                    Ok(ParseEvent::ValidEnd)
+                } else {
+                    Ok(ParseEvent::NeedMoreInput)
+                }
+            }
+            ParseEventParse::IncompleteData => Ok(ParseEvent::NeedMoreInput),
+
+            // Hard errors.
+            ParseEventParse::ExpectedNameOrInnerClose(byte_offset) => Err(
+                ParseError::ExpectedNameOrClose(byte_offset + self.total_bytes_processed),
+            ),
+            ParseEventParse::ExpectedOpen(byte_offset) => Err(ParseError::ExpectedOpen(
+                byte_offset + self.total_bytes_processed,
+            )),
+        }
+    }
+
+    pub fn peek_event<'a>(&'a mut self) -> Result<ParseEvent<'a>, ParseError> {
+        // Remove any consumed data.
+        if self.buf_consumed_idx > 0 {
+            self.buffer.replace_range(..self.buf_consumed_idx, "");
+            self.buf_consumed_idx = 0;
+        }
+
+        // Try to parse an event from the valid prefix.
+        match try_parse_event(&self.buffer) {
+            ParseEventParse::Ok(event, _bytes_consumed) => {
+                if let ParseEvent::InnerClose { byte_offset, .. } = event {
+                    if self.inner_opens == 0 {
+                        return Err(ParseError::UnexpectedClose(
+                            byte_offset + self.total_bytes_processed,
+                        ));
+                    }
+                }
+                Ok(event.add_to_byte_offset(self.total_bytes_processed))
+            }
+            ParseEventParse::ReachedEnd => {
+                // If we consumed all data, then if all nodes are properly
+                // closed we're done. Otherwise we need more input.
+                if self.inner_opens == 0 {
+                    Ok(ParseEvent::ValidEnd)
+                } else {
+                    Ok(ParseEvent::NeedMoreInput)
+                }
+            }
+            ParseEventParse::IncompleteData => Ok(ParseEvent::NeedMoreInput),
+
+            // Hard errors.
+            ParseEventParse::ExpectedNameOrInnerClose(byte_offset) => Err(
+                ParseError::ExpectedNameOrClose(byte_offset + self.total_bytes_processed),
+            ),
+            ParseEventParse::ExpectedOpen(byte_offset) => Err(ParseError::ExpectedOpen(
+                byte_offset + self.total_bytes_processed,
+            )),
+        }
+    }
+
+    pub fn byte_offset(&self) -> usize {
+        self.total_bytes_processed + self.buf_consumed_idx
+    }
+}
+
+//--------------------------------------------------------------------------
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum ParseEventParse<'a> {
+    Ok(ParseEvent<'a>, usize), // (event, bytes consumed)
+    ReachedEnd,     // Reached the end of the buffer in a valid state, with no event.
+    IncompleteData, // Need more data to parse.
+
+    // ParseErrors.
+    ExpectedNameOrInnerClose(usize),
+    ExpectedOpen(usize),
+}
+
+fn try_parse_event<'a>(text: &'a str) -> ParseEventParse<'a> {
+    // Remove leading whitespace and comments.
+    let mut source_text = skip_ws_and_comments((0, text));
+    let start_idx = source_text.0;
+
+    // First token.
+    let type_name = match next_token(source_text) {
+        // Type name, record and continue.
+        (Token::Name(tn), tail) => {
+            source_text = tail;
+            tn
+        }
+
+        // Closing tag for inner node. Return.
+        (Token::CloseInner, tail) => {
+            return ParseEventParse::Ok(
+                ParseEvent::InnerClose {
+                    byte_offset: start_idx,
+                },
+                tail.0,
+            );
+        }
+
+        // We consumed everything as whitespace and/or
+        // comments. Return.
+        (Token::End, _) => {
+            return ParseEventParse::ReachedEnd;
+        }
+
+        // Invalid.
+        _ => return ParseEventParse::ExpectedNameOrInnerClose(start_idx),
+    };
+
+    // Skip whitespace and comments to get the start of
+    // where there should be an open tag, for use later in error.
+    source_text = skip_ws_and_comments(source_text);
+    let open_start_idx = source_text.0;
+
+    // Last part of the event.
+    match next_token(source_text) {
+        // Beginning of an inner node.
+        (Token::OpenInner, tail) => ParseEventParse::Ok(
+            ParseEvent::InnerOpen {
+                type_name: type_name,
+                byte_offset: start_idx,
+            },
+            tail.0,
+        ),
+
+        // Try to parse entire leaf node.
+        (Token::OpenLeaf, tail) => {
+            // Get contents.
+            let (contents, tail2) = parse_leaf_content(tail);
+            source_text = tail2;
+
+            // Try to get closing tag.
+            match next_token(source_text) {
+                // If it's a leaf closing tag, we're done!
+                // Return the leaf event.
+                (Token::CloseLeaf, tail) => ParseEventParse::Ok(
+                    ParseEvent::Leaf {
+                        type_name: type_name,
+                        contents: contents,
+                        byte_offset: start_idx,
+                    },
+                    tail.0,
+                ),
+
+                // Otherwise...
+                _ => {
+                    if source_text.1.is_empty() {
+                        // If there's no text left, we're just incomplete.
+                        ParseEventParse::IncompleteData
+                    } else {
+                        // Otherwise, this would be a parse error...
+                        // except that this shouldn't be reachable,
+                        // since everything should be consumable for
+                        // leaf content up until a close tag.
+                        unreachable!("Expected leaf close tag.")
+                    }
+                }
+            }
+        }
+
+        // We consumed everything else as whitespace
+        // and/or comments, so we're incomplete. Return.
+        (Token::End, _) => ParseEventParse::IncompleteData,
+
+        // Invalid.
+        _ => ParseEventParse::ExpectedOpen(open_start_idx),
+    }
+}
+
+fn parse_leaf_content(source_text: (usize, &str)) -> (&str, (usize, &str)) {
+    let mut si = 1;
+    let mut escaped = false;
+    let mut reached_end = true;
+    for (i, c) in source_text.1.char_indices() {
+        si = i;
+        if escaped {
+            escaped = false;
+        } else if c == '\\' {
+            escaped = true;
+        } else if c == ']' {
+            reached_end = false;
+            break;
+        }
+    }
+
+    if reached_end {
+        si = source_text.1.len();
+    }
+
+    (
+        &source_text.1[0..si],
+        (source_text.0 + si, &source_text.1[si..]),
+    )
+}
+
+//--------------------------------------------------------------------------
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+enum Token<'a> {
+    OpenInner,
+    CloseInner,
+    OpenLeaf,
+    CloseLeaf,
+    Name(&'a str),
+    End,
+    Unknown,
+}
+
+fn next_token<'a>(source_text: (usize, &'a str)) -> (Token<'a>, (usize, &'a str)) {
+    let text1 = skip_ws_and_comments(source_text);
+
+    if let Some(c) = text1.1.chars().next() {
+        let text2 = (text1.0 + c.len_utf8(), &text1.1[c.len_utf8()..]);
+
+        match c {
+            '{' => (Token::OpenInner, text2),
+
+            '}' => (Token::CloseInner, text2),
+
+            '[' => (Token::OpenLeaf, text2),
+
+            ']' => (Token::CloseLeaf, text2),
+
+            _ => {
+                if is_ident_char(c) {
+                    // Parse type
+                    let mut si = 0;
+                    let mut reached_end = true;
+                    for (i, c) in text1.1.char_indices() {
+                        si = i;
+                        if !is_ident_char(c) {
+                            reached_end = false;
+                            break;
+                        }
+                    }
+
+                    if reached_end {
+                        si = text1.1.len();
+                    }
+
+                    (Token::Name(&text1.1[0..si]), (text1.0 + si, &text1.1[si..]))
+                } else {
+                    (Token::Unknown, text1)
+                }
+            }
+        }
+    } else {
+        (Token::End, text1)
+    }
+}
+
+fn is_ws(c: char) -> bool {
+    matches!(c, '\n' | '\r' | '\t' | ' ')
+}
+
+fn is_nl(c: char) -> bool {
+    c == '\n'
+}
+
+fn is_ident_char(c: char) -> bool {
+    c.is_alphanumeric() || c == '-' || c == '_'
+}
+
+fn skip_ws(text: &str) -> &str {
+    let mut si = 0;
+    let mut reached_end = true;
+    for (i, c) in text.char_indices() {
+        si = i;
+        if !is_ws(c) {
+            reached_end = false;
+            break;
+        }
+    }
+
+    if reached_end {
+        si = text.len();
+    }
+
+    &text[si..]
+}
+
+fn skip_comment(text: &str) -> &str {
+    let mut si = 0;
+    if text.starts_with('#') {
+        let mut reached_end = true;
+        for (i, c) in text.char_indices() {
+            si = i;
+            if is_nl(c) {
+                reached_end = false;
+                break;
+            }
+        }
+
+        if reached_end {
+            si = text.len();
+        }
+    }
+
+    &text[si..]
+}
+
+fn skip_ws_and_comments(text: (usize, &str)) -> (usize, &str) {
+    let mut remaining_text = text.1;
+
+    loop {
+        let tmp = skip_comment(skip_ws(remaining_text));
+
+        if tmp.len() == remaining_text.len() {
+            break;
+        } else {
+            remaining_text = tmp;
+        }
+    }
+
+    let offset = text.0 + text.1.len() - remaining_text.len();
+    (offset, remaining_text)
+}
+
+//--------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use super::{next_token, Token};
+
+    #[test]
+    fn tokenize_01() {
+        let input = (0, "Thing");
+
+        assert_eq!(next_token(input), (Token::Name("Thing"), (5, "")));
+    }
+
+    #[test]
+    fn tokenize_02() {
+        let input = (0, " \n# gdfgdf gfdg dggdf\\sg dfgsd \n Thing");
+
+        assert_eq!(next_token(input), (Token::Name("Thing"), (41, "")));
+    }
+
+    #[test]
+    fn tokenize_03() {
+        let input1 = (0, " Thing { }");
+        let (token1, input2) = next_token(input1);
+        let (token2, input3) = next_token(input2);
+        let (token3, input4) = next_token(input3);
+
+        assert_eq!((token1, input2.1), (Token::Name("Thing"), " { }"));
+        assert_eq!((token2, input3.1), (Token::OpenInner, " }"));
+        assert_eq!((token3, input4.1), (Token::CloseInner, ""));
+    }
+
+    #[test]
+    fn tokenize_04() {
+        let input1 = (0, " hi the[re");
+        let (token1, input2) = next_token(input1);
+        let (token2, input3) = next_token(input2);
+        let (token3, input4) = next_token(input3);
+        let (token4, input5) = next_token(input4);
+        let (token5, input6) = next_token(input5);
+
+        assert_eq!((token1, input2), (Token::Name("hi"), (3, " the[re")));
+        assert_eq!((token2, input3), (Token::Name("the"), (7, "[re")));
+        assert_eq!((token3, input4), (Token::OpenLeaf, (8, "re")));
+        assert_eq!((token4, input5), (Token::Name("re"), (10, "")));
+        assert_eq!((token5, input6), (Token::End, (10, "")));
+    }
+
+    #[test]
+    fn tokenize_05() {
+        let input1 = (0, "Thing { # A comment\n\tThing2 []\n}");
+        let (token1, input2) = next_token(input1);
+        let (token2, input3) = next_token(input2);
+        let (token3, input4) = next_token(input3);
+        let (token4, input5) = next_token(input4);
+        let (token5, input6) = next_token(input5);
+        let (token6, input7) = next_token(input6);
+        let (token7, input8) = next_token(input7);
+
+        assert_eq!(
+            (token1, input2),
+            (Token::Name("Thing"), (5, " { # A comment\n\tThing2 []\n}",))
+        );
+        assert_eq!(
+            (token2, input3),
+            (Token::OpenInner, (7, " # A comment\n\tThing2 []\n}",))
+        );
+        assert_eq!((token3, input4), (Token::Name("Thing2"), (27, " []\n}")));
+        assert_eq!((token4, input5), (Token::OpenLeaf, (29, "]\n}")));
+        assert_eq!((token5, input6), (Token::CloseLeaf, (30, "\n}")));
+        assert_eq!((token6, input7), (Token::CloseInner, (32, "")));
+        assert_eq!((token7, input8), (Token::End, (32, "")));
+    }
+
+    #[test]
+    fn try_parse_event_01() {
+        assert_eq!(try_parse_event("H"), ParseEventParse::IncompleteData,);
+    }
+
+    #[test]
+    fn try_parse_event_02() {
+        assert_eq!(try_parse_event("Hello "), ParseEventParse::IncompleteData,);
+    }
+
+    #[test]
+    fn try_parse_event_03() {
+        assert_eq!(
+            try_parse_event("Hello {"),
+            ParseEventParse::Ok(
+                ParseEvent::InnerOpen {
+                    type_name: "Hello",
+                    byte_offset: 0,
+                },
+                7
+            ),
+        );
+    }
+
+    #[test]
+    fn try_parse_event_04() {
+        assert_eq!(
+            try_parse_event("  Hello {"),
+            ParseEventParse::Ok(
+                ParseEvent::InnerOpen {
+                    type_name: "Hello",
+                    byte_offset: 2,
+                },
+                9
+            ),
+        );
+    }
+
+    #[test]
+    fn try_parse_event_05() {
+        assert_eq!(
+            try_parse_event("Hello { "),
+            ParseEventParse::Ok(
+                ParseEvent::InnerOpen {
+                    type_name: "Hello",
+                    byte_offset: 0,
+                },
+                7
+            ),
+        );
+    }
+
+    #[test]
+    fn try_parse_event_06() {
+        assert_eq!(try_parse_event("Hello ["), ParseEventParse::IncompleteData,);
+    }
+
+    #[test]
+    fn try_parse_event_07() {
+        assert_eq!(
+            try_parse_event("Hello [some contents"),
+            ParseEventParse::IncompleteData,
+        );
+    }
+
+    #[test]
+    fn try_parse_event_08() {
+        assert_eq!(
+            try_parse_event("Hello [some contents]"),
+            ParseEventParse::Ok(
+                ParseEvent::Leaf {
+                    type_name: "Hello",
+                    contents: "some contents",
+                    byte_offset: 0,
+                },
+                21
+            ),
+        );
+    }
+
+    #[test]
+    fn try_parse_event_09() {
+        assert_eq!(
+            try_parse_event("Hello [some contents] "),
+            ParseEventParse::Ok(
+                ParseEvent::Leaf {
+                    type_name: "Hello",
+                    contents: "some contents",
+                    byte_offset: 0,
+                },
+                21
+            ),
+        );
+    }
+
+    #[test]
+    fn try_parse_event_10() {
+        assert_eq!(
+            try_parse_event(r#"Hello [some \co\]ntents]"#),
+            ParseEventParse::Ok(
+                ParseEvent::Leaf {
+                    type_name: "Hello",
+                    contents: r#"some \co\]ntents"#,
+                    byte_offset: 0,
+                },
+                24
+            ),
+        );
+    }
+
+    #[test]
+    fn try_parse_event_11() {
+        assert_eq!(
+            try_parse_event(" # A comment\n\n "),
+            ParseEventParse::ReachedEnd,
+        );
+    }
+
+    #[test]
+    fn parser_01() {
+        let mut parser = Parser::new();
+
+        parser.push_data("Hello");
+        assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput));
+
+        parser.push_data("{");
+        assert_eq!(
+            parser.next_event(),
+            Ok(ParseEvent::InnerOpen {
+                type_name: "Hello",
+                byte_offset: 0,
+            })
+        );
+
+        assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput));
+
+        parser.push_data("}");
+        assert_eq!(
+            parser.next_event(),
+            Ok(ParseEvent::InnerClose { byte_offset: 6 })
+        );
+
+        assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd));
+    }
+
+    #[test]
+    fn parser_02() {
+        let mut parser = Parser::new();
+
+        parser.push_data("Hello");
+        assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput));
+
+        parser.push_data("[");
+        assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput));
+
+        parser.push_data("1.0 2.0 3.");
+        assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput));
+
+        parser.push_data("0]");
+        assert_eq!(
+            parser.next_event(),
+            Ok(ParseEvent::Leaf {
+                type_name: "Hello",
+                contents: "1.0 2.0 3.0",
+                byte_offset: 0,
+            })
+        );
+
+        assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd));
+    }
+
+    #[test]
+    fn parser_03() {
+        let mut parser = Parser::new();
+
+        parser.push_data("Hello { World [1.0 2.0 3.0] }");
+
+        assert_eq!(
+            parser.next_event(),
+            Ok(ParseEvent::InnerOpen {
+                type_name: "Hello",
+                byte_offset: 0,
+            })
+        );
+
+        assert_eq!(
+            parser.next_event(),
+            Ok(ParseEvent::Leaf {
+                type_name: "World",
+                contents: "1.0 2.0 3.0",
+                byte_offset: 8,
+            })
+        );
+
+        assert_eq!(
+            parser.next_event(),
+            Ok(ParseEvent::InnerClose { byte_offset: 28 })
+        );
+
+        // Make sure repeated calls are stable.
+        assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd));
+        assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd));
+        assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd));
+    }
+
+    #[test]
+    fn parser_04() {
+        let mut parser = Parser::new();
+
+        parser.push_data("$%^&");
+        assert_eq!(parser.next_event(), Err(ParseError::ExpectedNameOrClose(0)));
+    }
+
+    #[test]
+    fn parser_05() {
+        let mut parser = Parser::new();
+
+        parser.push_data("Hello]");
+        assert_eq!(parser.next_event(), Err(ParseError::ExpectedOpen(5)));
+    }
+
+    #[test]
+    fn parser_06() {
+        let mut parser = Parser::new();
+
+        parser.push_data("Hello}");
+        assert_eq!(parser.next_event(), Err(ParseError::ExpectedOpen(5)));
+    }
+
+    #[test]
+    fn parser_07() {
+        let mut parser = Parser::new();
+
+        parser.push_data("Hello $*@^ [");
+        assert_eq!(parser.next_event(), Err(ParseError::ExpectedOpen(6)));
+    }
+
+    #[test]
+    fn parser_08() {
+        let mut parser = Parser::new();
+
+        parser.push_data("}");
+        assert_eq!(parser.next_event(), Err(ParseError::UnexpectedClose(0)));
+    }
+}
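
Usage sketch (illustration only, not part of the patch): the commit wires the crate into the workspace but leaves it unused, so the loop below is a hedged example of how the streaming reader might be driven. It relies only on the public API added above (`DataTreeReader::new`, `next_event`, and the `Event` enum), and assumes the `R: std::io::BufRead` bound reconstructed in lib.rs; the sample document, the `std::io::Cursor` input, and the printing scheme are stand-ins invented for this example.

    use std::io::Cursor;

    use data_tree::{DataTreeReader, Event};

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // Hypothetical input: inner nodes are `Name { ... }`, leaf nodes are
        // `Name [ ... ]`, and `#` starts a comment that runs to end of line.
        let text = "Scene { # example\n    Color [1.0 0.5 0.25]\n}\n";

        // Any std::io::BufRead source works; a Cursor keeps the sketch self-contained.
        let mut reader = DataTreeReader::new(Cursor::new(text));

        loop {
            match reader.next_event()? {
                Event::InnerOpen {
                    type_name,
                    byte_offset,
                } => println!("open  at {}: {}", byte_offset, type_name),
                Event::Leaf {
                    type_name,
                    contents,
                    byte_offset,
                } => println!("leaf  at {}: {} [{}]", byte_offset, type_name, contents),
                Event::InnerClose { byte_offset } => println!("close at {}", byte_offset),
                Event::EOF => break,
            }
        }

        Ok(())
    }

Because the parser discards already-consumed input and asks for more data only when it cannot complete an event, the same loop should work unchanged when the cursor is swapped for a buffered file or network reader.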