From c1f8d21814a8d03b2aef8b15b44fb0fcb9b7f226 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Sat, 11 Jan 2020 08:04:11 +0900 Subject: [PATCH] Refactored data tree code to present a slightly nicer API. --- src/main.rs | 4 +- src/parse/parse_utils.rs | 2 +- src/parse/psy.rs | 11 +- src/parse/psy_assembly.rs | 2 +- src/parse/psy_light.rs | 2 +- src/parse/psy_mesh_surface.rs | 2 +- src/parse/psy_surface_shader.rs | 2 +- sub_crates/data_tree/src/lib.rs | 1020 +++++----------------------- sub_crates/data_tree/src/parse.rs | 911 +++++++++++++++++++++++++ sub_crates/data_tree/src/reader.rs | 121 ---- 10 files changed, 1081 insertions(+), 996 deletions(-) create mode 100644 sub_crates/data_tree/src/parse.rs delete mode 100644 sub_crates/data_tree/src/reader.rs diff --git a/src/main.rs b/src/main.rs index 2f1b2ba..377f6aa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -47,7 +47,7 @@ use nom::bytes::complete::take_until; use kioku::Arena; -use data_tree::{reader::DataTreeReader, Event}; +use data_tree::{DataTreeReader, Event}; use crate::{ accel::BVH4Node, @@ -371,7 +371,7 @@ fn main() { // } } - Ok(Event::ValidEnd) => { + Ok(Event::EOF) => { break; } diff --git a/src/parse/parse_utils.rs b/src/parse/parse_utils.rs index cd99827..d548a0c 100644 --- a/src/parse/parse_utils.rs +++ b/src/parse/parse_utils.rs @@ -14,7 +14,7 @@ use nom::{ IResult, }; -use data_tree::{reader::DataTreeReader, Event}; +use data_tree::{DataTreeReader, Event}; use super::psy::{PsyError, PsyResult}; diff --git a/src/parse/psy.rs b/src/parse/psy.rs index 05bf2c2..a0368f6 100644 --- a/src/parse/psy.rs +++ b/src/parse/psy.rs @@ -4,10 +4,7 @@ use std::{collections::HashMap, f32, io::BufRead, result::Result}; use nom::{combinator::all_consuming, sequence::tuple, IResult}; -use data_tree::{ - reader::{DataTreeReader, ReaderError}, - Event, -}; +use data_tree::{DataTreeReader, Event}; use kioku::Arena; use crate::{ @@ -44,7 +41,7 @@ pub enum PsyError { WrongNodeCount(usize, String), // Error message 
InstancedMissingData(usize, String, String), // Error message, data name ExpectedInternalNodeClose(usize, String), - ReaderError(ReaderError), + ReaderError(data_tree::Error), } impl PsyError { @@ -112,8 +109,8 @@ impl std::fmt::Display for PsyError { } } -impl From for PsyError { - fn from(e: ReaderError) -> Self { +impl From for PsyError { + fn from(e: data_tree::Error) -> Self { PsyError::ReaderError(e) } } diff --git a/src/parse/psy_assembly.rs b/src/parse/psy_assembly.rs index 092932c..e97a38f 100644 --- a/src/parse/psy_assembly.rs +++ b/src/parse/psy_assembly.rs @@ -4,7 +4,7 @@ use std::{io::BufRead, result::Result}; use kioku::Arena; -use data_tree::{reader::DataTreeReader, Event}; +use data_tree::{DataTreeReader, Event}; use crate::scene::{Assembly, Object, ObjectData}; diff --git a/src/parse/psy_light.rs b/src/parse/psy_light.rs index 40ebc6f..689e233 100644 --- a/src/parse/psy_light.rs +++ b/src/parse/psy_light.rs @@ -6,7 +6,7 @@ use nom::{combinator::all_consuming, sequence::tuple, IResult}; use kioku::Arena; -use data_tree::{reader::DataTreeReader, Event}; +use data_tree::{DataTreeReader, Event}; use crate::{ light::{DistantDiskLight, RectangleLight, SphereLight}, diff --git a/src/parse/psy_mesh_surface.rs b/src/parse/psy_mesh_surface.rs index 6c8e167..7d9b019 100644 --- a/src/parse/psy_mesh_surface.rs +++ b/src/parse/psy_mesh_surface.rs @@ -6,7 +6,7 @@ use nom::{sequence::tuple, IResult}; use kioku::Arena; -use data_tree::{reader::DataTreeReader, Event}; +use data_tree::{DataTreeReader, Event}; use crate::{ math::{Normal, Point}, diff --git a/src/parse/psy_surface_shader.rs b/src/parse/psy_surface_shader.rs index dec28cd..e2db366 100644 --- a/src/parse/psy_surface_shader.rs +++ b/src/parse/psy_surface_shader.rs @@ -6,7 +6,7 @@ use nom::{combinator::all_consuming, IResult}; use kioku::Arena; -use data_tree::{reader::DataTreeReader, Event}; +use data_tree::{DataTreeReader, Event}; use crate::shading::{SimpleSurfaceShader, SurfaceShader}; diff --git 
a/sub_crates/data_tree/src/lib.rs b/sub_crates/data_tree/src/lib.rs index 09e45f9..48e93cf 100644 --- a/sub_crates/data_tree/src/lib.rs +++ b/sub_crates/data_tree/src/lib.rs @@ -1,25 +1,6 @@ -#![allow(dead_code)] +mod parse; -pub mod reader; - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum Error { - ExpectedTypeNameOrClose(usize), - ExpectedOpenOrIdent(usize), - ExpectedOpen(usize), - UnexpectedClose(usize), - UnexpectedIdent(usize), -} - -impl std::error::Error for Error {} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{:?}", self) - } -} - -//--------------------------------------------------------------------- +use parse::{ParseError, ParseEvent, Parser}; #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Event<'a> { @@ -36,874 +17,191 @@ pub enum Event<'a> { contents: &'a str, byte_offset: usize, }, - NeedMoreInput, - ValidEnd, // All data so far is consumed, and this is a - // valid place to finish the parse. 
+ EOF, } -impl<'a> Event<'a> { - fn add_to_byte_offset(&self, offset: usize) -> Event<'a> { - match *self { - Event::InnerOpen { - type_name, - ident, - byte_offset, - } => Event::InnerOpen { - type_name: type_name, - ident: ident, - byte_offset: byte_offset + offset, - }, - Event::InnerClose { byte_offset } => Event::InnerClose { - byte_offset: byte_offset + offset, - }, - Event::Leaf { - type_name, - contents, - byte_offset, - } => Event::Leaf { - type_name: type_name, - contents: contents, - byte_offset: byte_offset + offset, - }, - Event::NeedMoreInput => *self, - Event::ValidEnd => *self, - } - } -} - -//--------------------------------------------------------------------- +//---------------------------------------------------------------------------- #[derive(Debug)] -pub struct Parser { - buffer: String, - buf_consumed_idx: usize, - total_bytes_processed: usize, - inner_opens: usize, +pub enum Error { + ExpectedTypeNameOrClose(usize), + ExpectedOpenOrIdent(usize), + ExpectedOpen(usize), + UnexpectedClose(usize), + UnexpectedIdent(usize), + UnexpectedEOF, + IO(std::io::Error), } -impl Parser { - pub fn new() -> Parser { - Parser { - buffer: String::with_capacity(1024), - buf_consumed_idx: 0, - total_bytes_processed: 0, - inner_opens: 0, +impl std::error::Error for Error {} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{:?}", self) + } +} + +impl From for Error { + fn from(e: ParseError) -> Self { + match e { + ParseError::ExpectedTypeNameOrClose(byte_offset) => { + Error::ExpectedTypeNameOrClose(byte_offset) + } + ParseError::ExpectedOpenOrIdent(byte_offset) => Error::ExpectedOpenOrIdent(byte_offset), + ParseError::ExpectedOpen(byte_offset) => Error::ExpectedOpen(byte_offset), + ParseError::UnexpectedClose(byte_offset) => Error::UnexpectedClose(byte_offset), + ParseError::UnexpectedIdent(byte_offset) => Error::UnexpectedIdent(byte_offset), } } +} - pub fn push_data(&mut self, 
text: &str) { - // Remove any consumed data. - if self.buf_consumed_idx > 0 { - self.buffer.replace_range(..self.buf_consumed_idx, ""); - self.buf_consumed_idx = 0; +impl From for Error { + fn from(e: std::io::Error) -> Self { + Error::IO(e) + } +} + +//------------------------------------------------------------- + +#[derive(Debug)] +pub struct DataTreeReader { + parser: Parser, + reader: R, + buf: String, + eof: bool, +} + +impl DataTreeReader { + pub fn new(reader: R) -> Self { + Self { + parser: Parser::new(), + reader: reader, + buf: String::new(), + eof: false, } - - // Add the new data. - self.buffer.push_str(text); } pub fn next_event<'a>(&'a mut self) -> Result, Error> { - // Remove any consumed data. - if self.buf_consumed_idx > 0 { - self.buffer.replace_range(..self.buf_consumed_idx, ""); - self.buf_consumed_idx = 0; - } + loop { + let valid_end = match self.parser.next_event()? { + ParseEvent::ValidEnd => true, + ParseEvent::NeedMoreInput => false, - // Try to parse an event from the valid prefix. - match try_parse_event(&self.buffer) { - EventParse::Ok(event, bytes_consumed) => { - // Update internal state. - if let Event::InnerOpen { .. } = event { - self.inner_opens += 1; - } else if let Event::InnerClose { byte_offset, .. } = event { - if self.inner_opens == 0 { - return Err(Error::UnexpectedClose( - byte_offset + self.total_bytes_processed, - )); - } else { - self.inner_opens -= 1; - } + // The transmutes below are because the borrow checker is + // over-conservative about this. It thinks + // the liftime isn't valid, but since we aren't + // mutating self after returning (and in fact + // can't because of the borrow) there's no way for + // the references in this to become invalid. 
+ ParseEvent::InnerOpen { + type_name, + ident, + byte_offset, + } => { + return Ok(unsafe { + std::mem::transmute::(Event::InnerOpen { + type_name, + ident, + byte_offset, + }) + }); } - self.buf_consumed_idx += bytes_consumed; - self.total_bytes_processed += bytes_consumed; - - Ok(event.add_to_byte_offset(self.total_bytes_processed - self.buf_consumed_idx)) - } - EventParse::ReachedEnd => { - // If we consumed all data, then if all nodes are properly - // closed we're done. Otherwise we need more input. - if self.inner_opens == 0 { - Ok(Event::ValidEnd) - } else { - Ok(Event::NeedMoreInput) + ParseEvent::InnerClose { byte_offset } => { + return Ok(unsafe { + std::mem::transmute::(Event::InnerClose { byte_offset }) + }); } - } - EventParse::IncompleteData => Ok(Event::NeedMoreInput), + ParseEvent::Leaf { + type_name, + contents, + byte_offset, + } => { + return Ok(unsafe { + std::mem::transmute::(Event::Leaf { + type_name, + contents, + byte_offset, + }) + }); + } + }; - // Hard errors. - EventParse::ExpectedTypeNameOrInnerClose(byte_offset) => Err( - Error::ExpectedTypeNameOrClose(byte_offset + self.total_bytes_processed), - ), - EventParse::ExpectedOpenOrIdent(byte_offset) => Err(Error::ExpectedOpenOrIdent( - byte_offset + self.total_bytes_processed, - )), - EventParse::ExpectedInnerOpen(byte_offset) => Err(Error::ExpectedOpen( - byte_offset + self.total_bytes_processed, - )), - EventParse::UnexpectedIdent(byte_offset) => Err(Error::UnexpectedIdent( - byte_offset + self.total_bytes_processed, - )), + if !self.eof { + self.buf.clear(); + let read = self.reader.read_line(&mut self.buf)?; + self.parser.push_data(&self.buf); + if read == 0 { + self.eof = true; + } + } else if !valid_end { + return Err(Error::UnexpectedEOF); + } else { + return Ok(Event::EOF); + } } } pub fn peek_event<'a>(&'a mut self) -> Result, Error> { - // Remove any consumed data. 
- if self.buf_consumed_idx > 0 { - self.buffer.replace_range(..self.buf_consumed_idx, ""); - self.buf_consumed_idx = 0; - } + loop { + let valid_end = match self.parser.peek_event()? { + ParseEvent::ValidEnd => true, + ParseEvent::NeedMoreInput => false, - // Try to parse an event from the valid prefix. - match try_parse_event(&self.buffer) { - EventParse::Ok(event, _bytes_consumed) => { - if let Event::InnerClose { byte_offset, .. } = event { - if self.inner_opens == 0 { - return Err(Error::UnexpectedClose( - byte_offset + self.total_bytes_processed, - )); - } + // The transmutes below are because the borrow checker is + // over-conservative about this. It thinks + // the liftime isn't valid, but since we aren't + // mutating self after returning (and in fact + // can't because of the borrow) there's no way for + // the references in this to become invalid. + ParseEvent::InnerOpen { + type_name, + ident, + byte_offset, + } => { + return Ok(unsafe { + std::mem::transmute::(Event::InnerOpen { + type_name, + ident, + byte_offset, + }) + }); } - Ok(event.add_to_byte_offset(self.total_bytes_processed)) - } - EventParse::ReachedEnd => { - // If we consumed all data, then if all nodes are properly - // closed we're done. Otherwise we need more input. - if self.inner_opens == 0 { - Ok(Event::ValidEnd) - } else { - Ok(Event::NeedMoreInput) + ParseEvent::InnerClose { byte_offset } => { + return Ok(unsafe { + std::mem::transmute::(Event::InnerClose { byte_offset }) + }); } - } - EventParse::IncompleteData => Ok(Event::NeedMoreInput), + ParseEvent::Leaf { + type_name, + contents, + byte_offset, + } => { + return Ok(unsafe { + std::mem::transmute::(Event::Leaf { + type_name, + contents, + byte_offset, + }) + }); + } + }; - // Hard errors. 
- EventParse::ExpectedTypeNameOrInnerClose(byte_offset) => Err( - Error::ExpectedTypeNameOrClose(byte_offset + self.total_bytes_processed), - ), - EventParse::ExpectedOpenOrIdent(byte_offset) => Err(Error::ExpectedOpenOrIdent( - byte_offset + self.total_bytes_processed, - )), - EventParse::ExpectedInnerOpen(byte_offset) => Err(Error::ExpectedOpen( - byte_offset + self.total_bytes_processed, - )), - EventParse::UnexpectedIdent(byte_offset) => Err(Error::UnexpectedIdent( - byte_offset + self.total_bytes_processed, - )), + if !self.eof { + self.buf.clear(); + let read = self.reader.read_line(&mut self.buf)?; + self.parser.push_data(&self.buf); + if read == 0 { + self.eof = true; + } + } else if !valid_end { + return Err(Error::UnexpectedEOF); + } else { + return Ok(Event::EOF); + } } } pub fn byte_offset(&self) -> usize { - self.total_bytes_processed + self.buf_consumed_idx - } -} - -//-------------------------------------------------------------------------- - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum EventParse<'a> { - Ok(Event<'a>, usize), // (event, bytes consumed) - ReachedEnd, // Reached the end of the buffer in a valid state, with no event. - IncompleteData, // Need more data to parse. - - // Errors. - ExpectedTypeNameOrInnerClose(usize), - ExpectedOpenOrIdent(usize), - ExpectedInnerOpen(usize), - UnexpectedIdent(usize), -} - -fn try_parse_event<'a>(text: &'a str) -> EventParse<'a> { - // Remove leading whitespace and comments. - let mut source_text = skip_ws_and_comments((0, text)); - let start_idx = source_text.0; - - // First token. - let type_name = match next_token(source_text) { - // Type name, record and continue. - (Token::TypeName(tn), tail) => { - source_text = tail; - tn - } - - // Closing tag for inner node. Return. - (Token::CloseInner, tail) => { - return EventParse::Ok( - Event::InnerClose { - byte_offset: start_idx, - }, - tail.0, - ); - } - - // We consumed everything as whitespace and/or - // comments. Return. 
- (Token::End, _) => { - return EventParse::ReachedEnd; - } - - // Invalid. - _ => return EventParse::ExpectedTypeNameOrInnerClose(start_idx), - }; - - // Skip whitespace and comments to get the start of the - // (possible) ident, for use later in error. - source_text = skip_ws_and_comments(source_text); - let ident_start_idx = source_text.0; - - // Possible second token: optional ident. - let ident = if let (Token::Ident(id), tail) = next_token(source_text) { - source_text = tail; - Some(id) - } else { - None - }; - - // Skip whitespace and comments to get the start of the - // where there should be an open tag, for use later in error. - source_text = skip_ws_and_comments(source_text); - let open_start_idx = source_text.0; - - // Last part of the event. - match next_token(source_text) { - // Begining of an inner node. - (Token::OpenInner, tail) => { - return EventParse::Ok( - Event::InnerOpen { - type_name: type_name, - ident: ident, - byte_offset: start_idx, - }, - tail.0, - ); - } - - // Try to parse entire leaf node. - (Token::OpenLeaf, tail) => { - if ident != None { - return EventParse::UnexpectedIdent(ident_start_idx); - } - - // Get contents. - let (contents, tail2) = parse_leaf_content(tail); - source_text = tail2; - - // Try to get closing tag. - match next_token(source_text) { - // If it's a leaf closing tag, we're done! - // Return the leaf event. - (Token::CloseLeaf, tail) => { - return EventParse::Ok( - Event::Leaf { - type_name: type_name, - contents: contents, - byte_offset: start_idx, - }, - tail.0, - ); - } - - // Otherwise... - _ => { - if source_text.1.is_empty() { - // If there's no text left, we're just incomplete. - return EventParse::IncompleteData; - } else { - // Otherwise, this would be a parse error... - // except that this shouldn't be reachable, - // since everything should be consumable for - // leaf content up until a close tag. 
- unreachable!("Expected leaf close tag.") - } - } - } - } - - // We consumed everything else as whitespace - // and/or comments, so we're incomplete. Return. - (Token::End, _) => { - return EventParse::IncompleteData; - } - - // Invalid. - _ => { - if ident == None { - return EventParse::ExpectedOpenOrIdent(open_start_idx); - } else { - return EventParse::ExpectedInnerOpen(open_start_idx); - } - } - } -} - -fn parse_leaf_content(source_text: (usize, &str)) -> (&str, (usize, &str)) { - let mut si = 1; - let mut escaped = false; - let mut reached_end = true; - for (i, c) in source_text.1.char_indices() { - si = i; - if escaped { - escaped = false; - } else if c == '\\' { - escaped = true; - } else if c == ']' { - reached_end = false; - break; - } - } - - if reached_end { - si = source_text.1.len(); - } - - return ( - &source_text.1[0..si], - (source_text.0 + si, &source_text.1[si..]), - ); -} - -//-------------------------------------------------------------------------- - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -enum Token<'a> { - OpenInner, - CloseInner, - OpenLeaf, - CloseLeaf, - TypeName(&'a str), - Ident(&'a str), - End, - Unknown, -} - -fn next_token<'a>(source_text: (usize, &'a str)) -> (Token<'a>, (usize, &'a str)) { - let text1 = skip_ws_and_comments(source_text); - - if let Some(c) = text1.1.chars().nth(0) { - let text2 = (text1.0 + c.len_utf8(), &text1.1[c.len_utf8()..]); - match c { - '{' => { - return (Token::OpenInner, text2); - } - - '}' => { - return (Token::CloseInner, text2); - } - - '[' => { - return (Token::OpenLeaf, text2); - } - - ']' => { - return (Token::CloseLeaf, text2); - } - - '$' => { - // Parse name - let mut si = 1; - let mut escaped = false; - let mut reached_end = true; - for (i, c) in text1.1.char_indices().skip(1) { - si = i; - if escaped { - escaped = false; - } else if c == '\\' { - escaped = true; - } else if !is_ident_char(c) { - reached_end = false; - break; - } - } - - if reached_end { - si = text1.1.len(); - } - - 
return ( - Token::Ident(&text1.1[0..si]), - (text1.0 + si, &text1.1[si..]), - ); - } - - _ => { - if is_ident_char(c) { - // Parse type - let mut si = 0; - let mut reached_end = true; - for (i, c) in text1.1.char_indices() { - si = i; - if !is_ident_char(c) { - reached_end = false; - break; - } - } - - if reached_end { - si = text1.1.len(); - } - - return ( - Token::TypeName(&text1.1[0..si]), - (text1.0 + si, &text1.1[si..]), - ); - } - } - } - } else { - return (Token::End, text1); - } - - return (Token::Unknown, text1); -} - -fn is_ws(c: char) -> bool { - match c { - '\n' | '\r' | '\t' | ' ' => true, - _ => false, - } -} - -fn is_nl(c: char) -> bool { - match c { - '\n' => true, - _ => false, - } -} - -fn is_reserved_char(c: char) -> bool { - match c { - '{' | '}' | '[' | ']' | '$' | '#' | '\\' => true, - _ => false, - } -} - -fn is_ident_char(c: char) -> bool { - // Anything that isn't whitespace or a reserved character - !is_ws(c) && !is_reserved_char(c) -} - -fn skip_ws(text: &str) -> &str { - let mut si = 0; - let mut reached_end = true; - for (i, c) in text.char_indices() { - si = i; - if !is_ws(c) { - reached_end = false; - break; - } - } - - if reached_end { - si = text.len(); - } - - return &text[si..]; -} - -fn skip_comment(text: &str) -> &str { - let mut si = 0; - if Some('#') == text.chars().nth(0) { - let mut reached_end = true; - for (i, c) in text.char_indices() { - si = i; - if is_nl(c) { - reached_end = false; - break; - } - } - - if reached_end { - si = text.len(); - } - } - - return &text[si..]; -} - -fn skip_ws_and_comments(text: (usize, &str)) -> (usize, &str) { - let mut remaining_text = text.1; - - loop { - let tmp = skip_comment(skip_ws(remaining_text)); - - if tmp.len() == remaining_text.len() { - break; - } else { - remaining_text = tmp; - } - } - - let offset = text.0 + text.1.len() - remaining_text.len(); - return (offset, remaining_text); -} - -//-------------------------------------------------------------------------- - -#[cfg(test)] 
-mod tests { - use super::*; - use super::{next_token, Token}; - - #[test] - fn tokenize_1() { - let input = (0, "Thing"); - - assert_eq!(next_token(input), (Token::TypeName("Thing"), (5, ""))); - } - - #[test] - fn tokenize_2() { - let input = (0, " \n# gdfgdf gfdg dggdf\\sg dfgsd \n Thing"); - - assert_eq!(next_token(input), (Token::TypeName("Thing"), (41, ""))); - } - - #[test] - fn tokenize_3() { - let input1 = (0, " Thing { }"); - let (token1, input2) = next_token(input1); - let (token2, input3) = next_token(input2); - let (token3, input4) = next_token(input3); - - assert_eq!((token1, input2.1), (Token::TypeName("Thing"), " { }")); - assert_eq!((token2, input3.1), (Token::OpenInner, " }")); - assert_eq!((token3, input4.1), (Token::CloseInner, "")); - } - - #[test] - fn tokenize_4() { - let input = (0, " $hi_there "); - - assert_eq!(next_token(input), (Token::Ident("$hi_there"), (10, " "))); - } - - #[test] - fn tokenize_5() { - let input = (0, " $hi\\ t\\#he\\[re "); - - assert_eq!( - next_token(input), - (Token::Ident("$hi\\ t\\#he\\[re"), (15, " "),) - ); - } - - #[test] - fn tokenize_6() { - let input1 = (0, " $hi the[re"); - let (token1, input2) = next_token(input1); - let (token2, input3) = next_token(input2); - let (token3, input4) = next_token(input3); - let (token4, input5) = next_token(input4); - let (token5, input6) = next_token(input5); - - assert_eq!((token1, input2), (Token::Ident("$hi"), (4, " the[re"))); - assert_eq!((token2, input3), (Token::TypeName("the"), (8, "[re"))); - assert_eq!((token3, input4), (Token::OpenLeaf, (9, "re"))); - assert_eq!((token4, input5), (Token::TypeName("re"), (11, ""))); - assert_eq!((token5, input6), (Token::End, (11, ""))); - } - - #[test] - fn tokenize_7() { - let input1 = (0, "Thing $yar { # A comment\n\tThing2 []\n}"); - let (token1, input2) = next_token(input1); - let (token2, input3) = next_token(input2); - let (token3, input4) = next_token(input3); - let (token4, input5) = next_token(input4); - let (token5, 
input6) = next_token(input5); - let (token6, input7) = next_token(input6); - let (token7, input8) = next_token(input7); - let (token8, input9) = next_token(input8); - - assert_eq!( - (token1, input2), - ( - Token::TypeName("Thing"), - (5, " $yar { # A comment\n\tThing2 []\n}",) - ) - ); - assert_eq!( - (token2, input3), - ( - Token::Ident("$yar"), - (10, " { # A comment\n\tThing2 []\n}",) - ) - ); - assert_eq!( - (token3, input4), - (Token::OpenInner, (12, " # A comment\n\tThing2 []\n}",)) - ); - assert_eq!( - (token4, input5), - (Token::TypeName("Thing2"), (32, " []\n}")) - ); - assert_eq!((token5, input6), (Token::OpenLeaf, (34, "]\n}"))); - assert_eq!((token6, input7), (Token::CloseLeaf, (35, "\n}"))); - assert_eq!((token7, input8), (Token::CloseInner, (37, ""))); - assert_eq!((token8, input9), (Token::End, (37, ""))); - } - - #[test] - fn try_parse_event_01() { - assert_eq!(try_parse_event("H"), EventParse::IncompleteData,); - } - - #[test] - fn try_parse_event_02() { - assert_eq!(try_parse_event("Hello $"), EventParse::IncompleteData,); - } - - #[test] - fn try_parse_event_03() { - assert_eq!(try_parse_event("Hello $id "), EventParse::IncompleteData,); - } - - #[test] - fn try_parse_event_04() { - assert_eq!( - try_parse_event("Hello $id {"), - EventParse::Ok( - Event::InnerOpen { - type_name: "Hello", - ident: Some("$id"), - byte_offset: 0, - }, - 11 - ), - ); - } - - #[test] - fn try_parse_event_05() { - assert_eq!( - try_parse_event(" Hello $id {"), - EventParse::Ok( - Event::InnerOpen { - type_name: "Hello", - ident: Some("$id"), - byte_offset: 2, - }, - 13 - ), - ); - } - - #[test] - fn try_parse_event_06() { - assert_eq!( - try_parse_event("Hello {"), - EventParse::Ok( - Event::InnerOpen { - type_name: "Hello", - ident: None, - byte_offset: 0, - }, - 7 - ), - ); - } - - #[test] - fn try_parse_event_07() { - assert_eq!( - try_parse_event("Hello { "), - EventParse::Ok( - Event::InnerOpen { - type_name: "Hello", - ident: None, - byte_offset: 0, - }, - 7 - 
), - ); - } - - #[test] - fn try_parse_event_08() { - assert_eq!(try_parse_event("Hello ["), EventParse::IncompleteData,); - } - - #[test] - fn try_parse_event_09() { - assert_eq!( - try_parse_event("Hello [some contents"), - EventParse::IncompleteData, - ); - } - - #[test] - fn try_parse_event_10() { - assert_eq!( - try_parse_event("Hello [some contents]"), - EventParse::Ok( - Event::Leaf { - type_name: "Hello", - contents: "some contents", - byte_offset: 0, - }, - 21 - ), - ); - } - - #[test] - fn try_parse_event_11() { - assert_eq!( - try_parse_event("Hello [some contents] "), - EventParse::Ok( - Event::Leaf { - type_name: "Hello", - contents: "some contents", - byte_offset: 0, - }, - 21 - ), - ); - } - - #[test] - fn try_parse_event_12() { - assert_eq!( - try_parse_event(" # A comment\n\n "), - EventParse::ReachedEnd, - ); - } - - #[test] - fn parser_01() { - let mut parser = Parser::new(); - - parser.push_data("Hello"); - assert_eq!(parser.next_event(), Ok(Event::NeedMoreInput)); - - parser.push_data("{"); - assert_eq!( - parser.next_event(), - Ok(Event::InnerOpen { - type_name: "Hello", - ident: None, - byte_offset: 0, - }) - ); - - assert_eq!(parser.next_event(), Ok(Event::NeedMoreInput)); - - parser.push_data("}"); - assert_eq!( - parser.next_event(), - Ok(Event::InnerClose { byte_offset: 6 }) - ); - - assert_eq!(parser.next_event(), Ok(Event::ValidEnd)); - } - - #[test] - fn parser_02() { - let mut parser = Parser::new(); - - parser.push_data("Hello"); - assert_eq!(parser.next_event(), Ok(Event::NeedMoreInput)); - - parser.push_data("["); - assert_eq!(parser.next_event(), Ok(Event::NeedMoreInput)); - - parser.push_data("1.0 2.0 3."); - assert_eq!(parser.next_event(), Ok(Event::NeedMoreInput)); - - parser.push_data("0]"); - assert_eq!( - parser.next_event(), - Ok(Event::Leaf { - type_name: "Hello", - contents: "1.0 2.0 3.0", - byte_offset: 0, - }) - ); - - assert_eq!(parser.next_event(), Ok(Event::ValidEnd)); - } - - #[test] - fn parser_03() { - let mut 
parser = Parser::new(); - - parser.push_data("Hello $big_boy { World [1.0 2.0 3.0] }"); - - assert_eq!( - parser.next_event(), - Ok(Event::InnerOpen { - type_name: "Hello", - ident: Some("$big_boy"), - byte_offset: 0, - }) - ); - - assert_eq!( - parser.next_event(), - Ok(Event::Leaf { - type_name: "World", - contents: "1.0 2.0 3.0", - byte_offset: 17, - }) - ); - - assert_eq!( - parser.next_event(), - Ok(Event::InnerClose { byte_offset: 37 }) - ); - - // Make sure repeated calls are stable. - assert_eq!(parser.next_event(), Ok(Event::ValidEnd)); - assert_eq!(parser.next_event(), Ok(Event::ValidEnd)); - assert_eq!(parser.next_event(), Ok(Event::ValidEnd)); - } - - #[test] - fn parser_04() { - let mut parser = Parser::new(); - - parser.push_data("$Hello"); - assert_eq!(parser.next_event(), Err(Error::ExpectedTypeNameOrClose(0))); - } - - #[test] - fn parser_05() { - let mut parser = Parser::new(); - - parser.push_data("Hello]"); - assert_eq!(parser.next_event(), Err(Error::ExpectedOpenOrIdent(5))); - } - - #[test] - fn parser_06() { - let mut parser = Parser::new(); - - parser.push_data("Hello}"); - assert_eq!(parser.next_event(), Err(Error::ExpectedOpenOrIdent(5))); - } - - #[test] - fn parser_07() { - let mut parser = Parser::new(); - - parser.push_data("Hello $yar ["); - assert_eq!(parser.next_event(), Err(Error::UnexpectedIdent(6))); - } - - #[test] - fn parser_08() { - let mut parser = Parser::new(); - - parser.push_data("}"); - assert_eq!(parser.next_event(), Err(Error::UnexpectedClose(0))); + self.parser.byte_offset() } } diff --git a/sub_crates/data_tree/src/parse.rs b/sub_crates/data_tree/src/parse.rs new file mode 100644 index 0000000..9290a2a --- /dev/null +++ b/sub_crates/data_tree/src/parse.rs @@ -0,0 +1,911 @@ +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum ParseError { + ExpectedTypeNameOrClose(usize), + ExpectedOpenOrIdent(usize), + ExpectedOpen(usize), + UnexpectedClose(usize), + UnexpectedIdent(usize), +} + +impl std::error::Error for 
ParseError {} + +impl std::fmt::Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{:?}", self) + } +} + +//--------------------------------------------------------------------- + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum ParseEvent<'a> { + InnerOpen { + type_name: &'a str, + ident: Option<&'a str>, + byte_offset: usize, + }, + InnerClose { + byte_offset: usize, + }, + Leaf { + type_name: &'a str, + contents: &'a str, + byte_offset: usize, + }, + NeedMoreInput, + ValidEnd, // All data so far is consumed, and this is a + // valid place to finish the parse. +} + +impl<'a> ParseEvent<'a> { + fn add_to_byte_offset(&self, offset: usize) -> ParseEvent<'a> { + match *self { + ParseEvent::InnerOpen { + type_name, + ident, + byte_offset, + } => ParseEvent::InnerOpen { + type_name: type_name, + ident: ident, + byte_offset: byte_offset + offset, + }, + ParseEvent::InnerClose { byte_offset } => ParseEvent::InnerClose { + byte_offset: byte_offset + offset, + }, + ParseEvent::Leaf { + type_name, + contents, + byte_offset, + } => ParseEvent::Leaf { + type_name: type_name, + contents: contents, + byte_offset: byte_offset + offset, + }, + ParseEvent::NeedMoreInput => *self, + ParseEvent::ValidEnd => *self, + } + } +} + +//--------------------------------------------------------------------- + +#[derive(Debug)] +pub struct Parser { + buffer: String, + buf_consumed_idx: usize, + total_bytes_processed: usize, + inner_opens: usize, +} + +impl Parser { + pub fn new() -> Parser { + Parser { + buffer: String::with_capacity(1024), + buf_consumed_idx: 0, + total_bytes_processed: 0, + inner_opens: 0, + } + } + + pub fn push_data(&mut self, text: &str) { + // Remove any consumed data. + if self.buf_consumed_idx > 0 { + self.buffer.replace_range(..self.buf_consumed_idx, ""); + self.buf_consumed_idx = 0; + } + + // Add the new data. 
+ self.buffer.push_str(text); + } + + pub fn next_event<'a>(&'a mut self) -> Result, ParseError> { + // Remove any consumed data. + if self.buf_consumed_idx > 0 { + self.buffer.replace_range(..self.buf_consumed_idx, ""); + self.buf_consumed_idx = 0; + } + + // Try to parse an event from the valid prefix. + match try_parse_event(&self.buffer) { + ParseEventParse::Ok(event, bytes_consumed) => { + // Update internal state. + if let ParseEvent::InnerOpen { .. } = event { + self.inner_opens += 1; + } else if let ParseEvent::InnerClose { byte_offset, .. } = event { + if self.inner_opens == 0 { + return Err(ParseError::UnexpectedClose( + byte_offset + self.total_bytes_processed, + )); + } else { + self.inner_opens -= 1; + } + } + self.buf_consumed_idx += bytes_consumed; + self.total_bytes_processed += bytes_consumed; + + Ok(event.add_to_byte_offset(self.total_bytes_processed - self.buf_consumed_idx)) + } + ParseEventParse::ReachedEnd => { + // If we consumed all data, then if all nodes are properly + // closed we're done. Otherwise we need more input. + if self.inner_opens == 0 { + Ok(ParseEvent::ValidEnd) + } else { + Ok(ParseEvent::NeedMoreInput) + } + } + ParseEventParse::IncompleteData => Ok(ParseEvent::NeedMoreInput), + + // Hard errors. + ParseEventParse::ExpectedTypeNameOrInnerClose(byte_offset) => Err( + ParseError::ExpectedTypeNameOrClose(byte_offset + self.total_bytes_processed), + ), + ParseEventParse::ExpectedOpenOrIdent(byte_offset) => Err( + ParseError::ExpectedOpenOrIdent(byte_offset + self.total_bytes_processed), + ), + ParseEventParse::ExpectedInnerOpen(byte_offset) => Err(ParseError::ExpectedOpen( + byte_offset + self.total_bytes_processed, + )), + ParseEventParse::UnexpectedIdent(byte_offset) => Err(ParseError::UnexpectedIdent( + byte_offset + self.total_bytes_processed, + )), + } + } + + pub fn peek_event<'a>(&'a mut self) -> Result, ParseError> { + // Remove any consumed data. 
+ if self.buf_consumed_idx > 0 { + self.buffer.replace_range(..self.buf_consumed_idx, ""); + self.buf_consumed_idx = 0; + } + + // Try to parse an event from the valid prefix. + match try_parse_event(&self.buffer) { + ParseEventParse::Ok(event, _bytes_consumed) => { + if let ParseEvent::InnerClose { byte_offset, .. } = event { + if self.inner_opens == 0 { + return Err(ParseError::UnexpectedClose( + byte_offset + self.total_bytes_processed, + )); + } + } + Ok(event.add_to_byte_offset(self.total_bytes_processed)) + } + ParseEventParse::ReachedEnd => { + // If we consumed all data, then if all nodes are properly + // closed we're done. Otherwise we need more input. + if self.inner_opens == 0 { + Ok(ParseEvent::ValidEnd) + } else { + Ok(ParseEvent::NeedMoreInput) + } + } + ParseEventParse::IncompleteData => Ok(ParseEvent::NeedMoreInput), + + // Hard errors. + ParseEventParse::ExpectedTypeNameOrInnerClose(byte_offset) => Err( + ParseError::ExpectedTypeNameOrClose(byte_offset + self.total_bytes_processed), + ), + ParseEventParse::ExpectedOpenOrIdent(byte_offset) => Err( + ParseError::ExpectedOpenOrIdent(byte_offset + self.total_bytes_processed), + ), + ParseEventParse::ExpectedInnerOpen(byte_offset) => Err(ParseError::ExpectedOpen( + byte_offset + self.total_bytes_processed, + )), + ParseEventParse::UnexpectedIdent(byte_offset) => Err(ParseError::UnexpectedIdent( + byte_offset + self.total_bytes_processed, + )), + } + } + + pub fn byte_offset(&self) -> usize { + self.total_bytes_processed + self.buf_consumed_idx + } +} + +//-------------------------------------------------------------------------- + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum ParseEventParse<'a> { + Ok(ParseEvent<'a>, usize), // (event, bytes consumed) + ReachedEnd, // Reached the end of the buffer in a valid state, with no event. + IncompleteData, // Need more data to parse. + + // ParseErrors. 
+ ExpectedTypeNameOrInnerClose(usize),
+ ExpectedOpenOrIdent(usize),
+ ExpectedInnerOpen(usize),
+ UnexpectedIdent(usize),
+}
+
+fn try_parse_event<'a>(text: &'a str) -> ParseEventParse<'a> {
+ // Remove leading whitespace and comments.
+ let mut source_text = skip_ws_and_comments((0, text));
+ let start_idx = source_text.0;
+
+ // First token.
+ let type_name = match next_token(source_text) {
+ // Type name, record and continue.
+ (Token::TypeName(tn), tail) => {
+ source_text = tail;
+ tn
+ }
+
+ // Closing tag for inner node. Return.
+ (Token::CloseInner, tail) => {
+ return ParseEventParse::Ok(
+ ParseEvent::InnerClose {
+ byte_offset: start_idx,
+ },
+ tail.0,
+ );
+ }
+
+ // We consumed everything as whitespace and/or
+ // comments. Return.
+ (Token::End, _) => {
+ return ParseEventParse::ReachedEnd;
+ }
+
+ // Invalid.
+ _ => return ParseEventParse::ExpectedTypeNameOrInnerClose(start_idx),
+ };
+
+ // Skip whitespace and comments to get the start of the
+ // (possible) ident, for use later in error.
+ source_text = skip_ws_and_comments(source_text);
+ let ident_start_idx = source_text.0;
+
+ // Possible second token: optional ident.
+ let ident = if let (Token::Ident(id), tail) = next_token(source_text) {
+ source_text = tail;
+ Some(id)
+ } else {
+ None
+ };
+
+ // Skip whitespace and comments to get the start of
+ // where there should be an open tag, for use later in error.
+ source_text = skip_ws_and_comments(source_text);
+ let open_start_idx = source_text.0;
+
+ // Last part of the event.
+ match next_token(source_text) {
+ // Beginning of an inner node.
+ (Token::OpenInner, tail) => {
+ return ParseEventParse::Ok(
+ ParseEvent::InnerOpen {
+ type_name: type_name,
+ ident: ident,
+ byte_offset: start_idx,
+ },
+ tail.0,
+ );
+ }
+
+ // Try to parse entire leaf node.
+ (Token::OpenLeaf, tail) => {
+ if ident != None {
+ return ParseEventParse::UnexpectedIdent(ident_start_idx);
+ }
+
+ // Get contents.
+ let (contents, tail2) = parse_leaf_content(tail); + source_text = tail2; + + // Try to get closing tag. + match next_token(source_text) { + // If it's a leaf closing tag, we're done! + // Return the leaf event. + (Token::CloseLeaf, tail) => { + return ParseEventParse::Ok( + ParseEvent::Leaf { + type_name: type_name, + contents: contents, + byte_offset: start_idx, + }, + tail.0, + ); + } + + // Otherwise... + _ => { + if source_text.1.is_empty() { + // If there's no text left, we're just incomplete. + return ParseEventParse::IncompleteData; + } else { + // Otherwise, this would be a parse error... + // except that this shouldn't be reachable, + // since everything should be consumable for + // leaf content up until a close tag. + unreachable!("Expected leaf close tag.") + } + } + } + } + + // We consumed everything else as whitespace + // and/or comments, so we're incomplete. Return. + (Token::End, _) => { + return ParseEventParse::IncompleteData; + } + + // Invalid. + _ => { + if ident == None { + return ParseEventParse::ExpectedOpenOrIdent(open_start_idx); + } else { + return ParseEventParse::ExpectedInnerOpen(open_start_idx); + } + } + } +} + +fn parse_leaf_content(source_text: (usize, &str)) -> (&str, (usize, &str)) { + let mut si = 1; + let mut escaped = false; + let mut reached_end = true; + for (i, c) in source_text.1.char_indices() { + si = i; + if escaped { + escaped = false; + } else if c == '\\' { + escaped = true; + } else if c == ']' { + reached_end = false; + break; + } + } + + if reached_end { + si = source_text.1.len(); + } + + return ( + &source_text.1[0..si], + (source_text.0 + si, &source_text.1[si..]), + ); +} + +//-------------------------------------------------------------------------- + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum Token<'a> { + OpenInner, + CloseInner, + OpenLeaf, + CloseLeaf, + TypeName(&'a str), + Ident(&'a str), + End, + Unknown, +} + +fn next_token<'a>(source_text: (usize, &'a str)) -> (Token<'a>, (usize, &'a 
str)) { + let text1 = skip_ws_and_comments(source_text); + + if let Some(c) = text1.1.chars().nth(0) { + let text2 = (text1.0 + c.len_utf8(), &text1.1[c.len_utf8()..]); + match c { + '{' => { + return (Token::OpenInner, text2); + } + + '}' => { + return (Token::CloseInner, text2); + } + + '[' => { + return (Token::OpenLeaf, text2); + } + + ']' => { + return (Token::CloseLeaf, text2); + } + + '$' => { + // Parse name + let mut si = 1; + let mut escaped = false; + let mut reached_end = true; + for (i, c) in text1.1.char_indices().skip(1) { + si = i; + if escaped { + escaped = false; + } else if c == '\\' { + escaped = true; + } else if !is_ident_char(c) { + reached_end = false; + break; + } + } + + if reached_end { + si = text1.1.len(); + } + + return ( + Token::Ident(&text1.1[0..si]), + (text1.0 + si, &text1.1[si..]), + ); + } + + _ => { + if is_ident_char(c) { + // Parse type + let mut si = 0; + let mut reached_end = true; + for (i, c) in text1.1.char_indices() { + si = i; + if !is_ident_char(c) { + reached_end = false; + break; + } + } + + if reached_end { + si = text1.1.len(); + } + + return ( + Token::TypeName(&text1.1[0..si]), + (text1.0 + si, &text1.1[si..]), + ); + } + } + } + } else { + return (Token::End, text1); + } + + return (Token::Unknown, text1); +} + +fn is_ws(c: char) -> bool { + match c { + '\n' | '\r' | '\t' | ' ' => true, + _ => false, + } +} + +fn is_nl(c: char) -> bool { + match c { + '\n' => true, + _ => false, + } +} + +fn is_reserved_char(c: char) -> bool { + match c { + '{' | '}' | '[' | ']' | '$' | '#' | '\\' => true, + _ => false, + } +} + +fn is_ident_char(c: char) -> bool { + // Anything that isn't whitespace or a reserved character + !is_ws(c) && !is_reserved_char(c) +} + +fn skip_ws(text: &str) -> &str { + let mut si = 0; + let mut reached_end = true; + for (i, c) in text.char_indices() { + si = i; + if !is_ws(c) { + reached_end = false; + break; + } + } + + if reached_end { + si = text.len(); + } + + return &text[si..]; +} + +fn 
skip_comment(text: &str) -> &str { + let mut si = 0; + if Some('#') == text.chars().nth(0) { + let mut reached_end = true; + for (i, c) in text.char_indices() { + si = i; + if is_nl(c) { + reached_end = false; + break; + } + } + + if reached_end { + si = text.len(); + } + } + + return &text[si..]; +} + +fn skip_ws_and_comments(text: (usize, &str)) -> (usize, &str) { + let mut remaining_text = text.1; + + loop { + let tmp = skip_comment(skip_ws(remaining_text)); + + if tmp.len() == remaining_text.len() { + break; + } else { + remaining_text = tmp; + } + } + + let offset = text.0 + text.1.len() - remaining_text.len(); + return (offset, remaining_text); +} + +//-------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use super::{next_token, Token}; + + #[test] + fn tokenize_1() { + let input = (0, "Thing"); + + assert_eq!(next_token(input), (Token::TypeName("Thing"), (5, ""))); + } + + #[test] + fn tokenize_2() { + let input = (0, " \n# gdfgdf gfdg dggdf\\sg dfgsd \n Thing"); + + assert_eq!(next_token(input), (Token::TypeName("Thing"), (41, ""))); + } + + #[test] + fn tokenize_3() { + let input1 = (0, " Thing { }"); + let (token1, input2) = next_token(input1); + let (token2, input3) = next_token(input2); + let (token3, input4) = next_token(input3); + + assert_eq!((token1, input2.1), (Token::TypeName("Thing"), " { }")); + assert_eq!((token2, input3.1), (Token::OpenInner, " }")); + assert_eq!((token3, input4.1), (Token::CloseInner, "")); + } + + #[test] + fn tokenize_4() { + let input = (0, " $hi_there "); + + assert_eq!(next_token(input), (Token::Ident("$hi_there"), (10, " "))); + } + + #[test] + fn tokenize_5() { + let input = (0, " $hi\\ t\\#he\\[re "); + + assert_eq!( + next_token(input), + (Token::Ident("$hi\\ t\\#he\\[re"), (15, " "),) + ); + } + + #[test] + fn tokenize_6() { + let input1 = (0, " $hi the[re"); + let (token1, input2) = next_token(input1); + let (token2, input3) = next_token(input2); + 
let (token3, input4) = next_token(input3); + let (token4, input5) = next_token(input4); + let (token5, input6) = next_token(input5); + + assert_eq!((token1, input2), (Token::Ident("$hi"), (4, " the[re"))); + assert_eq!((token2, input3), (Token::TypeName("the"), (8, "[re"))); + assert_eq!((token3, input4), (Token::OpenLeaf, (9, "re"))); + assert_eq!((token4, input5), (Token::TypeName("re"), (11, ""))); + assert_eq!((token5, input6), (Token::End, (11, ""))); + } + + #[test] + fn tokenize_7() { + let input1 = (0, "Thing $yar { # A comment\n\tThing2 []\n}"); + let (token1, input2) = next_token(input1); + let (token2, input3) = next_token(input2); + let (token3, input4) = next_token(input3); + let (token4, input5) = next_token(input4); + let (token5, input6) = next_token(input5); + let (token6, input7) = next_token(input6); + let (token7, input8) = next_token(input7); + let (token8, input9) = next_token(input8); + + assert_eq!( + (token1, input2), + ( + Token::TypeName("Thing"), + (5, " $yar { # A comment\n\tThing2 []\n}",) + ) + ); + assert_eq!( + (token2, input3), + ( + Token::Ident("$yar"), + (10, " { # A comment\n\tThing2 []\n}",) + ) + ); + assert_eq!( + (token3, input4), + (Token::OpenInner, (12, " # A comment\n\tThing2 []\n}",)) + ); + assert_eq!( + (token4, input5), + (Token::TypeName("Thing2"), (32, " []\n}")) + ); + assert_eq!((token5, input6), (Token::OpenLeaf, (34, "]\n}"))); + assert_eq!((token6, input7), (Token::CloseLeaf, (35, "\n}"))); + assert_eq!((token7, input8), (Token::CloseInner, (37, ""))); + assert_eq!((token8, input9), (Token::End, (37, ""))); + } + + #[test] + fn try_parse_event_01() { + assert_eq!(try_parse_event("H"), ParseEventParse::IncompleteData,); + } + + #[test] + fn try_parse_event_02() { + assert_eq!(try_parse_event("Hello $"), ParseEventParse::IncompleteData,); + } + + #[test] + fn try_parse_event_03() { + assert_eq!( + try_parse_event("Hello $id "), + ParseEventParse::IncompleteData, + ); + } + + #[test] + fn try_parse_event_04() { 
+ assert_eq!( + try_parse_event("Hello $id {"), + ParseEventParse::Ok( + ParseEvent::InnerOpen { + type_name: "Hello", + ident: Some("$id"), + byte_offset: 0, + }, + 11 + ), + ); + } + + #[test] + fn try_parse_event_05() { + assert_eq!( + try_parse_event(" Hello $id {"), + ParseEventParse::Ok( + ParseEvent::InnerOpen { + type_name: "Hello", + ident: Some("$id"), + byte_offset: 2, + }, + 13 + ), + ); + } + + #[test] + fn try_parse_event_06() { + assert_eq!( + try_parse_event("Hello {"), + ParseEventParse::Ok( + ParseEvent::InnerOpen { + type_name: "Hello", + ident: None, + byte_offset: 0, + }, + 7 + ), + ); + } + + #[test] + fn try_parse_event_07() { + assert_eq!( + try_parse_event("Hello { "), + ParseEventParse::Ok( + ParseEvent::InnerOpen { + type_name: "Hello", + ident: None, + byte_offset: 0, + }, + 7 + ), + ); + } + + #[test] + fn try_parse_event_08() { + assert_eq!(try_parse_event("Hello ["), ParseEventParse::IncompleteData,); + } + + #[test] + fn try_parse_event_09() { + assert_eq!( + try_parse_event("Hello [some contents"), + ParseEventParse::IncompleteData, + ); + } + + #[test] + fn try_parse_event_10() { + assert_eq!( + try_parse_event("Hello [some contents]"), + ParseEventParse::Ok( + ParseEvent::Leaf { + type_name: "Hello", + contents: "some contents", + byte_offset: 0, + }, + 21 + ), + ); + } + + #[test] + fn try_parse_event_11() { + assert_eq!( + try_parse_event("Hello [some contents] "), + ParseEventParse::Ok( + ParseEvent::Leaf { + type_name: "Hello", + contents: "some contents", + byte_offset: 0, + }, + 21 + ), + ); + } + + #[test] + fn try_parse_event_12() { + assert_eq!( + try_parse_event(" # A comment\n\n "), + ParseEventParse::ReachedEnd, + ); + } + + #[test] + fn parser_01() { + let mut parser = Parser::new(); + + parser.push_data("Hello"); + assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput)); + + parser.push_data("{"); + assert_eq!( + parser.next_event(), + Ok(ParseEvent::InnerOpen { + type_name: "Hello", + ident: None, + 
byte_offset: 0, + }) + ); + + assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput)); + + parser.push_data("}"); + assert_eq!( + parser.next_event(), + Ok(ParseEvent::InnerClose { byte_offset: 6 }) + ); + + assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd)); + } + + #[test] + fn parser_02() { + let mut parser = Parser::new(); + + parser.push_data("Hello"); + assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput)); + + parser.push_data("["); + assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput)); + + parser.push_data("1.0 2.0 3."); + assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput)); + + parser.push_data("0]"); + assert_eq!( + parser.next_event(), + Ok(ParseEvent::Leaf { + type_name: "Hello", + contents: "1.0 2.0 3.0", + byte_offset: 0, + }) + ); + + assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd)); + } + + #[test] + fn parser_03() { + let mut parser = Parser::new(); + + parser.push_data("Hello $big_boy { World [1.0 2.0 3.0] }"); + + assert_eq!( + parser.next_event(), + Ok(ParseEvent::InnerOpen { + type_name: "Hello", + ident: Some("$big_boy"), + byte_offset: 0, + }) + ); + + assert_eq!( + parser.next_event(), + Ok(ParseEvent::Leaf { + type_name: "World", + contents: "1.0 2.0 3.0", + byte_offset: 17, + }) + ); + + assert_eq!( + parser.next_event(), + Ok(ParseEvent::InnerClose { byte_offset: 37 }) + ); + + // Make sure repeated calls are stable. 
+ assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd)); + assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd)); + assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd)); + } + + #[test] + fn parser_04() { + let mut parser = Parser::new(); + + parser.push_data("$Hello"); + assert_eq!( + parser.next_event(), + Err(ParseError::ExpectedTypeNameOrClose(0)) + ); + } + + #[test] + fn parser_05() { + let mut parser = Parser::new(); + + parser.push_data("Hello]"); + assert_eq!(parser.next_event(), Err(ParseError::ExpectedOpenOrIdent(5))); + } + + #[test] + fn parser_06() { + let mut parser = Parser::new(); + + parser.push_data("Hello}"); + assert_eq!(parser.next_event(), Err(ParseError::ExpectedOpenOrIdent(5))); + } + + #[test] + fn parser_07() { + let mut parser = Parser::new(); + + parser.push_data("Hello $yar ["); + assert_eq!(parser.next_event(), Err(ParseError::UnexpectedIdent(6))); + } + + #[test] + fn parser_08() { + let mut parser = Parser::new(); + + parser.push_data("}"); + assert_eq!(parser.next_event(), Err(ParseError::UnexpectedClose(0))); + } +} diff --git a/sub_crates/data_tree/src/reader.rs b/sub_crates/data_tree/src/reader.rs deleted file mode 100644 index 3942391..0000000 --- a/sub_crates/data_tree/src/reader.rs +++ /dev/null @@ -1,121 +0,0 @@ -use super::{Error, Event, Parser}; - -//------------------------------------------------------------- - -#[derive(Debug)] -pub enum ReaderError { - UnexpectedEOF, - Parse(Error), - IO(std::io::Error), -} - -impl std::error::Error for ReaderError {} - -impl std::fmt::Display for ReaderError { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - write!(f, "{:?}", self) - } -} - -impl From for ReaderError { - fn from(e: Error) -> Self { - ReaderError::Parse(e) - } -} - -impl From for ReaderError { - fn from(e: std::io::Error) -> Self { - ReaderError::IO(e) - } -} - -//------------------------------------------------------------- - -#[derive(Debug)] -pub struct DataTreeReader { - 
parser: Parser, - reader: R, - buf: String, - eof: bool, -} - -impl DataTreeReader { - pub fn new(reader: R) -> Self { - Self { - parser: Parser::new(), - reader: reader, - buf: String::new(), - eof: false, - } - } - - pub fn next_event<'a>(&'a mut self) -> Result, ReaderError> { - loop { - let valid_end = match self.parser.next_event()? { - Event::ValidEnd => true, - Event::NeedMoreInput => false, - e => { - return Ok(unsafe { - // Transmute because the borrow checker is - // over-conservative about this. It thinks - // the liftime isn't valid, but since we aren't - // mutating self after returning (and in fact - // can't because of the borrow) there's no way for - // the references in this to become invalid. - std::mem::transmute::(e) - }); - } - }; - - if !self.eof { - self.buf.clear(); - let read = self.reader.read_line(&mut self.buf)?; - self.parser.push_data(&self.buf); - if read == 0 { - self.eof = true; - } - } else if !valid_end { - return Err(ReaderError::UnexpectedEOF); - } else { - return Ok(Event::ValidEnd); - } - } - } - - pub fn peek_event<'a>(&'a mut self) -> Result, ReaderError> { - loop { - let valid_end = match self.parser.peek_event()? { - Event::ValidEnd => true, - Event::NeedMoreInput => false, - e => { - return Ok(unsafe { - // Transmute because the borrow checker is - // over-conservative about this. It thinks - // the liftime isn't valid, but since we aren't - // mutating self after returning (and in fact - // can't because of the borrow) there's no way for - // the references in this to become invalid. - std::mem::transmute::(e) - }); - } - }; - - if !self.eof { - self.buf.clear(); - let read = self.reader.read_line(&mut self.buf)?; - self.parser.push_data(&self.buf); - if read == 0 { - self.eof = true; - } - } else if !valid_end { - return Err(ReaderError::UnexpectedEOF); - } else { - return Ok(Event::ValidEnd); - } - } - } - - pub fn byte_offset(&self) -> usize { - self.parser.byte_offset() - } -}