From 9d5bc63fa583a6814e467975c3b2ae5508ad021a Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Tue, 7 Jan 2020 13:41:16 +0900 Subject: [PATCH] Created a separate reader type for streaming data tree reading. --- src/parse/data_tree.rs | 35 +++++++++---- sub_crates/data_tree/src/lib.rs | 2 + sub_crates/data_tree/src/reader.rs | 84 ++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+), 9 deletions(-) create mode 100644 sub_crates/data_tree/src/reader.rs diff --git a/src/parse/data_tree.rs b/src/parse/data_tree.rs index bd02226..e63f84d 100644 --- a/src/parse/data_tree.rs +++ b/src/parse/data_tree.rs @@ -2,7 +2,10 @@ use std::{io::Cursor, iter::Iterator, result::Result, slice}; -use data_tree::{Event, Parser}; +use data_tree::{ + reader::{DataTreeReader, ReaderError}, + Event, +}; #[derive(Debug, Eq, PartialEq)] pub enum DataTree { @@ -40,12 +43,11 @@ pub enum ParseError { impl<'a> DataTree { pub fn from_str(source_text: &'a str) -> Result { - let mut parser = Parser::new(Cursor::new(source_text)); + let mut parser = DataTreeReader::new(Cursor::new(source_text)); let mut items = Vec::new(); loop { let event = parser.next_event(); - println!("{:?}", event); match event { Ok(Event::InnerOpen { type_name, @@ -64,11 +66,13 @@ impl<'a> DataTree { Ok(Event::InnerClose { .. }) => { return Err(ParseError::Other("Unexpected closing tag.")) } - Ok(Event::Done) => { + Ok(Event::ValidEnd) => { break; } - Err(_) => return Err(ParseError::Other("Some error happened.")), + Ok(Event::NeedMoreInput) | Err(_) => { + return Err(ParseError::Other("Some error happened.")) + } } } @@ -208,8 +212,8 @@ impl<'a> DataTree { } } -fn parse_node( - parser: &mut Parser, +fn parse_node( + parser: &mut DataTreeReader, type_name: String, ident: Option, byte_offset: usize, @@ -238,10 +242,10 @@ fn parse_node( }); } Ok(Event::InnerClose { .. 
}) => break, - Ok(Event::Done) => { + Ok(Event::ValidEnd) => { return Err(ParseError::Other("Unexpected end of contents.")); } - Err(_) => { + Ok(Event::NeedMoreInput) | Err(_) => { return Err(ParseError::Other("Some error happened.")); } } @@ -255,6 +259,19 @@ fn parse_node( }) } +/// Splits text at approximately the given byte index: the index is +/// clamped to `text.len()` and then moved forward as needed until it +/// lands on a valid `char` boundary. +fn aprx_split_at(text: &str, idx: usize) -> (&str, &str) { + let mut idx = text.len().min(idx); + + while !text.is_char_boundary(idx) { + idx += 1; + } + + (&text[..idx], &text[idx..]) +} + /// An iterator over the children of a `DataTree` node that filters out the /// children not matching a specified type name. pub struct DataTreeFilterIter<'a> { diff --git a/sub_crates/data_tree/src/lib.rs b/sub_crates/data_tree/src/lib.rs index 6d233c0..7cd6dc5 100644 --- a/sub_crates/data_tree/src/lib.rs +++ b/sub_crates/data_tree/src/lib.rs @@ -1,5 +1,7 @@ #![allow(dead_code)] +pub mod reader; + #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Error { ExpectedTypeNameOrClose(usize), diff --git a/sub_crates/data_tree/src/reader.rs b/sub_crates/data_tree/src/reader.rs new file mode 100644 index 0000000..9d6246e --- /dev/null +++ b/sub_crates/data_tree/src/reader.rs @@ -0,0 +1,84 @@ +use super::{Error, Event, Parser}; + +//------------------------------------------------------------- + +#[derive(Debug)] +pub enum ReaderError { + UnexpectedEOF, + Parse(Error), + IO(std::io::Error), +} + +impl std::error::Error for ReaderError {} + +impl std::fmt::Display for ReaderError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{:?}", self) + } +} + +impl From for ReaderError { + fn from(e: Error) -> Self { + ReaderError::Parse(e) + } +} + +impl From for ReaderError { + fn from(e: std::io::Error) -> Self { + ReaderError::IO(e) + } +} + +//------------------------------------------------------------- + +#[derive(Debug)] +pub 
struct DataTreeReader { + parser: Parser, + reader: R, + buf: String, + eof: bool, +} + +impl DataTreeReader { + pub fn new(reader: R) -> Self { + Self { + parser: Parser::new(), + reader: reader, + buf: String::new(), + eof: false, + } + } + + pub fn next_event<'a>(&'a mut self) -> Result, ReaderError> { + loop { + let valid_end = match self.parser.next_event()? { + Event::ValidEnd => true, + Event::NeedMoreInput => false, + e => { + return Ok(unsafe { + // Transmute because the borrow checker is + // over-conservative about this. It thinks + // the lifetime isn't valid, but since we aren't + // mutating self after returning (and in fact + // can't because of the borrow) there's no way for + // the references in this to become invalid. + std::mem::transmute::(e) + }); + } + }; + + if !self.eof { + self.buf.clear(); + let read = self.reader.read_line(&mut self.buf)?; + self.parser.push_data(&self.buf); + if read == 0 { + self.eof = true; + } + } else if !valid_end { + return Err(ReaderError::UnexpectedEOF); + } else { + return Ok(Event::ValidEnd); + } + } + } +}