diff --git a/src/lib.rs b/src/lib.rs index 1806ddb..99abf96 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,189 +1,149 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::env::args_os; -use std::ffi::{OsStr, OsString}; -use std::ops::RangeBounds; +use std::ffi::OsString; -/// A command line argument parser. -#[derive(Debug, Clone)] -pub struct Parser { - args: Vec, +mod spec; - // Used to ensure we don't get duplicate arguments. - id_set: HashSet, - long_set: HashSet, - short_set: HashSet, -} +pub use spec::Spec; -impl Parser { - pub fn new() -> Parser { - Parser { - args: Vec::new(), - id_set: HashSet::new(), - long_set: HashSet::new(), - short_set: HashSet::new(), - } - } +pub fn parse(mut spec: Spec) -> ParsedArguments { + // Split into non-positional and positional arguments. + let (args, pos_args): (Vec<_>, Vec<_>) = spec + .args + .drain(..) + .partition(|arg| arg.arg_type != spec::ArgType::Pos); - /// Add a flag (bool) argument. - /// - /// - `id`: the argument identifier, used for fetching argument - /// matches. - /// - `flags`: the long and/or short argument flag strings. Must be - /// in the form "-f" or "--flag". You can pass as many as you - /// like, all of which will be considered equivalent during - /// parsing. But there must be at least one. - /// - `doc`: the documentation string to use in the generated help. - /// Pass an empty string to indicate no documentation. - pub fn add_flag(&mut self, id: &str, flags: &[&str], doc: &str) { - let (long_flags, short_flags) = self.validate_and_process_arg(id, flags); + // Validate positional arguments: + // - All required positional arguments should precede any optional + // positional arguments. + // - There should be at most a single positional multi-argument, and + // it must be at the end. + { + let mut met_optional = false; + let mut met_multi = false; + for arg in pos_args.iter() { + if arg.arg_type == spec::ArgType::Pos { + let is_optional = arg.acceptable_count.0 == 0; + let is_multi = + arg.acceptable_count.1.is_none() || arg.acceptable_count.1.unwrap() > 1; - self.args.push(Arg { - arg_type: ArgType::Flag, - id: id.into(), - value_label: String::new(), - long_flags: long_flags, - short_flags: short_flags, - acceptable_count: (0, None), - doc: doc.into(), - }); - } + if !is_optional && met_optional { + panic!("All required positional arguments must precede all optional positional arguments in the argument spec.") + } + if met_multi { + panic!("There must be at most one positional multi-argument in the argument spec, and it must come last.") + } - /// Add a standard argument, that takes a value. - pub fn add_argument( - &mut self, - id: &str, - flags: &[&str], - doc: &str, - value_label: &str, - required: bool, - ) { - let (long_flags, short_flags) = self.validate_and_process_arg(id, flags); - - self.args.push(Arg { - arg_type: ArgType::Arg, - id: id.into(), - value_label: value_label.into(), - long_flags: long_flags, - short_flags: short_flags, - acceptable_count: (if required { 1 } else { 0 }, None), - doc: doc.into(), - }); - } - - /// Add a positional argument. - /// - /// Unlike flags and standard arguments, positional arguments are - /// parsed in the order they're added. Because of their nature, - /// they have some additional considerations: - /// - /// - All required positional arguments must precede all optional - /// positional arguments. - /// - There can at most be a single positional multi-argument, - /// which must come last. (See `add_positional_multi_argument()`.) - pub fn add_positional_argument( - &mut self, - id: &str, - doc: &str, - value_label: &str, - required: bool, - ) { - let (_, _) = self.validate_and_process_arg(id, &[]); - - self.args.push(Arg { - arg_type: ArgType::PosArg, - id: id.into(), - value_label: value_label.into(), - long_flags: Vec::new(), - short_flags: Vec::new(), - acceptable_count: (if required { 1 } else { 0 }, Some(1)), - doc: doc.into(), - }); - } - - pub fn add_positional_multi_argument( - &mut self, - id: &str, - doc: &str, - value_label: &str, - required: bool, - ) { - let (_, _) = self.validate_and_process_arg(id, &[]); - - self.args.push(Arg { - arg_type: ArgType::PosArg, - id: id.into(), - value_label: value_label.into(), - long_flags: Vec::new(), - short_flags: Vec::new(), - acceptable_count: (if required { 1 } else { 0 }, None), - doc: doc.into(), - }); - } - - //---------------- - - pub fn parse(self) -> ParsedArguments { - todo!() - } - - //---------------- - - /// Returns (long, short) pair, each of which is a Vec of argument strings with - /// the leading hyphens stripped off. - fn validate_and_process_arg(&mut self, id: &str, flags: &[&str]) -> (Vec, Vec) { - if self.id_set.contains(id) { - panic!( - "Error: attempted to add argument with a duplicate ID \"{}\".", - id - ); - } - self.id_set.insert(id.into()); - - let mut long_flags = Vec::new(); - let mut short_flags = Vec::new(); - - for &flag in flags { - // Ensure no whitespace. - if flag.len() != flag.trim().len() || flag.split_whitespace().count() > 1 { - panic!( - "Error: attempted to add argument \"{}\" which contains whitespace.", - flag - ); + met_optional |= is_optional; + met_multi |= is_multi; } - // Long flags. - else if flag.starts_with("--") && flag.len() > 2 { - if self.long_set.contains(flag) { - panic!( - "Error: attempted to add duplicate long argument \"{}\".", - flag + } + } + + // Parse! + // TODO: optimize by first creating a hash map from flag strings to + // argument indices. Right now this is an `O(NM)` algorithm, with N + // being the number of arguments in the spec and M being the number + // of arguments passed by the user. We can even further optimize it + // by first checking against the maximum length of our long arguments, + // so we don't end up hashing really long user arguments + // unnecessarily for the check. + let mut pos_i = 0; // Index of the positional argument we're at. + let mut pos_i_count = 0; // Number of positional arguments we've parsed at the current positional argument index. + let mut parsed = ParsedArguments { + arguments: Vec::new(), + id_map: HashMap::new(), + }; + let mut args_in = args_os(); + let _ = args_in.next(); // Skip the first argument, which is the call to the executable. + + 'outer: while let Some(arg_in) = args_in.next() { + // Check for flags and non-positional arguments. + if let Some(arg_in_str) = arg_in.to_str() { + if arg_in_str.starts_with("--") { + // Long. + for arg in args.iter() { + for long_flag in arg.long_flags.iter() { + if arg_in_str == long_flag { + match arg.arg_type { + spec::ArgType::Flag => parsed.push_arg(arg.id.clone(), None), + + spec::ArgType::Arg => { + if let Some(value) = args_in.next() { + parsed.push_arg(arg.id.clone(), Some(value)); + } else { + todo!("Handle error: expected value after argument flag."); + } + } + + spec::ArgType::Pos => unreachable!(), + } + continue 'outer; + } + } + } + todo!("Handle error: no long argument matched the passed argument."); + } else if arg_in_str.starts_with("-") { + // Short. + let mut remainder = &arg_in_str[1..]; + + // First check arguments that take values. + for arg in args.iter().filter(|a| a.arg_type == spec::ArgType::Arg) { + for short_flag in arg.short_flags.iter() { + if remainder == short_flag { + if let Some(value) = args_in.next() { + parsed.push_arg(arg.id.clone(), Some(value)); + } else { + todo!("Handle error: expected value after argument flag."); + } + continue 'outer; + } + } + } + + // Then check boolean flags. There can be multiple + // present, so we progressively chop off the front as we + // find matches until nothing remains. + 'restart_args: while !remainder.is_empty() { + for arg in args.iter().filter(|a| a.arg_type == spec::ArgType::Flag) { + for short_flag in arg.short_flags.iter() { + if remainder.starts_with(short_flag) { + remainder = &remainder[short_flag.len()..]; + parsed.push_arg(arg.id.clone(), None); + continue 'restart_args; + } + } + } + todo!( + "Handle error: no short argument matches the next flag in \"{}\".", + remainder ); } - self.long_set.insert(flag.into()); - long_flags.push((&flag[2..]).into()); - } - // Check if it's a valid short flag (should only have one character - // after the hyphen). - else if flag.starts_with("-") && flag.chars().count() == 2 { - if self.short_set.contains(flag) { - panic!( - "Error: attempted to add duplicate short argument \"{}\".", - flag - ); - } - self.short_set.insert(flag.into()); - short_flags.push((&flag[1..]).into()); - } - // Not a valid flag. - else { - panic!( - "Error: attempted to add argument \"{}\", which isn't a valid argument string.", - flag - ) + continue 'outer; } } - (long_flags, short_flags) + if pos_i < pos_args.len() { + let arg = &pos_args[pos_i]; + parsed.push_arg(arg.id.clone(), Some(arg_in)); + pos_i_count += 1; + + if let Some(max_count) = arg.acceptable_count.1 { + if pos_i_count == max_count { + pos_i += 1; + pos_i_count = 0; + } + } + } else { + todo!("Handle error: too many positional arguments."); + } } + + if pos_i < pos_args.len() && pos_i_count < pos_args[pos_i].acceptable_count.0 { + todo!("Handle error: not enough positional arguments."); + } + + parsed } /// Parsed command line arguments. @@ -199,41 +159,15 @@ pub struct ParsedArguments { id_map: HashMap>, // Argument ID -> index list } -//------------------------------------------------------------- +impl ParsedArguments { + fn push_arg(&mut self, id: String, value: Option) { + assert!(!id.is_empty()); -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -enum ArgType { - Flag, - Arg, - PosArg, -} - -/// Argument specification. -#[derive(Debug, Clone)] -struct Arg { - arg_type: ArgType, - id: String, - value_label: String, - - // Long and short versions of the argument flag. E.g. "--curve" and - // "-c", but without the leading dashes. - long_flags: Vec, - short_flags: Vec, - - // How many instances of the argument can be present, specified - // as a range. - // - // For example: - // - (0, None): An argument that can show up any number of times, - // including not at all. - // - (0, 1): An argument that can either be absent or show up - // precisely once. - // - (1, 1): An argument that must show up precisely once. - // - (1, None): An argument that must show up at least once. - // - (2, 9): An argument that must show up at least twice, but - // no more than 9 times. - acceptable_count: (usize, Option), - - // Documentation string, for generated help. - doc: String, + if !self.id_map.contains_key(&id) { + self.id_map.insert(id.clone(), Vec::new()); + } + self.id_map.get_mut(&id).unwrap().push(self.arguments.len()); + + self.arguments.push((id, value)); + } } diff --git a/src/spec.rs b/src/spec.rs new file mode 100644 index 0000000..4bc733c --- /dev/null +++ b/src/spec.rs @@ -0,0 +1,244 @@ +use std::collections::HashSet; + +/// Command line argument specification. +#[derive(Debug, Clone)] +pub struct Spec { + pub(crate) name: String, // Application name. + pub(crate) version: String, // Application version. + pub(crate) args: Vec, + + // Used to ensure we don't get duplicate arguments. + id_set: HashSet, + long_set: HashSet, + short_set: HashSet, +} + +impl Spec { + /// Create a new argument specification. + /// + /// `name` and `version` are the name and version of the software, + /// respectively. + #[must_use] + pub fn new(name: String, version: String) -> Spec { + Spec { + name: name, + version: version, + + args: Vec::new(), + + id_set: HashSet::new(), + long_set: HashSet::new(), + short_set: HashSet::new(), + } + } + + /// Add a flag (bool) argument. + /// + /// - `id`: the argument identifier, used for fetching argument + /// matches. + /// - `flags`: the long and/or short argument flag strings. Must be + /// in the form "-f" or "--flag". You can pass as many as you + /// like, all of which will be considered equivalent during + /// parsing. But there must be at least one. + /// - `doc`: the documentation string to use in the generated help. + /// Pass an empty string to indicate no documentation. + #[must_use] + pub fn add_flag(mut self, id: &str, flags: &[&str], doc: &str) -> Self { + let (long_flags, short_flags) = self.validate_and_process_arg(id, flags); + + self.args.push(Arg { + arg_type: ArgType::Flag, + id: id.into(), + value_label: String::new(), + long_flags: long_flags, + short_flags: short_flags, + acceptable_count: (0, None), + doc: doc.into(), + }); + + self + } + + /// Add a standard argument, that takes a value. + #[must_use] + pub fn add_argument( + mut self, + id: &str, + flags: &[&str], + doc: &str, + value_label: &str, + required: bool, + ) -> Self { + let (long_flags, short_flags) = self.validate_and_process_arg(id, flags); + + self.args.push(Arg { + arg_type: ArgType::Arg, + id: id.into(), + value_label: value_label.into(), + long_flags: long_flags, + short_flags: short_flags, + acceptable_count: (if required { 1 } else { 0 }, None), + doc: doc.into(), + }); + + self + } + + /// Add a positional argument. + /// + /// Unlike flags and standard arguments, positional arguments are + /// parsed in the order they're added. Because of their nature, + /// they have some additional considerations: + /// + /// - All required positional arguments must precede all optional + /// positional arguments. + /// - There can at most be a single positional multi-argument, + /// which must come last. (See `add_positional_multi_argument()`.) + #[must_use] + pub fn add_positional_argument( + mut self, + id: &str, + doc: &str, + value_label: &str, + required: bool, + ) -> Self { + let (_, _) = self.validate_and_process_arg(id, &[]); + + self.args.push(Arg { + arg_type: ArgType::Pos, + id: id.into(), + value_label: value_label.into(), + long_flags: Vec::new(), + short_flags: Vec::new(), + acceptable_count: (if required { 1 } else { 0 }, Some(1)), + doc: doc.into(), + }); + + self + } + + #[must_use] + pub fn add_positional_multi_argument( + mut self, + id: &str, + doc: &str, + value_label: &str, + required: bool, + ) -> Self { + let (_, _) = self.validate_and_process_arg(id, &[]); + + self.args.push(Arg { + arg_type: ArgType::Pos, + id: id.into(), + value_label: value_label.into(), + long_flags: Vec::new(), + short_flags: Vec::new(), + acceptable_count: (if required { 1 } else { 0 }, None), + doc: doc.into(), + }); + + self + } + + //---------------- + + /// Returns (long, short) pair, each of which is a Vec of argument strings with + /// the leading hyphens stripped off. + fn validate_and_process_arg(&mut self, id: &str, flags: &[&str]) -> (Vec, Vec) { + if self.id_set.contains(id) { + panic!( + "Error: attempted to add argument with a duplicate ID \"{}\".", + id + ); + } + self.id_set.insert(id.into()); + + let mut long_flags = Vec::new(); + let mut short_flags = Vec::new(); + + for &flag in flags { + // Ensure no whitespace. + if flag.len() != flag.trim().len() || flag.split_whitespace().count() > 1 { + panic!( + "Error: attempted to add argument \"{}\" which contains whitespace.", + flag + ); + } + // Long flags. + else if flag.starts_with("--") && flag.len() > 2 { + if self.long_set.contains(flag) { + panic!( + "Error: attempted to add duplicate long argument \"{}\".", + flag + ); + } + self.long_set.insert(flag.into()); + long_flags.push(flag.into()); + } + // Check if it's a valid short flag. + // Note: in theory we should be checking to verify that + // the flag is only one character long. But because of + // graphemes that's complicated, and it's not really + // worth all the code. So instead we just rely on client + // code doing the right thing. + else if flag.starts_with("-") && flag.len() > 1 { + if self.short_set.contains(flag) { + panic!( + "Error: attempted to add duplicate short argument \"{}\".", + flag + ); + } + self.short_set.insert(flag.into()); + short_flags.push((&flag[1..]).into()); + } + // Not a valid flag. + else { + panic!( + "Error: attempted to add argument \"{}\", which isn't a valid argument string.", + flag + ) + } + } + + (long_flags, short_flags) + } +} + +//------------------------------------------------------------- + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub(crate) enum ArgType { + Flag, // Boolean flag (present or absent) + Arg, // Standard flag+value argument, like `-i input_file`. + Pos, // Positional argument. +} + +/// Argument specification. +#[derive(Debug, Clone)] +pub(crate) struct Arg { + pub(crate) arg_type: ArgType, + pub(crate) id: String, + pub(crate) value_label: String, + + // Long and short versions of the argument flag. E.g. "--curve" and + // "-c", but without the leading dashes. + pub(crate) long_flags: Vec, + pub(crate) short_flags: Vec, + + // How many instances of the argument can be present, specified + // as a range. + // + // For example: + // - (0, None): An argument that can show up any number of times, + // including not at all. + // - (0, 1): An argument that can either be absent or show up + // precisely once. + // - (1, 1): An argument that must show up precisely once. + // - (1, None): An argument that must show up at least once. + // - (2, 9): An argument that must show up at least twice, but + // no more than 9 times. + pub(crate) acceptable_count: (usize, Option), + + // Documentation string, for generated help. + pub(crate) doc: String, +}