First implementation of the actual parser.

It doesn't do proper error handling yet and is completely untested, so it probably has plenty of bugs in it.
2022-06-19 17:11:43 -07:00 · 2022-06-19 17:11:43 -07:00 · 82b6c5a5de
commit 82b6c5a5de
parent 838707d114
2 changed files with 386 additions and 208 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,189 +1,149 @@
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
 use std::env::args_os;
-use std::ffi::{OsStr, OsString};
+use std::ffi::OsString;
 use std::ops::RangeBounds;
-/// A command line argument parser.
+mod spec;
 #[derive(Debug, Clone)]
 pub struct Parser {
    args: Vec<Arg>,
-    // Used to ensure we don't get duplicate arguments.
+pub use spec::Spec;
    id_set: HashSet<String>,
    long_set: HashSet<String>,
    short_set: HashSet<String>,
 }
-impl Parser {
+pub fn parse(mut spec: Spec) -> ParsedArguments {
-    pub fn new() -> Parser {
+    // Split into non-positional and positional arguments.
-        Parser {
+    let (args, pos_args): (Vec<_>, Vec<_>) = spec
-            args: Vec::new(),
+        .args
-            id_set: HashSet::new(),
+        .drain(..)
-            long_set: HashSet::new(),
+        .partition(|arg| arg.arg_type != spec::ArgType::Pos);
-            short_set: HashSet::new(),
+
    // Validate positional arguments:
    // - All required positional arguments should precede any optional
    //   positional arguments.
    // - There should be at most a single positional multi-argument, and
    //   it must be at the end.
    {
        let mut met_optional = false;
        let mut met_multi = false;
        for arg in pos_args.iter() {
            if arg.arg_type == spec::ArgType::Pos {
                let is_optional = arg.acceptable_count.0 == 0;
                let is_multi =
                    arg.acceptable_count.1.is_none() || arg.acceptable_count.1.unwrap() > 1;
                if !is_optional && met_optional {
                    panic!("All required positional arguments must precede all optional positional arguments in the argument spec.")
                }
                if met_multi {
                    panic!("There must be at most one positional multi-argument in the argument spec, and it must come last.")
                }
                met_optional |= is_optional;
                met_multi |= is_multi;
            }
        }
    }
-    /// Add a flag (bool) argument.
+    // Parse!
-    ///
+    // TODO: optimize by first creating a hash map from flag strings to
-    /// - `id`: the argument identifier, used for fetching argument
+    // argument indices.  Right now this is an `O(NM)` algorithm, with N
-    ///   matches.
+    // being the number of arguments in the spec and M being the number
-    /// - `flags`: the long and/or short argument flag strings.  Must be
+    // of arguments passed by the user.  We can even further optimize it
-    ///   in the form "-f" or "--flag".  You can pass as many as you
+    // by first checking against the maximum length of our long arguments,
-    ///   like, all of which will be considered equivalent during
+    // so we don't end up hashing really long user arguments
-    ///   parsing.  But there must be at least one.
+    // unnecessarily for the check.
-    /// - `doc`: the documentation string to use in the generated help.
+    let mut pos_i = 0; // Index of the positional argument we're at.
-    ///   Pass an empty string to indicate no documentation.
+    let mut pos_i_count = 0; // Number of positional arguments we've parsed at the current positional argument index.
-    pub fn add_flag(&mut self, id: &str, flags: &[&str], doc: &str) {
+    let mut parsed = ParsedArguments {
-        let (long_flags, short_flags) = self.validate_and_process_arg(id, flags);
+        arguments: Vec::new(),
        id_map: HashMap::new(),
    };
    let mut args_in = args_os();
    let _ = args_in.next(); // Skip the first argument, which is the call to the executable.
-        self.args.push(Arg {
+    'outer: while let Some(arg_in) = args_in.next() {
-            arg_type: ArgType::Flag,
+        // Check for flags and non-positional arguments.
-            id: id.into(),
+        if let Some(arg_in_str) = arg_in.to_str() {
-            value_label: String::new(),
+            if arg_in_str.starts_with("--") {
-            long_flags: long_flags,
+                // Long.
-            short_flags: short_flags,
+                for arg in args.iter() {
-            acceptable_count: (0, None),
+                    for long_flag in arg.long_flags.iter() {
-            doc: doc.into(),
+                        if arg_in_str == long_flag {
-        });
+                            match arg.arg_type {
                                spec::ArgType::Flag => parsed.push_arg(arg.id.clone(), None),
                                spec::ArgType::Arg => {
                                    if let Some(value) = args_in.next() {
                                        parsed.push_arg(arg.id.clone(), Some(value));
                                    } else {
                                        todo!("Handle error: expected value after argument flag.");
                                    }
                                }
-    /// Add a standard argument, that takes a value.
+                                spec::ArgType::Pos => unreachable!(),
-    pub fn add_argument(
+                            }
-        &mut self,
+                            continue 'outer;
-        id: &str,
+                        }
-        flags: &[&str],
+                    }
-        doc: &str,
+                }
-        value_label: &str,
+                todo!("Handle error: no long argument matched the passed argument.");
-        required: bool,
+            } else if arg_in_str.starts_with("-") {
-    ) {
+                // Short.
-        let (long_flags, short_flags) = self.validate_and_process_arg(id, flags);
+                let mut remainder = &arg_in_str[1..];
-        self.args.push(Arg {
+                // First check arguments that take values.
-            arg_type: ArgType::Arg,
+                for arg in args.iter().filter(|a| a.arg_type == spec::ArgType::Arg) {
-            id: id.into(),
+                    for short_flag in arg.short_flags.iter() {
-            value_label: value_label.into(),
+                        if remainder == short_flag {
-            long_flags: long_flags,
+                            if let Some(value) = args_in.next() {
-            short_flags: short_flags,
+                                parsed.push_arg(arg.id.clone(), Some(value));
-            acceptable_count: (if required { 1 } else { 0 }, None),
+                            } else {
-            doc: doc.into(),
+                                todo!("Handle error: expected value after argument flag.");
-        });
+                            }
                            continue 'outer;
                        }
                    }
                }
-    /// Add a positional argument.
+                // Then check boolean flags.  There can be multiple
-    ///
+                // present, so we progressively chop off the front as we
-    /// Unlike flags and standard arguments, positional arguments are
+                // find matches until nothing remains.
-    /// parsed in the order they're added.  Because of their nature,
+                'restart_args: while !remainder.is_empty() {
-    /// they have some additional considerations:
+                    for arg in args.iter().filter(|a| a.arg_type == spec::ArgType::Flag) {
-    ///
+                        for short_flag in arg.short_flags.iter() {
-    /// - All required positional arguments must precede all optional
+                            if remainder.starts_with(short_flag) {
-    ///   positional arguments.
+                                remainder = &remainder[short_flag.len()..];
-    /// - There can at most be a single positional multi-argument,
+                                parsed.push_arg(arg.id.clone(), None);
-    ///   which must come last.  (See `add_positional_multi_argument()`.)
+                                continue 'restart_args;
    pub fn add_positional_argument(
        &mut self,
        id: &str,
        doc: &str,
        value_label: &str,
        required: bool,
    ) {
        let (_, _) = self.validate_and_process_arg(id, &[]);
        self.args.push(Arg {
            arg_type: ArgType::PosArg,
            id: id.into(),
            value_label: value_label.into(),
            long_flags: Vec::new(),
            short_flags: Vec::new(),
            acceptable_count: (if required { 1 } else { 0 }, Some(1)),
            doc: doc.into(),
        });
                            }
    pub fn add_positional_multi_argument(
        &mut self,
        id: &str,
        doc: &str,
        value_label: &str,
        required: bool,
    ) {
        let (_, _) = self.validate_and_process_arg(id, &[]);
        self.args.push(Arg {
            arg_type: ArgType::PosArg,
            id: id.into(),
            value_label: value_label.into(),
            long_flags: Vec::new(),
            short_flags: Vec::new(),
            acceptable_count: (if required { 1 } else { 0 }, None),
            doc: doc.into(),
        });
                        }
    //----------------
    pub fn parse(self) -> ParsedArguments {
        todo!()
                    }
-
+                    todo!(
-    //----------------
+                        "Handle error: no short argument matches the next flag in \"{}\".",
-
+                        remainder
    /// Returns (long, short) pair, each of which is a Vec of argument strings with
    /// the leading hyphens stripped off.
    fn validate_and_process_arg(&mut self, id: &str, flags: &[&str]) -> (Vec<String>, Vec<String>) {
        if self.id_set.contains(id) {
            panic!(
                "Error: attempted to add argument with a duplicate ID \"{}\".",
                id
                    );
                }
-        self.id_set.insert(id.into());
+                continue 'outer;
        let mut long_flags = Vec::new();
        let mut short_flags = Vec::new();
        for &flag in flags {
            // Ensure no whitespace.
            if flag.len() != flag.trim().len() || flag.split_whitespace().count() > 1 {
                panic!(
                    "Error: attempted to add argument \"{}\" which contains whitespace.",
                    flag
                );
            }
            // Long flags.
            else if flag.starts_with("--") && flag.len() > 2 {
                if self.long_set.contains(flag) {
                    panic!(
                        "Error: attempted to add duplicate long argument \"{}\".",
                        flag
                    );
                }
                self.long_set.insert(flag.into());
                long_flags.push((&flag[2..]).into());
            }
            // Check if it's a valid short flag (should only have one character
            // after the hyphen).
            else if flag.starts_with("-") && flag.chars().count() == 2 {
                if self.short_set.contains(flag) {
                    panic!(
                        "Error: attempted to add duplicate short argument \"{}\".",
                        flag
                    );
                }
                self.short_set.insert(flag.into());
                short_flags.push((&flag[1..]).into());
            }
            // Not a valid flag.
            else {
                panic!(
                    "Error: attempted to add argument \"{}\", which isn't a valid argument string.",
                    flag
                )
            }
        }
-        (long_flags, short_flags)
+        if pos_i < pos_args.len() {
            let arg = &pos_args[pos_i];
            parsed.push_arg(arg.id.clone(), Some(arg_in));
            pos_i_count += 1;
            if let Some(max_count) = arg.acceptable_count.1 {
                if pos_i_count == max_count {
                    pos_i += 1;
                    pos_i_count = 0;
                }
            }
        } else {
            todo!("Handle error: too many positional arguments.");
        }
    }
    if pos_i < pos_args.len() && pos_i_count < pos_args[pos_i].acceptable_count.0 {
        todo!("Handle error: not enough positional arguments.");
    }
    parsed
 }
 /// Parsed command line arguments.
@ -199,41 +159,15 @@ pub struct ParsedArguments {
    id_map: HashMap<String, Vec<usize>>, // Argument ID -> index list
 }
-//-------------------------------------------------------------
+impl ParsedArguments {
    fn push_arg(&mut self, id: String, value: Option<OsString>) {
        assert!(!id.is_empty());
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+        if !self.id_map.contains_key(&id) {
-enum ArgType {
+            self.id_map.insert(id.clone(), Vec::new());
-    Flag,
+        }
-    Arg,
+        self.id_map.get_mut(&id).unwrap().push(self.arguments.len());
-    PosArg,
+
-}
+        self.arguments.push((id, value));
-
+    }
 /// Argument specification.
 #[derive(Debug, Clone)]
 struct Arg {
    arg_type: ArgType,
    id: String,
    value_label: String,
    // Long and short versions of the argument flag.  E.g. "--curve" and
    // "-c", but without the leading dashes.
    long_flags: Vec<String>,
    short_flags: Vec<String>,
    // How many instances of the argument can be present, specified
    // as a range.
    //
    // For example:
    // - (0, None): An argument that can show up any number of times,
    //              including not at all.
    // - (0, 1):    An argument that can either be absent or show up
    //              precisely once.
    // - (1, 1):    An argument that must show up precisely once.
    // - (1, None): An argument that must show up at least once.
    // - (2, 9):    An argument that must show up at least twice, but
    //              no more than 9 times.
    acceptable_count: (usize, Option<usize>),
    // Documentation string, for generated help.
    doc: String,
 }
--- a/src/spec.rs
+++ b/src/spec.rs
@ -0,0 +1,244 @@
 use std::collections::HashSet;
 /// Command line argument specification.
 ///
 /// Holds the application's name and version plus the list of arguments
 /// that have been added to the spec, together with the bookkeeping
 /// sets used to reject duplicate IDs and flag strings.
 #[derive(Debug, Clone)]
 pub struct Spec {
    pub(crate) name: String,    // Application name.
    pub(crate) version: String, // Application version.
    // The arguments of the spec, in the order they were added.
    pub(crate) args: Vec<Arg>,
    // Used to ensure we don't get duplicate arguments.  The flag sets
    // hold the flag strings exactly as passed in, leading hyphens
    // included.
    id_set: HashSet<String>,
    long_set: HashSet<String>,
    short_set: HashSet<String>,
 }
 impl Spec {
    /// Create a new, empty argument specification.
    ///
    /// `name` and `version` are the name and version of the software,
    /// respectively.
    #[must_use]
    pub fn new(name: String, version: String) -> Spec {
        Spec {
            name,
            version,
            args: Vec::new(),
            id_set: HashSet::new(),
            long_set: HashSet::new(),
            short_set: HashSet::new(),
        }
    }

    /// Add a flag (bool) argument.
    ///
    /// - `id`: the argument identifier, used for fetching argument
    ///   matches.
    /// - `flags`: the long and/or short argument flag strings.  Must be
    ///   in the form "-f" or "--flag".  You can pass as many as you
    ///   like, all of which will be considered equivalent during
    ///   parsing.  But there must be at least one.
    /// - `doc`: the documentation string to use in the generated help.
    ///   Pass an empty string to indicate no documentation.
    ///
    /// # Panics
    ///
    /// Panics if `flags` is empty, if `id` has already been used, or if
    /// any flag string is malformed or has already been used.
    #[must_use]
    pub fn add_flag(mut self, id: &str, flags: &[&str], doc: &str) -> Self {
        Self::require_flags(id, flags);
        let (long_flags, short_flags) = self.validate_and_process_arg(id, flags);

        self.args.push(Arg {
            arg_type: ArgType::Flag,
            id: id.to_owned(),
            value_label: String::new(),
            long_flags,
            short_flags,
            acceptable_count: (0, None),
            doc: doc.to_owned(),
        });

        self
    }

    /// Add a standard argument, that takes a value.
    ///
    /// `id`, `flags` and `doc` have the same meaning as for
    /// `add_flag()`.  `value_label` is stored alongside the argument as
    /// the label of its value, and `required` sets the minimum number
    /// of occurrences to one instead of zero.
    ///
    /// # Panics
    ///
    /// Panics if `flags` is empty, if `id` has already been used, or if
    /// any flag string is malformed or has already been used.
    #[must_use]
    pub fn add_argument(
        mut self,
        id: &str,
        flags: &[&str],
        doc: &str,
        value_label: &str,
        required: bool,
    ) -> Self {
        Self::require_flags(id, flags);
        let (long_flags, short_flags) = self.validate_and_process_arg(id, flags);

        self.args.push(Arg {
            arg_type: ArgType::Arg,
            id: id.to_owned(),
            value_label: value_label.to_owned(),
            long_flags,
            short_flags,
            acceptable_count: (usize::from(required), None),
            doc: doc.to_owned(),
        });

        self
    }

    /// Add a positional argument.
    ///
    /// Unlike flags and standard arguments, positional arguments are
    /// parsed in the order they're added.  Because of their nature,
    /// they have some additional considerations:
    ///
    /// - All required positional arguments must precede all optional
    ///   positional arguments.
    /// - There can at most be a single positional multi-argument,
    ///   which must come last.  (See `add_positional_multi_argument()`.)
    ///
    /// # Panics
    ///
    /// Panics if `id` has already been used.
    #[must_use]
    pub fn add_positional_argument(
        mut self,
        id: &str,
        doc: &str,
        value_label: &str,
        required: bool,
    ) -> Self {
        // Positional arguments have no flags; this only registers the ID.
        self.validate_and_process_arg(id, &[]);

        self.args.push(Arg {
            arg_type: ArgType::Pos,
            id: id.to_owned(),
            value_label: value_label.to_owned(),
            long_flags: Vec::new(),
            short_flags: Vec::new(),
            acceptable_count: (usize::from(required), Some(1)),
            doc: doc.to_owned(),
        });

        self
    }

    /// Add a positional multi-argument.
    ///
    /// Identical to `add_positional_argument()` except that the
    /// argument has no upper bound on how many values it accepts.  See
    /// `add_positional_argument()` for the ordering rules that apply to
    /// positional arguments.
    ///
    /// # Panics
    ///
    /// Panics if `id` has already been used.
    #[must_use]
    pub fn add_positional_multi_argument(
        mut self,
        id: &str,
        doc: &str,
        value_label: &str,
        required: bool,
    ) -> Self {
        // Positional arguments have no flags; this only registers the ID.
        self.validate_and_process_arg(id, &[]);

        self.args.push(Arg {
            arg_type: ArgType::Pos,
            id: id.to_owned(),
            value_label: value_label.to_owned(),
            long_flags: Vec::new(),
            short_flags: Vec::new(),
            acceptable_count: (usize::from(required), None),
            doc: doc.to_owned(),
        });

        self
    }

    //----------------

    /// Panics unless at least one flag string was supplied for `id`.
    ///
    /// A flag or standard argument without any flag strings could never
    /// be matched on the command line.
    fn require_flags(id: &str, flags: &[&str]) {
        if flags.is_empty() {
            panic!(
                "Error: attempted to add argument \"{}\" without any flag strings.",
                id
            );
        }
    }

    /// Registers `id` and `flags` in the duplicate-detection sets and
    /// sorts the flags into long and short ones.
    ///
    /// Returns a (long, short) pair of flag-string Vecs.  Long flags
    /// are stored verbatim, *including* the leading "--".  Short flags
    /// are stored with the single leading "-" stripped off.
    ///
    /// # Panics
    ///
    /// Panics if `id` or any flag has already been registered, or if a
    /// flag contains whitespace or isn't of the form "-f" / "--flag".
    fn validate_and_process_arg(&mut self, id: &str, flags: &[&str]) -> (Vec<String>, Vec<String>) {
        // `insert()` returns false when the value was already present.
        if !self.id_set.insert(id.to_owned()) {
            panic!(
                "Error: attempted to add argument with a duplicate ID \"{}\".",
                id
            );
        }

        let mut long_flags = Vec::new();
        let mut short_flags = Vec::new();

        for &flag in flags {
            // Ensure no whitespace.
            if flag.len() != flag.trim().len() || flag.split_whitespace().count() > 1 {
                panic!(
                    "Error: attempted to add argument \"{}\" which contains whitespace.",
                    flag
                );
            }

            let double_hyphen = flag.starts_with("--");

            // Long flags.
            if double_hyphen && flag.len() > 2 {
                if !self.long_set.insert(flag.to_owned()) {
                    panic!(
                        "Error: attempted to add duplicate long argument \"{}\".",
                        flag
                    );
                }
                long_flags.push(flag.to_owned());
            }
            // Check if it's a valid short flag.
            // Note: in theory we should be checking to verify that
            // the flag is only one character long.  But because of
            // graphemes that's complicated, and it's not really
            // worth all the code.  So instead we just rely on client
            // code doing the right thing.
            //
            // A bare "--" is explicitly excluded: it would otherwise be
            // registered as the short flag "-", which could never be
            // matched since "--" on the command line is treated as a
            // long flag.
            else if !double_hyphen && flag.starts_with('-') && flag.len() > 1 {
                if !self.short_set.insert(flag.to_owned()) {
                    panic!(
                        "Error: attempted to add duplicate short argument \"{}\".",
                        flag
                    );
                }
                short_flags.push(flag[1..].to_owned());
            }
            // Not a valid flag.
            else {
                panic!(
                    "Error: attempted to add argument \"{}\", which isn't a valid argument string.",
                    flag
                )
            }
        }

        (long_flags, short_flags)
    }
 }
 //-------------------------------------------------------------
 /// The kind of command line argument that an `Arg` describes.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub(crate) enum ArgType {
    Flag, // Boolean flag (present or absent)
    Arg,  // Standard flag+value argument, like `-i input_file`.
    Pos,  // Positional argument.
 }
 /// Argument specification.
 ///
 /// Describes a single argument of a `Spec`: what kind it is, how it
 /// is identified, and how many times it may appear.
 #[derive(Debug, Clone)]
 pub(crate) struct Arg {
    pub(crate) arg_type: ArgType,
    // Identifier of the argument.  Unique within a `Spec`.
    pub(crate) id: String,
    // Label for the argument's value.  Empty for boolean flags.
    pub(crate) value_label: String,
    // Long and short versions of the argument flag.  E.g. "--curve" and
    // "-c".  Long flags are stored verbatim, including the leading
    // "--"; short flags are stored without the leading "-" (so "c").
    // Both are empty for positional arguments.
    pub(crate) long_flags: Vec<String>,
    pub(crate) short_flags: Vec<String>,
    // How many instances of the argument can be present, specified
    // as a (minimum, maximum) pair, where a maximum of `None` means
    // there is no upper bound.
    //
    // For example:
    // - (0, None):    An argument that can show up any number of times,
    //                 including not at all.
    // - (0, Some(1)): An argument that can either be absent or show up
    //                 precisely once.
    // - (1, Some(1)): An argument that must show up precisely once.
    // - (1, None):    An argument that must show up at least once.
    // - (2, Some(9)): An argument that must show up at least twice, but
    //                 no more than 9 times.
    pub(crate) acceptable_count: (usize, Option<usize>),
    // Documentation string, for generated help.
    pub(crate) doc: String,
 }