First implementation of the actual parser.

It doesn't do proper error handling yet, and is completely untested
so it probably has tons of bugs in it.
This commit is contained in:
Nathan Vegdahl 2022-06-19 17:11:43 -07:00
parent 838707d114
commit 82b6c5a5de
2 changed files with 386 additions and 208 deletions

View File

@ -1,189 +1,149 @@
use std::collections::{HashMap, HashSet}; use std::collections::HashMap;
use std::env::args_os; use std::env::args_os;
use std::ffi::{OsStr, OsString}; use std::ffi::OsString;
use std::ops::RangeBounds;
/// A command line argument parser. mod spec;
#[derive(Debug, Clone)]
pub struct Parser {
args: Vec<Arg>,
// Used to ensure we don't get duplicate arguments. pub use spec::Spec;
id_set: HashSet<String>,
long_set: HashSet<String>,
short_set: HashSet<String>,
}
impl Parser { pub fn parse(mut spec: Spec) -> ParsedArguments {
pub fn new() -> Parser { // Split into non-positional and positional arguments.
Parser { let (args, pos_args): (Vec<_>, Vec<_>) = spec
args: Vec::new(), .args
id_set: HashSet::new(), .drain(..)
long_set: HashSet::new(), .partition(|arg| arg.arg_type != spec::ArgType::Pos);
short_set: HashSet::new(),
// Validate positional arguments:
// - All required positional arguments should precede any optional
// positional arguments.
// - There should be at most a single positional multi-argument, and
// it must be at the end.
{
let mut met_optional = false;
let mut met_multi = false;
for arg in pos_args.iter() {
if arg.arg_type == spec::ArgType::Pos {
let is_optional = arg.acceptable_count.0 == 0;
let is_multi =
arg.acceptable_count.1.is_none() || arg.acceptable_count.1.unwrap() > 1;
if !is_optional && met_optional {
panic!("All required positional arguments must precede all optional positional arguments in the argument spec.")
}
if met_multi {
panic!("There must be at most one positional multi-argument in the argument spec, and it must come last.")
}
met_optional |= is_optional;
met_multi |= is_multi;
}
} }
} }
/// Add a flag (bool) argument. // Parse!
/// // TODO: optimize by first creating a hash map from flag strings to
/// - `id`: the argument identifier, used for fetching argument // argument indices. Right now this is an `O(NM)` algorithm, with N
/// matches. // being the number of arguments in the spec and M being the number
/// - `flags`: the long and/or short argument flag strings. Must be // of arguments passed by the user. We can even further optimize it
/// in the form "-f" or "--flag". You can pass as many as you // by first checking against the maximum length of our long arguments,
/// like, all of which will be considered equivalent during // so we don't end up hashing really long user arguments
/// parsing. But there must be at least one. // unnecessarily for the check.
/// - `doc`: the documentation string to use in the generated help. let mut pos_i = 0; // Index of the positional argument we're at.
/// Pass an empty string to indicate no documentation. let mut pos_i_count = 0; // Number of positional arguments we've parsed at the current positional argument index.
pub fn add_flag(&mut self, id: &str, flags: &[&str], doc: &str) { let mut parsed = ParsedArguments {
let (long_flags, short_flags) = self.validate_and_process_arg(id, flags); arguments: Vec::new(),
id_map: HashMap::new(),
};
let mut args_in = args_os();
let _ = args_in.next(); // Skip the first argument, which is the call to the executable.
self.args.push(Arg { 'outer: while let Some(arg_in) = args_in.next() {
arg_type: ArgType::Flag, // Check for flags and non-positional arguments.
id: id.into(), if let Some(arg_in_str) = arg_in.to_str() {
value_label: String::new(), if arg_in_str.starts_with("--") {
long_flags: long_flags, // Long.
short_flags: short_flags, for arg in args.iter() {
acceptable_count: (0, None), for long_flag in arg.long_flags.iter() {
doc: doc.into(), if arg_in_str == long_flag {
}); match arg.arg_type {
spec::ArgType::Flag => parsed.push_arg(arg.id.clone(), None),
spec::ArgType::Arg => {
if let Some(value) = args_in.next() {
parsed.push_arg(arg.id.clone(), Some(value));
} else {
todo!("Handle error: expected value after argument flag.");
}
} }
/// Add a standard argument, that takes a value. spec::ArgType::Pos => unreachable!(),
pub fn add_argument( }
&mut self, continue 'outer;
id: &str, }
flags: &[&str], }
doc: &str, }
value_label: &str, todo!("Handle error: no long argument matched the passed argument.");
required: bool, } else if arg_in_str.starts_with("-") {
) { // Short.
let (long_flags, short_flags) = self.validate_and_process_arg(id, flags); let mut remainder = &arg_in_str[1..];
self.args.push(Arg { // First check arguments that take values.
arg_type: ArgType::Arg, for arg in args.iter().filter(|a| a.arg_type == spec::ArgType::Arg) {
id: id.into(), for short_flag in arg.short_flags.iter() {
value_label: value_label.into(), if remainder == short_flag {
long_flags: long_flags, if let Some(value) = args_in.next() {
short_flags: short_flags, parsed.push_arg(arg.id.clone(), Some(value));
acceptable_count: (if required { 1 } else { 0 }, None), } else {
doc: doc.into(), todo!("Handle error: expected value after argument flag.");
}); }
continue 'outer;
}
}
} }
/// Add a positional argument. // Then check boolean flags. There can be multiple
/// // present, so we progressively chop off the front as we
/// Unlike flags and standard arguments, positional arguments are // find matches until nothing remains.
/// parsed in the order they're added. Because of their nature, 'restart_args: while !remainder.is_empty() {
/// they have some additional considerations: for arg in args.iter().filter(|a| a.arg_type == spec::ArgType::Flag) {
/// for short_flag in arg.short_flags.iter() {
/// - All required positional arguments must precede all optional if remainder.starts_with(short_flag) {
/// positional arguments. remainder = &remainder[short_flag.len()..];
/// - There can at most be a single positional multi-argument, parsed.push_arg(arg.id.clone(), None);
/// which must come last. (See `add_positional_multi_argument()`.) continue 'restart_args;
pub fn add_positional_argument(
&mut self,
id: &str,
doc: &str,
value_label: &str,
required: bool,
) {
let (_, _) = self.validate_and_process_arg(id, &[]);
self.args.push(Arg {
arg_type: ArgType::PosArg,
id: id.into(),
value_label: value_label.into(),
long_flags: Vec::new(),
short_flags: Vec::new(),
acceptable_count: (if required { 1 } else { 0 }, Some(1)),
doc: doc.into(),
});
} }
pub fn add_positional_multi_argument(
&mut self,
id: &str,
doc: &str,
value_label: &str,
required: bool,
) {
let (_, _) = self.validate_and_process_arg(id, &[]);
self.args.push(Arg {
arg_type: ArgType::PosArg,
id: id.into(),
value_label: value_label.into(),
long_flags: Vec::new(),
short_flags: Vec::new(),
acceptable_count: (if required { 1 } else { 0 }, None),
doc: doc.into(),
});
} }
//----------------
pub fn parse(self) -> ParsedArguments {
todo!()
} }
todo!(
//---------------- "Handle error: no short argument matches the next flag in \"{}\".",
remainder
/// Returns (long, short) pair, each of which is a Vec of argument strings with
/// the leading hyphens stripped off.
fn validate_and_process_arg(&mut self, id: &str, flags: &[&str]) -> (Vec<String>, Vec<String>) {
if self.id_set.contains(id) {
panic!(
"Error: attempted to add argument with a duplicate ID \"{}\".",
id
); );
} }
self.id_set.insert(id.into()); continue 'outer;
let mut long_flags = Vec::new();
let mut short_flags = Vec::new();
for &flag in flags {
// Ensure no whitespace.
if flag.len() != flag.trim().len() || flag.split_whitespace().count() > 1 {
panic!(
"Error: attempted to add argument \"{}\" which contains whitespace.",
flag
);
}
// Long flags.
else if flag.starts_with("--") && flag.len() > 2 {
if self.long_set.contains(flag) {
panic!(
"Error: attempted to add duplicate long argument \"{}\".",
flag
);
}
self.long_set.insert(flag.into());
long_flags.push((&flag[2..]).into());
}
// Check if it's a valid short flag (should only have one character
// after the hyphen).
else if flag.starts_with("-") && flag.chars().count() == 2 {
if self.short_set.contains(flag) {
panic!(
"Error: attempted to add duplicate short argument \"{}\".",
flag
);
}
self.short_set.insert(flag.into());
short_flags.push((&flag[1..]).into());
}
// Not a valid flag.
else {
panic!(
"Error: attempted to add argument \"{}\", which isn't a valid argument string.",
flag
)
} }
} }
(long_flags, short_flags) if pos_i < pos_args.len() {
let arg = &pos_args[pos_i];
parsed.push_arg(arg.id.clone(), Some(arg_in));
pos_i_count += 1;
if let Some(max_count) = arg.acceptable_count.1 {
if pos_i_count == max_count {
pos_i += 1;
pos_i_count = 0;
} }
}
} else {
todo!("Handle error: too many positional arguments.");
}
}
if pos_i < pos_args.len() && pos_i_count < pos_args[pos_i].acceptable_count.0 {
todo!("Handle error: not enough positional arguments.");
}
parsed
} }
/// Parsed command line arguments. /// Parsed command line arguments.
@ -199,41 +159,15 @@ pub struct ParsedArguments {
id_map: HashMap<String, Vec<usize>>, // Argument ID -> index list id_map: HashMap<String, Vec<usize>>, // Argument ID -> index list
} }
//------------------------------------------------------------- impl ParsedArguments {
fn push_arg(&mut self, id: String, value: Option<OsString>) {
assert!(!id.is_empty());
#[derive(Debug, Copy, Clone, Eq, PartialEq)] if !self.id_map.contains_key(&id) {
enum ArgType { self.id_map.insert(id.clone(), Vec::new());
Flag, }
Arg, self.id_map.get_mut(&id).unwrap().push(self.arguments.len());
PosArg,
} self.arguments.push((id, value));
}
/// Argument specification.
#[derive(Debug, Clone)]
struct Arg {
arg_type: ArgType,
id: String,
value_label: String,
// Long and short versions of the argument flag. E.g. "--curve" and
// "-c", but without the leading dashes.
long_flags: Vec<String>,
short_flags: Vec<String>,
// How many instances of the argument can be present, specified
// as a range.
//
// For example:
// - (0, None): An argument that can show up any number of times,
// including not at all.
// - (0, 1): An argument that can either be absent or show up
// precisely once.
// - (1, 1): An argument that must show up precisely once.
// - (1, None): An argument that must show up at least once.
// - (2, 9): An argument that must show up at least twice, but
// no more than 9 times.
acceptable_count: (usize, Option<usize>),
// Documentation string, for generated help.
doc: String,
} }

244
src/spec.rs Normal file
View File

@ -0,0 +1,244 @@
use std::collections::HashSet;
/// Command line argument specification.
///
/// A spec is built up fluently: every `add_*` method consumes the spec
/// and returns it with one more argument registered.
#[derive(Debug, Clone)]
pub struct Spec {
    pub(crate) name: String,    // Application name.
    pub(crate) version: String, // Application version.
    pub(crate) args: Vec<Arg>,  // Registered arguments, in the order they were added.

    // Used to ensure we don't get duplicate arguments.  The two flag
    // sets hold the flag strings exactly as they were passed in, i.e.
    // with their leading hyphens.
    id_set: HashSet<String>,
    long_set: HashSet<String>,
    short_set: HashSet<String>,
}

impl Spec {
    /// Create a new argument specification.
    ///
    /// `name` and `version` are the name and version of the software,
    /// respectively.
    #[must_use]
    pub fn new(name: String, version: String) -> Spec {
        Spec {
            name,
            version,
            args: Vec::new(),
            id_set: HashSet::new(),
            long_set: HashSet::new(),
            short_set: HashSet::new(),
        }
    }

    /// Add a flag (bool) argument.
    ///
    /// - `id`: the argument identifier, used for fetching argument
    ///   matches.
    /// - `flags`: the long and/or short argument flag strings.  Must be
    ///   in the form "-f" or "--flag".  You can pass as many as you
    ///   like, all of which will be considered equivalent during
    ///   parsing.  But there must be at least one.
    /// - `doc`: the documentation string to use in the generated help.
    ///   Pass an empty string to indicate no documentation.
    ///
    /// # Panics
    ///
    /// Panics if `flags` is empty, if `id` or any flag string has
    /// already been registered, or if a flag string contains whitespace
    /// or is not of the form "-f" / "--flag".
    #[must_use]
    pub fn add_flag(mut self, id: &str, flags: &[&str], doc: &str) -> Self {
        Self::require_flags(id, flags);
        let (long_flags, short_flags) = self.validate_and_process_arg(id, flags);

        self.args.push(Arg {
            arg_type: ArgType::Flag,
            id: id.into(),
            value_label: String::new(),
            long_flags,
            short_flags,
            acceptable_count: (0, None),
            doc: doc.into(),
        });

        self
    }

    /// Add a standard argument, that takes a value.
    ///
    /// - `id`, `flags`, `doc`: as for `add_flag()`.
    /// - `value_label`: label stored alongside the argument for its
    ///   value.
    /// - `required`: whether the argument must be present at least
    ///   once.  There is no upper limit on how often it may appear.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as `add_flag()`.
    #[must_use]
    pub fn add_argument(
        mut self,
        id: &str,
        flags: &[&str],
        doc: &str,
        value_label: &str,
        required: bool,
    ) -> Self {
        Self::require_flags(id, flags);
        let (long_flags, short_flags) = self.validate_and_process_arg(id, flags);

        self.args.push(Arg {
            arg_type: ArgType::Arg,
            id: id.into(),
            value_label: value_label.into(),
            long_flags,
            short_flags,
            acceptable_count: (usize::from(required), None),
            doc: doc.into(),
        });

        self
    }

    /// Add a positional argument.
    ///
    /// Unlike flags and standard arguments, positional arguments are
    /// parsed in the order they're added.  Because of their nature,
    /// they have some additional considerations:
    ///
    /// - All required positional arguments must precede all optional
    ///   positional arguments.
    /// - There can at most be a single positional multi-argument,
    ///   which must come last.  (See `add_positional_multi_argument()`.)
    ///
    /// # Panics
    ///
    /// Panics if `id` has already been registered.
    #[must_use]
    pub fn add_positional_argument(
        mut self,
        id: &str,
        doc: &str,
        value_label: &str,
        required: bool,
    ) -> Self {
        // Positional arguments have no flag strings; only the ID is checked.
        let (long_flags, short_flags) = self.validate_and_process_arg(id, &[]);

        self.args.push(Arg {
            arg_type: ArgType::Pos,
            id: id.into(),
            value_label: value_label.into(),
            long_flags,
            short_flags,
            acceptable_count: (usize::from(required), Some(1)),
            doc: doc.into(),
        });

        self
    }

    /// Add a positional multi-argument.
    ///
    /// Identical to `add_positional_argument()` except that no upper
    /// limit is placed on how many values it accepts.
    ///
    /// # Panics
    ///
    /// Panics if `id` has already been registered.
    #[must_use]
    pub fn add_positional_multi_argument(
        mut self,
        id: &str,
        doc: &str,
        value_label: &str,
        required: bool,
    ) -> Self {
        // Positional arguments have no flag strings; only the ID is checked.
        let (long_flags, short_flags) = self.validate_and_process_arg(id, &[]);

        self.args.push(Arg {
            arg_type: ArgType::Pos,
            id: id.into(),
            value_label: value_label.into(),
            long_flags,
            short_flags,
            acceptable_count: (usize::from(required), None),
            doc: doc.into(),
        });

        self
    }

    //----------------

    /// Enforces the documented rule that flag-based arguments need at
    /// least one flag string; without one the argument could never be
    /// matched.
    fn require_flags(id: &str, flags: &[&str]) {
        if flags.is_empty() {
            panic!(
                "Error: attempted to add argument \"{}\" without any flag strings; at least one is required.",
                id
            );
        }
    }

    /// Registers `id` and `flags`, panicking on duplicates or malformed
    /// flag strings.
    ///
    /// Returns a (long, short) pair of flag lists.  Long flags are
    /// returned exactly as given, *including* the leading "--".  Short
    /// flags are returned with their single leading "-" stripped off.
    fn validate_and_process_arg(&mut self, id: &str, flags: &[&str]) -> (Vec<String>, Vec<String>) {
        // `insert` returns false when the value was already present.
        if !self.id_set.insert(id.into()) {
            panic!(
                "Error: attempted to add argument with a duplicate ID \"{}\".",
                id
            );
        }

        let mut long_flags = Vec::new();
        let mut short_flags = Vec::new();
        for &flag in flags {
            // Ensure no whitespace.
            if flag.chars().any(char::is_whitespace) {
                panic!(
                    "Error: attempted to add argument \"{}\" which contains whitespace.",
                    flag
                );
            }

            match (flag.strip_prefix("--"), flag.strip_prefix('-')) {
                // Long flags: "--" followed by at least one character.
                (Some(name), _) if !name.is_empty() => {
                    if !self.long_set.insert(flag.into()) {
                        panic!(
                            "Error: attempted to add duplicate long argument \"{}\".",
                            flag
                        );
                    }
                    long_flags.push(flag.into());
                }

                // Short flags: a single "-" followed by at least one
                // character.  The `None` in the first slot means a bare
                // "--" can't sneak through as a short flag named "-".
                //
                // Note: in theory we should be checking to verify that
                // the flag is only one character long.  But because of
                // graphemes that's complicated, and it's not really
                // worth all the code.  So instead we just rely on client
                // code doing the right thing.
                (None, Some(name)) if !name.is_empty() => {
                    if !self.short_set.insert(flag.into()) {
                        panic!(
                            "Error: attempted to add duplicate short argument \"{}\".",
                            flag
                        );
                    }
                    short_flags.push(name.into());
                }

                // Not a valid flag.
                _ => panic!(
                    "Error: attempted to add argument \"{}\", which isn't a valid argument string.",
                    flag
                ),
            }
        }

        (long_flags, short_flags)
    }
}

//-------------------------------------------------------------

/// The kind of an argument in the spec.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum ArgType {
    Flag, // Boolean flag (present or absent)
    Arg,  // Standard flag+value argument, like `-i input_file`.
    Pos,  // Positional argument.
}

/// Argument specification.
#[derive(Debug, Clone)]
pub(crate) struct Arg {
    pub(crate) arg_type: ArgType,
    pub(crate) id: String,
    pub(crate) value_label: String,

    // Long and short versions of the argument flag.  Long flags are
    // stored with their leading dashes (e.g. "--curve"), whereas short
    // flags are stored without the leading dash (e.g. "c" for "-c").
    // Both are empty for positional arguments.
    pub(crate) long_flags: Vec<String>,
    pub(crate) short_flags: Vec<String>,

    // How many instances of the argument can be present, specified
    // as a range.
    //
    // For example:
    // - (0, None): An argument that can show up any number of times,
    //              including not at all.
    // - (0, 1): An argument that can either be absent or show up
    //           precisely once.
    // - (1, 1): An argument that must show up precisely once.
    // - (1, None): An argument that must show up at least once.
    // - (2, 9): An argument that must show up at least twice, but
    //           no more than 9 times.
    pub(crate) acceptable_count: (usize, Option<usize>),

    // Documentation string, for generated help.
    pub(crate) doc: String,
}