1
0

feat(windows): add command line argument parsing utilities

Implement CmdLexer, CmdArg and CmdArgs for proper Windows command line argument parsing and quoting. Add itertools dependency for string joining functionality.
This commit is contained in:
2025-10-22 12:11:42 +08:00
parent 84a29c862b
commit ffd58ff677
3 changed files with 299 additions and 13 deletions

16
Cargo.lock generated
View File

@ -187,6 +187,12 @@ dependencies = [
"litrs", "litrs",
] ]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]] [[package]]
name = "equivalent" name = "equivalent"
version = "1.0.2" version = "1.0.2"
@ -249,6 +255,15 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "1.0.15" version = "1.0.15"
@ -736,6 +751,7 @@ name = "wfassoc"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"indexmap", "indexmap",
"itertools",
"regex", "regex",
"thiserror", "thiserror",
"uuid", "uuid",

View File

@ -19,5 +19,6 @@ windows-sys = { version = "0.60.2", features = [
winreg = { version = "0.55.0", features = ["transactions"] } winreg = { version = "0.55.0", features = ["transactions"] }
widestring = "1.2.1" widestring = "1.2.1"
indexmap = "2.11.4" indexmap = "2.11.4"
itertools = "0.14.0"
regex = "1.11.3" regex = "1.11.3"
uuid = "1.18.1" uuid = "1.18.1"

View File

@ -2,6 +2,7 @@
//! These features are not implemented in any crate (as far as I know) //! These features are not implemented in any crate (as far as I know)
//! and should be manually implemented for our file association use. //! and should be manually implemented for our file association use.
use itertools::Itertools;
use regex::Regex; use regex::Regex;
use std::fmt::Display; use std::fmt::Display;
use std::path::Path; use std::path::Path;
@ -9,8 +10,7 @@ use std::str::FromStr;
use std::sync::LazyLock; use std::sync::LazyLock;
use thiserror::Error as TeError; use thiserror::Error as TeError;
use widestring::{WideCStr, WideCString, WideChar}; use widestring::{WideCStr, WideCString, WideChar};
use windows_sys::Win32::UI::Shell::ExtractIconExW; use windows_sys::Win32::UI::WindowsAndMessaging::HICON;
use windows_sys::Win32::UI::WindowsAndMessaging::{DestroyIcon, HICON};
// region: Expand String // region: Expand String
@ -69,24 +69,21 @@ impl ExpandString {
// Fetch the size of expand result // Fetch the size of expand result
let source = WideCString::from_str(self.inner.as_str())?; let source = WideCString::from_str(self.inner.as_str())?;
let size = unsafe { let size = unsafe { ExpandEnvironmentStringsW(source.as_ptr(), Default::default(), 0) };
ExpandEnvironmentStringsW(source.as_ptr(), Default::default(), 0)
};
if size == 0 { if size == 0 {
return Err(ExpandEnvVarError::ExpandFunction) return Err(ExpandEnvVarError::ExpandFunction);
} }
let size_no_nul = size.checked_sub(1).ok_or(ExpandEnvVarError::Underflow)?; let size_no_nul = size.checked_sub(1).ok_or(ExpandEnvVarError::Underflow)?;
// Allocate buffer for it. // Allocate buffer for it.
let len: usize = size.try_into()?; let len: usize = size.try_into()?;
let len_no_nul = len.checked_sub(1).ok_or(ExpandEnvVarError::Underflow)?; let len_no_nul = len.checked_sub(1).ok_or(ExpandEnvVarError::Underflow)?;
let mut buffer= vec![0; len]; let mut buffer = vec![0; len];
// Receive result // Receive result
let size = unsafe { let size =
ExpandEnvironmentStringsW(source.as_ptr(), buffer.as_mut_ptr(), size_no_nul) unsafe { ExpandEnvironmentStringsW(source.as_ptr(), buffer.as_mut_ptr(), size_no_nul) };
};
if size == 0 { if size == 0 {
return Err(ExpandEnvVarError::ExpandFunction) return Err(ExpandEnvVarError::ExpandFunction);
} }
// Cast result as Rust string // Cast result as Rust string
@ -153,6 +150,8 @@ pub struct Icon {
impl Icon { impl Icon {
pub fn new(file: &Path, index: i32, kind: IconSizeKind) -> Result<Self, LoadIconError> { pub fn new(file: &Path, index: i32, kind: IconSizeKind) -> Result<Self, LoadIconError> {
use windows_sys::Win32::UI::Shell::ExtractIconExW;
let mut icon = HICON::default(); let mut icon = HICON::default();
let icon_ptr = &mut icon as *mut HICON; let icon_ptr = &mut icon as *mut HICON;
let file = WideCString::from_os_str(file.as_os_str())?; let file = WideCString::from_os_str(file.as_os_str())?;
@ -192,6 +191,8 @@ impl Icon {
impl Drop for Icon { impl Drop for Icon {
fn drop(&mut self) { fn drop(&mut self) {
use windows_sys::Win32::UI::WindowsAndMessaging::DestroyIcon;
if !self.icon.is_null() { if !self.icon.is_null() {
unsafe { unsafe {
DestroyIcon(self.icon); DestroyIcon(self.icon);
@ -202,14 +203,282 @@ impl Drop for Icon {
// endregion // endregion
// region: Windows Commandline
// region: Cmd Lexer
/// The lexer for Windows commandline argument split.
///
/// It wraps any iterator over `char` and yields one unquoted argument per
/// call to `next`. The rules implemented here are:
///
/// - Arguments are separated by whitespace (space, tab, newline, vertical tab)
///   that is not inside a quoted string.
/// - A double quote toggles "quoted" mode and is not part of the argument.
///   Inside a quoted string, `""` produces one literal `"`.
/// - Backslashes are literal unless the run of backslashes is immediately
///   followed by a double quote. In that case every pair of backslashes
///   produces one backslash; if the run has an odd length, the quote is a
///   literal `"`, otherwise the quote is a string delimiter.
/// - An empty quoted string (`""`) produces an empty argument.
///
/// Every token is lexed with the same rules; the special handling some
/// runtimes apply to the program name is not performed here.
///
/// Reference: https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments
pub struct CmdLexer<I: Iterator<Item = char>> {
    /// The remaining characters of the command line.
    chars: std::iter::Peekable<I>,
    /// Set once the underlying iterator is exhausted, so that
    /// later calls return `None` without polling it again.
    finished: bool,
}

impl<I: Iterator<Item = char>> CmdLexer<I> {
    /// Create a lexer reading the command line from given character iterator.
    pub fn new(iter: I) -> Self {
        Self {
            chars: iter.peekable(),
            finished: false,
        }
    }
}

impl<I: Iterator<Item = char>> Iterator for CmdLexer<I> {
    type Item = String;

    /// Lex the next argument, or return `None` when no argument is left.
    fn next(&mut self) -> Option<Self::Item> {
        if self.finished {
            return None;
        }

        let mut token = String::new();
        let mut in_quotes = false;
        // Whether anything belonging to an argument has been seen.
        // This can not be derived from `token.is_empty()` because
        // `""` is a valid, empty argument.
        let mut started = false;

        loop {
            let Some(c) = self.chars.next() else {
                self.finished = true;
                break;
            };

            match c {
                // Unquoted whitespace: skip it before a token, end the token after it.
                ' ' | '\t' | '\n' | '\x0b' if !in_quotes => {
                    if started {
                        break;
                    }
                }
                // A run of backslashes is only special when a quote follows it.
                '\\' => {
                    started = true;
                    let mut backslashes: usize = 1;
                    while self.chars.next_if_eq(&'\\').is_some() {
                        backslashes += 1;
                    }

                    if self.chars.peek() == Some(&'"') {
                        // Each pair of backslashes yields one literal backslash.
                        token.extend(std::iter::repeat('\\').take(backslashes / 2));
                        if backslashes % 2 == 1 {
                            // The remaining backslash escapes the quote.
                            self.chars.next();
                            token.push('"');
                        }
                        // Otherwise the quote is left in place and handled
                        // as a delimiter by the next loop iteration.
                    } else {
                        // Not followed by a quote: all backslashes are literal.
                        token.extend(std::iter::repeat('\\').take(backslashes));
                    }
                }
                '"' => {
                    started = true;
                    if in_quotes && self.chars.peek() == Some(&'"') {
                        // `""` inside a quoted string is one literal quote.
                        self.chars.next();
                        token.push('"');
                    } else {
                        in_quotes = !in_quotes;
                    }
                }
                // Regular character
                _ => {
                    started = true;
                    token.push(c);
                }
            }
        }

        // Only trailing whitespace (or nothing at all) was left: no more arguments.
        if started { Some(token) } else { None }
    }
}
// endregion
// region: Cmd Path // region: Cmd Path
pub struct CmdPath {} /// The struct representing a single commandline argument.
#[derive(Debug)]
pub struct CmdArg {
/// The not quoted value hold by this argument.
inner: String,
}
impl CmdArg {
/// Construct a commandline argument from user input string (may quoted string).
pub fn new(s: &str) -> Result<Self, ParseCmdArgError> {
Self::from_str(s)
}
/// Construct a commandline argument with direct inner value (not quoted string).
pub fn with_inner(s: &str) -> Self {
Self {
inner: s.to_string(),
}
}
/// Get the real value hold by this commandline argument (not quoted string).
pub fn get_inner(&self) -> &str {
&self.inner
}
/// Get the quoted string of this argument
/// so that you can append it into your built full commandline string.
///
/// `force` is an indication of whether we should quote the argument
/// even if it does not contain any characters that would ordinarily require quoting.
///
/// If you just want to get the stored string of this,
/// please use `to_string()` instead.
///
/// Reference: https://learn.microsoft.com/en-us/archive/blogs/twistylittlepassagesallalike/everyone-quotes-command-line-arguments-the-wrong-way
pub fn to_quoted_string(&self, force: bool) -> String {
// Unless forced, don't quote if the argument doesn't contain special characters
let mut quoted_arg = String::with_capacity(self.inner.len());
if !force
&& !self.inner.is_empty()
&& !self
.inner
.chars()
.any(|c| matches!(c, ' ' | '\t' | '\n' | '\x0b' | '"'))
{
quoted_arg.push_str(&self.inner);
} else {
quoted_arg.push('"');
let mut chars = self.inner.chars();
loop {
let mut c = chars.next();
let mut backslash_count: usize = 0;
// Count consecutive backslashes
while c == Some('\\') {
c = chars.next();
backslash_count += 1;
}
if let None = c {
// Escape all backslashes, but let the terminating
// double quotation mark we add below be interpreted
// as a metacharacter.
quoted_arg.push_str(&"\\".repeat(backslash_count * 2));
break;
} else if c == Some('"') {
// Escape all backslashes and the following
// double quotation mark.
quoted_arg.push_str(&"\\".repeat(backslash_count * 2 + 1));
quoted_arg.push(c.unwrap());
} else {
// Backslashes aren't special here.
quoted_arg.push_str(&"\\".repeat(backslash_count));
quoted_arg.push(c.unwrap());
}
}
quoted_arg.push('"');
}
return quoted_arg;
}
}
/// Error that occurs when parsing a string into a single commandline argument.
#[derive(Debug, TeError)]
pub enum ParseCmdArgError {
/// The lexer produced no token from the given string.
#[error("given string is not a commandline argument")]
NoArg,
/// The lexer produced more than one token from the given string.
#[error("given string may contain multiple commandline arguments")]
MultiArg,
}
impl FromStr for CmdArg {
    type Err = ParseCmdArgError;

    /// Parse a (possibly quoted) string which must hold exactly one argument.
    ///
    /// Fails with `NoArg` if the lexer yields nothing,
    /// and with `MultiArg` if it yields a second token.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let mut tokens = CmdLexer::new(s.chars());
        let first = tokens.next().ok_or(ParseCmdArgError::NoArg)?;

        // Any further token means the input was more than one argument.
        match tokens.next() {
            Some(_) => Err(ParseCmdArgError::MultiArg),
            None => Ok(Self { inner: first }),
        }
    }
}
impl Display for CmdArg {
    /// Write the stored value as is, without any quoting.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.inner.as_str())
    }
}
// endregion // endregion
// region: Cmd Arguments // region: Cmd Arguments
pub struct CmdArgs {} /// The struct representing a list of commandline arguments.
/// Arguments are stored unquoted, in command line order.
#[derive(Debug)]
pub struct CmdArgs {
    /// The list of arguments
    args: Vec<CmdArg>,
}

impl CmdArgs {
    /// Split given full commandline string into its arguments.
    ///
    /// This never fails, because the [`FromStr`] implementation
    /// of this type uses `Infallible` as its error.
    pub fn new(s: &str) -> Self {
        match Self::from_str(s) {
            Ok(parsed) => parsed,
            // `Infallible` has no value, so this arm can never be taken.
            Err(never) => match never {},
        }
    }

    /// Build the argument list from already unquoted arguments.
    ///
    /// Anything iterable is accepted, such as a `Vec<CmdArg>`, an array or an iterator.
    pub fn with_inner(args: impl IntoIterator<Item = CmdArg>) -> Self {
        Self {
            args: args.into_iter().collect(),
        }
    }

    /// Get the stored arguments as a slice.
    pub fn get_inner(&self) -> &[CmdArg] {
        &self.args
    }

    /// Build the string which can be recognised by Windows Cmd
    /// with proper escape.
    ///
    /// Arguments are joined with a single space. An argument is only
    /// quoted when it has to be.
    pub fn to_quoted_string(&self) -> String {
        self.args
            .iter()
            // We set "force" to false to prevent any switches are quoted.
            .map(|arg| arg.to_quoted_string(false))
            .join(" ")
    }
}
impl FromStr for CmdArgs {
    type Err = std::convert::Infallible;

    /// Lex the whole string and keep every token as one argument.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let args = CmdLexer::new(s.chars())
            .map(|token| CmdArg::with_inner(&token))
            .collect();
        Ok(Self { args })
    }
}
// endregion
// endregion // endregion