From ffd58ff677a118e9e0b2a33fe5849b1c054101b8 Mon Sep 17 00:00:00 2001 From: yyc12345 Date: Wed, 22 Oct 2025 12:11:42 +0800 Subject: [PATCH] feat(windows): add command line argument parsing utilities Implement CmdLexer, CmdArg and CmdArgs for proper Windows command line argument parsing and quoting. Add itertools dependency for string joining functionality. --- Cargo.lock | 16 ++ wfassoc/Cargo.toml | 1 + wfassoc/src/extra/windows.rs | 295 +++++++++++++++++++++++++++++++++-- 3 files changed, 299 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e372e77..ce95fca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -187,6 +187,12 @@ dependencies = [ "litrs", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "equivalent" version = "1.0.2" @@ -249,6 +255,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -736,6 +751,7 @@ name = "wfassoc" version = "0.1.0" dependencies = [ "indexmap", + "itertools", "regex", "thiserror", "uuid", diff --git a/wfassoc/Cargo.toml b/wfassoc/Cargo.toml index f1a18cd..97bd06a 100644 --- a/wfassoc/Cargo.toml +++ b/wfassoc/Cargo.toml @@ -19,5 +19,6 @@ windows-sys = { version = "0.60.2", features = [ winreg = { version = "0.55.0", features = ["transactions"] } widestring = "1.2.1" indexmap = "2.11.4" +itertools = "0.14.0" regex = "1.11.3" uuid = "1.18.1" diff --git a/wfassoc/src/extra/windows.rs b/wfassoc/src/extra/windows.rs index ebc2bf3..2175e05 100644 --- 
a/wfassoc/src/extra/windows.rs +++ b/wfassoc/src/extra/windows.rs @@ -2,6 +2,7 @@ //! These features are not implemented in any crates (as I known scope) //! and should be manually implemented for our file association use. +use itertools::Itertools; use regex::Regex; use std::fmt::Display; use std::path::Path; @@ -9,8 +10,7 @@ use std::str::FromStr; use std::sync::LazyLock; use thiserror::Error as TeError; use widestring::{WideCStr, WideCString, WideChar}; -use windows_sys::Win32::UI::Shell::ExtractIconExW; -use windows_sys::Win32::UI::WindowsAndMessaging::{DestroyIcon, HICON}; +use windows_sys::Win32::UI::WindowsAndMessaging::HICON; // region: Expand String @@ -69,24 +69,21 @@ impl ExpandString { // Fetch the size of expand result let source = WideCString::from_str(self.inner.as_str())?; - let size = unsafe { - ExpandEnvironmentStringsW(source.as_ptr(), Default::default(), 0) - }; + let size = unsafe { ExpandEnvironmentStringsW(source.as_ptr(), Default::default(), 0) }; if size == 0 { - return Err(ExpandEnvVarError::ExpandFunction) + return Err(ExpandEnvVarError::ExpandFunction); } let size_no_nul = size.checked_sub(1).ok_or(ExpandEnvVarError::Underflow)?; // Allocate buffer for it. 
let len: usize = size.try_into()?; let len_no_nul = len.checked_sub(1).ok_or(ExpandEnvVarError::Underflow)?; - let mut buffer= vec![0; len]; + let mut buffer = vec![0; len]; // Receive result - let size = unsafe { - ExpandEnvironmentStringsW(source.as_ptr(), buffer.as_mut_ptr(), size_no_nul) - }; + let size = + unsafe { ExpandEnvironmentStringsW(source.as_ptr(), buffer.as_mut_ptr(), size_no_nul) }; if size == 0 { - return Err(ExpandEnvVarError::ExpandFunction) + return Err(ExpandEnvVarError::ExpandFunction); } // Cast result as Rust string @@ -153,6 +150,8 @@ pub struct Icon { impl Icon { pub fn new(file: &Path, index: i32, kind: IconSizeKind) -> Result { + use windows_sys::Win32::UI::Shell::ExtractIconExW; + let mut icon = HICON::default(); let icon_ptr = &mut icon as *mut HICON; let file = WideCString::from_os_str(file.as_os_str())?; @@ -192,6 +191,8 @@ impl Icon { impl Drop for Icon { fn drop(&mut self) { + use windows_sys::Win32::UI::WindowsAndMessaging::DestroyIcon; + if !self.icon.is_null() { unsafe { DestroyIcon(self.icon); @@ -202,14 +203,282 @@ impl Drop for Icon { // endregion +// region: Windows Commandline + +// region Cmd Lexer + +/// The lexer for Windows commandline argument split. 
///
/// Reference: https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments
///
/// Splitting rules, as implemented below:
///
/// - Arguments are delimited by whitespace (space, tab, line feed, vertical tab)
///   that is not inside a quoted section.
/// - A double quote mark toggles the quoted state and is not part of the argument.
///   Inside a quoted section, two consecutive double quote marks produce one
///   literal double quote mark.
/// - Backslashes are literal unless they immediately precede a double quote mark:
///   `2n` backslashes followed by `"` produce `n` backslashes and the quote acts
///   as a delimiter, while `2n + 1` backslashes followed by `"` produce `n`
///   backslashes and a literal `"`.
/// - An empty quoted string (`""`) is yielded as an empty argument.
pub struct CmdLexer<I: Iterator<Item = char>> {
    /// The remaining characters of the commandline.
    chars: std::iter::Peekable<I>,
    /// Set once the character stream has been exhausted.
    finished: bool,
}

impl<I: Iterator<Item = char>> CmdLexer<I> {
    /// Create a lexer which splits the characters produced by `iter`.
    pub fn new(iter: I) -> Self {
        Self {
            chars: iter.peekable(),
            finished: false,
        }
    }
}

impl<I: Iterator<Item = char>> Iterator for CmdLexer<I> {
    type Item = String;

    /// Produce the next unquoted argument, or `None` when no argument is left.
    fn next(&mut self) -> Option<Self::Item> {
        if self.finished {
            return None;
        }

        let mut token = String::new();
        let mut in_quotes = false;
        // An argument may legitimately be empty (`""`), so "did we see anything
        // belonging to this argument" is tracked separately from the token text.
        let mut started = false;

        loop {
            let Some(c) = self.chars.next() else {
                self.finished = true;
                break;
            };

            match c {
                // Unquoted whitespace: skip it before an argument, end the argument after it.
                ' ' | '\t' | '\n' | '\x0b' if !in_quotes => {
                    if started {
                        break;
                    }
                }

                // A run of backslashes is only special when a quote follows it.
                '\\' => {
                    started = true;

                    let mut backslashes: usize = 1;
                    while self.chars.next_if_eq(&'\\').is_some() {
                        backslashes += 1;
                    }

                    if self.chars.peek() == Some(&'"') {
                        // Every pair of backslashes collapses into one.
                        token.extend(std::iter::repeat('\\').take(backslashes / 2));
                        if backslashes % 2 == 1 {
                            // The remaining backslash escapes the quote.
                            self.chars.next();
                            token.push('"');
                        }
                        // Otherwise the quote is left in the stream so that the
                        // quote branch handles it as a delimiter.
                    } else {
                        // No quote follows: all backslashes are literal.
                        token.extend(std::iter::repeat('\\').take(backslashes));
                    }
                }

                '"' => {
                    started = true;

                    if in_quotes && self.chars.next_if_eq(&'"').is_some() {
                        // `""` inside a quoted section is a literal quote.
                        token.push('"');
                    } else {
                        in_quotes = !in_quotes;
                    }
                }

                _ => {
                    started = true;
                    token.push(c);
                }
            }
        }

        started.then_some(token)
    }
}

// endregion

// region: Cmd Path
+#[derive(Debug)] +pub struct CmdArg { + /// The not quoted value hold by this argument. + inner: String, +} + +impl CmdArg { + /// Construct a commandline argument from user input string (may quoted string). + pub fn new(s: &str) -> Result { + Self::from_str(s) + } + + /// Construct a commandline argument with direct inner value (not quoted string). + pub fn with_inner(s: &str) -> Self { + Self { + inner: s.to_string(), + } + } + + /// Get the real value hold by this commandline argument (not quoted string). + pub fn get_inner(&self) -> &str { + &self.inner + } + + /// Get the quoted string of this argument + /// so that you can append it into your built full commandline string. + /// + /// `force` is an indication of whether we should quote the argument + /// even if it does not contain any characters that would ordinarily require quoting. + /// + /// If you just want to get the stored string of this, + /// please use `to_string()` instead. + /// + /// Reference: https://learn.microsoft.com/en-us/archive/blogs/twistylittlepassagesallalike/everyone-quotes-command-line-arguments-the-wrong-way + pub fn to_quoted_string(&self, force: bool) -> String { + // Unless forced, don't quote if the argument doesn't contain special characters + let mut quoted_arg = String::with_capacity(self.inner.len()); + + if !force + && !self.inner.is_empty() + && !self + .inner + .chars() + .any(|c| matches!(c, ' ' | '\t' | '\n' | '\x0b' | '"')) + { + quoted_arg.push_str(&self.inner); + } else { + quoted_arg.push('"'); + + let mut chars = self.inner.chars(); + loop { + let mut c = chars.next(); + let mut backslash_count: usize = 0; + + // Count consecutive backslashes + while c == Some('\\') { + c = chars.next(); + backslash_count += 1; + } + + if let None = c { + // Escape all backslashes, but let the terminating + // double quotation mark we add below be interpreted + // as a metacharacter. 
+ quoted_arg.push_str(&"\\".repeat(backslash_count * 2)); + break; + } else if c == Some('"') { + // Escape all backslashes and the following + // double quotation mark. + quoted_arg.push_str(&"\\".repeat(backslash_count * 2 + 1)); + quoted_arg.push(c.unwrap()); + } else { + // Backslashes aren't special here. + quoted_arg.push_str(&"\\".repeat(backslash_count)); + quoted_arg.push(c.unwrap()); + } + } + + quoted_arg.push('"'); + } + + return quoted_arg; + } +} + +/// Error occurs when creating commandline argument. +#[derive(Debug, TeError)] +pub enum ParseCmdArgError { + #[error("given string is not a commandline argument")] + NoArg, + #[error("given string may contain multiple commandline arguments")] + MultiArg, +} + +impl FromStr for CmdArg { + type Err = ParseCmdArgError; + + fn from_str(s: &str) -> Result { + let mut lexer = CmdLexer::new(s.chars()); + let inner = match lexer.next() { + Some(v) => v, + None => return Err(ParseCmdArgError::NoArg), + }; + if let Some(_) = lexer.next() { + return Err(ParseCmdArgError::MultiArg); + } + + Ok(Self { inner }) + } +} + +impl Display for CmdArg { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.inner) + } +} // endregion // region: Cmd Arguments -pub struct CmdArgs {} +/// The struct representing a single commandline argument. +#[derive(Debug)] +pub struct CmdArgs { + /// The list of arguments + args: Vec, +} + +impl CmdArgs { + pub fn new(s: &str) -> Self { + Self::from_str(s).expect("Infallible failed") + } + + pub fn with_inner(args: impl Iterator) -> Self { + Self { + args: args.collect(), + } + } + + pub fn get_inner(&self) -> &[CmdArg] { + &self.args + } + + /// Build the string which can be recognised by Windows Cmd + /// with proper escape. + pub fn to_quoted_string(&self) -> String { + self.args + .iter() + // We set "force" to false to prevent any switches are quoted. 
+ .map(|a| a.to_quoted_string(false)) + .join(" ") + } +} + +impl FromStr for CmdArgs { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self { + args: CmdLexer::new(s.chars()) + .map(|a| CmdArg::with_inner(a.as_str())) + .collect(), + }) + } +} + +// endregion // endregion