Skip to content

Commit

Permalink
Response file parsing for GCC and Clang
Browse files Browse the repository at this point in the history
This fixes mozilla#1780

This moves the response file parsing code implemented by @temportalflux out of the MSVC compiler and shares it for all compilers. This enables GCC and Clang to use response files with quoted arguments.

I've added some lib tests to the response file module, so that it can be reasoned about, separately from the compilers using it.
  • Loading branch information
Alexei-Barnes authored and sylvestre committed Jun 1, 2023
1 parent 7416904 commit b985902
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 130 deletions.
11 changes: 6 additions & 5 deletions src/compiler/gcc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::compiler::args::*;
use crate::compiler::c::{
ArtifactDescriptor, CCompilerImpl, CCompilerKind, Language, ParsedArguments,
};
use crate::compiler::response_file::SplitResponseFileArgs;
use crate::compiler::{clang, Cacheable, ColorMode, CompileCommand, CompilerArguments};
use crate::mock_command::{CommandCreatorSync, RunCommand};
use crate::util::{run_input_output, OsStrExt};
Expand Down Expand Up @@ -887,11 +888,11 @@ impl<'a> Iterator for ExpandIncludeFile<'a> {
debug!("failed to read @-file `{}`: {}", file.display(), e);
return Some(arg);
}
if contents.contains('"') || contents.contains('\'') {
return Some(arg);
}
let new_args = contents.split_whitespace().collect::<Vec<_>>();
self.stack.extend(new_args.iter().rev().map(|s| s.into()));
// Parse the response file contents, taking into account quote-wrapped strings and new-line separators.
let resp_file_args = SplitResponseFileArgs::from(&contents).collect::<Vec<_>>();
// Pump arguments back to the stack, in reverse order so we can `Vec::pop` and visit in original front-to-back order.
let rev_args = resp_file_args.iter().rev().map(|s| s.into());
self.stack.extend(rev_args);
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ mod diab;
mod gcc;
mod msvc;
mod nvcc;
mod response_file;
mod rust;
mod tasking_vx;
#[macro_use]
Expand Down
127 changes: 2 additions & 125 deletions src/compiler/msvc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::compiler::args::*;
use crate::compiler::c::{
ArtifactDescriptor, CCompilerImpl, CCompilerKind, Language, ParsedArguments,
};
use crate::compiler::response_file::SplitResponseFileArgs;
use crate::compiler::{
clang, gcc, write_temp_file, Cacheable, ColorMode, CompileCommand, CompilerArguments,
};
Expand Down Expand Up @@ -1221,8 +1222,7 @@ impl<'a> Iterator for ExpandIncludeFile<'a> {
trace!("Expanded response file {:?} to {:?}", file_path, content);

// Parse the response file contents, taking into account quote-wrapped strings and new-line separators.
// Special implementation to account for MSVC response file format.
let resp_file_args = SplitMsvcResponseFileArgs::from(&content).collect::<Vec<_>>();
let resp_file_args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
// Pump arguments back to the stack, in reverse order so we can `Vec::pop` and visit in original front-to-back order.
let rev_args = resp_file_args.iter().rev().map(|s| s.into());
self.stack.extend(rev_args);
Expand All @@ -1247,129 +1247,6 @@ where
result.map_err(|err| io::Error::new(io::ErrorKind::Other, err.into_owned()))
}

/// An iterator over the arguments in a Windows command line.
///
/// This produces results identical to `CommandLineToArgvW` except in the
/// following cases:
///
/// 1. When passed an empty string, CommandLineToArgvW returns the path to the
/// current executable file. Here, the iterator will simply be empty.
/// 2. CommandLineToArgvW interprets the first argument differently than the
/// rest. Here, all arguments are treated in identical fashion.
///
/// Parsing rules:
///
/// - Arguments are delimited by whitespace (either a space or tab).
/// - A string surrounded by double quotes is interpreted as a single argument.
/// - Backslashes are interpreted literally unless followed by a double quote.
/// - 2n backslashes followed by a double quote reduce to n backslashes and we
/// enter the "in quote" state.
/// - 2n+1 backslashes followed by a double quote reduces to n backslashes,
/// we do *not* enter the "in quote" state, and the double quote is
/// interpreted literally.
///
/// References:
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
#[derive(Clone, Debug)]
struct SplitMsvcResponseFileArgs<'a> {
/// String slice of the file content that is being parsed.
/// Slice is mutated as this iterator is executed.
file_content: &'a str,
}

impl<'a, T> From<&'a T> for SplitMsvcResponseFileArgs<'a>
where
T: AsRef<str> + 'static,
{
fn from(file_content: &'a T) -> Self {
Self {
file_content: file_content.as_ref(),
}
}
}

impl<'a> SplitMsvcResponseFileArgs<'a> {
/// Appends backslashes to `target` by decrementing `count`.
/// If `step` is >1, then `count` is decremented by `step`, resulting in 1 backslash appended for every `step`.
fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) {
while *count >= step {
target.push('\\');
*count -= step;
}
}
}

impl<'a> Iterator for SplitMsvcResponseFileArgs<'a> {
type Item = String;

fn next(&mut self) -> Option<String> {
let mut in_quotes = false;
let mut backslash_count: usize = 0;

// Strip any leading whitespace before relevant characters
let is_whitespace = |c| matches!(c, ' ' | '\t' | '\n' | '\r');
self.file_content = self.file_content.trim_start_matches(is_whitespace);

if self.file_content.is_empty() {
return None;
}

// The argument string to return, built by analyzing the current slice in the iterator.
let mut arg = String::new();
// All characters still in the string slice. Will be mutated by consuming
// values until the current arg is built.
let mut chars = self.file_content.chars();
// Build the argument by evaluating each character in the string slice.
for c in &mut chars {
match c {
// In order to handle the escape character based on the char(s) which come after it,
// they are counted instead of appended literally, until a non-backslash character is encountered.
'\\' => backslash_count += 1,
// Either starting or ending a quoted argument, or appending a literal character (if the quote was escaped).
'"' => {
// Only append half the number of backslashes encountered, because this is an escaped string.
// This will reduce `backslash_count` to either 0 or 1.
Self::append_backslashes_to(&mut arg, &mut backslash_count, 2);
match backslash_count == 0 {
// If there are no remaining encountered backslashes,
// then we have found either the start or end of a quoted argument.
true => in_quotes = !in_quotes,
// The quote character is escaped, so it is treated as a literal and appended to the arg string.
false => {
backslash_count = 0;
arg.push('"');
}
}
}
// If whitespace is encountered, only preserve it if we are currently in quotes.
// Otherwise it marks the end of the current argument.
' ' | '\t' | '\n' | '\r' => {
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
// If not in a quoted string, then this is the end of the argument.
if !in_quotes {
break;
}
// Otherwise, the whitespace must be preserved in the argument.
arg.push(c);
}
// All other characters treated as is
_ => {
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
arg.push(c);
}
}
}

// Flush any backslashes at the end of the string.
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
// Save the current remaining characters for the next step in the iterator.
self.file_content = chars.as_str();

Some(arg)
}
}

#[cfg(test)]
mod test {
use std::str::FromStr;
Expand Down
165 changes: 165 additions & 0 deletions src/compiler/response_file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/// An iterator over the arguments in a response file.
///
/// This produces results identical to `CommandLineToArgvW` except in the
/// following cases:
///
/// 1. When passed an empty string, CommandLineToArgvW returns the path to the
/// current executable file. Here, the iterator will simply be empty.
/// 2. CommandLineToArgvW interprets the first argument differently than the
/// rest. Here, all arguments are treated in identical fashion.
///
/// Parsing rules:
///
/// - Arguments are delimited by whitespace (either a space or tab).
/// - A string surrounded by double quotes is interpreted as a single argument.
/// - Backslashes are interpreted literally unless followed by a double quote.
/// - 2n backslashes followed by a double quote reduce to n backslashes and we
/// enter the "in quote" state.
/// - 2n+1 backslashes followed by a double quote reduces to n backslashes,
/// we do *not* enter the "in quote" state, and the double quote is
/// interpreted literally.
///
/// References:
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/bb776391(v=vs.85).aspx
/// - https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
#[derive(Clone, Debug)]
pub struct SplitResponseFileArgs<'a> {
/// String slice of the file content that is being parsed.
/// Slice is mutated as this iterator is executed.
file_content: &'a str,
}

impl<'a, T> From<&'a T> for SplitResponseFileArgs<'a>
where
T: AsRef<str> + 'static,
{
fn from(file_content: &'a T) -> Self {
Self {
file_content: file_content.as_ref(),
}
}
}

impl<'a> SplitResponseFileArgs<'a> {
/// Appends backslashes to `target` by decrementing `count`.
/// If `step` is >1, then `count` is decremented by `step`, resulting in 1 backslash appended for every `step`.
fn append_backslashes_to(target: &mut String, count: &mut usize, step: usize) {
while *count >= step {
target.push('\\');
*count -= step;
}
}
}

impl<'a> Iterator for SplitResponseFileArgs<'a> {
type Item = String;

fn next(&mut self) -> Option<String> {
let mut in_quotes = false;
let mut backslash_count: usize = 0;

// Strip any leading whitespace before relevant characters
let is_whitespace = |c| matches!(c, ' ' | '\t' | '\n' | '\r');
self.file_content = self.file_content.trim_start_matches(is_whitespace);

if self.file_content.is_empty() {
return None;
}

// The argument string to return, built by analyzing the current slice in the iterator.
let mut arg = String::new();
// All characters still in the string slice. Will be mutated by consuming
// values until the current arg is built.
let mut chars = self.file_content.chars();
// Build the argument by evaluating each character in the string slice.
for c in &mut chars {
match c {
// In order to handle the escape character based on the char(s) which come after it,
// they are counted instead of appended literally, until a non-backslash character is encountered.
'\\' => backslash_count += 1,
// Either starting or ending a quoted argument, or appending a literal character (if the quote was escaped).
'"' => {
// Only append half the number of backslashes encountered, because this is an escaped string.
// This will reduce `backslash_count` to either 0 or 1.
Self::append_backslashes_to(&mut arg, &mut backslash_count, 2);
match backslash_count == 0 {
// If there are no remaining encountered backslashes,
// then we have found either the start or end of a quoted argument.
true => in_quotes = !in_quotes,
// The quote character is escaped, so it is treated as a literal and appended to the arg string.
false => {
backslash_count = 0;
arg.push('"');
}
}
}
// If whitespace is encountered, only preserve it if we are currently in quotes.
// Otherwise it marks the end of the current argument.
' ' | '\t' | '\n' | '\r' => {
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
// If not in a quoted string, then this is the end of the argument.
if !in_quotes {
break;
}
// Otherwise, the whitespace must be preserved in the argument.
arg.push(c);
}
// All other characters treated as is
_ => {
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
arg.push(c);
}
}
}

// Flush any backslashes at the end of the string.
Self::append_backslashes_to(&mut arg, &mut backslash_count, 1);
// Save the current remaining characters for the next step in the iterator.
self.file_content = chars.as_str();

Some(arg)
}
}

#[cfg(test)]
mod test {
use super::*;

#[test]
fn parse_simple_args() {
let content = "-A1 -A2 -A3 -I ../includes";
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
assert_eq!(args[0], "-A1");
assert_eq!(args[1], "-A2");
assert_eq!(args[2], "-A3");
assert_eq!(args[3], "-I");
assert_eq!(args[4], "../includes");
}

#[test]
fn parse_quoted_path_arg() {
let content = "-I \"../included headers\"";
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
assert_eq!(args[0], "-I");
assert_eq!(args[1], "../included headers");
}

#[test]
fn parse_escaped_quoted_path_arg() {
let content = "-I \"../included \\\"headers\\\"\"";
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
assert_eq!(args[0], "-I");
assert_eq!(args[1], "../included \"headers\"");
}

#[test]
fn parse_various_whitespace_characters() {
let content = "-A1 -A2\n-A3\n\r-A4\r-A5\n ";
let args = SplitResponseFileArgs::from(&content).collect::<Vec<_>>();
assert_eq!(args[0], "-A1");
assert_eq!(args[1], "-A2");
assert_eq!(args[2], "-A3");
assert_eq!(args[3], "-A4");
assert_eq!(args[4], "-A5");
}
}

0 comments on commit b985902

Please sign in to comment.