diff --git a/src/CommandLine/StringToCommandLine/CSharpStyleCommandLineParser.cs b/src/CommandLine/StringToCommandLine/CSharpStyleCommandLineParser.cs
new file mode 100644
index 00000000..e24855ba
--- /dev/null
+++ b/src/CommandLine/StringToCommandLine/CSharpStyleCommandLineParser.cs
@@ -0,0 +1,233 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Text;
+
+namespace CommandLine.StringToCommandLine
+{
+    /// <summary>
+    /// Parses command lines the way C# parses string literals, splitting at each unquoted
+    /// whitespace character:
+    /// * "" -> a single empty argument
+    /// * "abc" -> abc
+    /// * abc abc -> abc, abc
+    /// * "\"" -> "
+    /// * asd"asd -> error (quote inside an unquoted argument)
+    /// * "asd -> error (unterminated string)
+    /// * \ -> error (unterminated escape)
+    /// * \['"\0abfnrtUuvx] -> see https://msdn.microsoft.com/en-us/library/ms228362.aspx
+    /// * \other -> error (unrecognized escape sequence)
+    /// </summary>
+    public class CSharpStyleCommandLineParser : StringToCommandLineParserBase
+    {
+        /// <summary>Maximum number of hexadecimal digits consumed by a \x escape.</summary>
+        private const int MaxVariableHexDigits = 4;
+
+        /// <summary>
+        /// Splits <paramref name="commandLine"/> into arguments. Parsing is lazy: errors surface
+        /// while the returned sequence is enumerated.
+        /// </summary>
+        /// <param name="commandLine">The raw command line; null or blank input yields no arguments.</param>
+        /// <returns>The parsed arguments in input order.</returns>
+        /// <exception cref="UnterminatedEscapeException">The input ends inside an escape sequence.</exception>
+        /// <exception cref="UnrecognizedEscapeSequenceException">A backslash starts an unsupported or malformed escape.</exception>
+        /// <exception cref="UnquotedQuoteException">A quote opens inside an argument, or a closing quote is not followed by whitespace.</exception>
+        /// <exception cref="UnterminatedStringException">The input ends inside a quoted string.</exception>
+        public override IEnumerable<string> Parse(string commandLine)
+        {
+            if (string.IsNullOrWhiteSpace(commandLine))
+            {
+                yield break;
+            }
+
+            var currentArg = new StringBuilder();
+            var quoting = false;
+            var pos = 0;
+            while (pos < commandLine.Length)
+            {
+                var c = commandLine[pos];
+                if (c == '\\')
+                {
+                    // Escapes are decoded both inside and outside quotes.
+                    currentArg.Append(ReadEscapedChar(commandLine, ref pos));
+                    continue;
+                }
+
+                if (c == '"')
+                {
+                    if (quoting)
+                    {
+                        // A closing quote must be followed by whitespace or the end of the input.
+                        pos++;
+                        if (pos < commandLine.Length && !char.IsWhiteSpace(commandLine[pos]))
+                        {
+                            throw new UnquotedQuoteException();
+                        }
+
+                        yield return currentArg.ToString();
+                        currentArg.Clear();
+                        quoting = false;
+                    }
+                    else
+                    {
+                        // An opening quote is only legal at the start of an argument.
+                        if (currentArg.Length > 0)
+                        {
+                            throw new UnquotedQuoteException();
+                        }
+
+                        quoting = true;
+                    }
+
+                    // Skips the opening quote, or the separator that follows a closing quote.
+                    pos++;
+                    continue;
+                }
+
+                if (!quoting && char.IsWhiteSpace(c))
+                {
+                    // Unquoted whitespace ends the current argument; runs of whitespace yield nothing.
+                    if (currentArg.Length > 0)
+                    {
+                        yield return currentArg.ToString();
+                    }
+
+                    currentArg.Clear();
+                    pos++;
+                    continue;
+                }
+
+                currentArg.Append(c);
+                pos++;
+            }
+
+            // Any quote still open here is unterminated, even when nothing follows it.
+            if (quoting)
+            {
+                throw new UnterminatedStringException();
+            }
+
+            if (currentArg.Length > 0)
+            {
+                yield return currentArg.ToString();
+            }
+        }
+
+        /// <summary>
+        /// Decodes the escape sequence whose backslash is at <paramref name="pos"/> and advances
+        /// <paramref name="pos"/> to the first character after the sequence.
+        /// </summary>
+        private static char ReadEscapedChar(string commandLine, ref int pos)
+        {
+            pos++; // step over the backslash
+            if (pos >= commandLine.Length)
+            {
+                throw new UnterminatedEscapeException();
+            }
+
+            var selector = commandLine[pos];
+            pos++;
+            switch (selector)
+            {
+                case '\'':
+                    return '\'';
+                case '"':
+                    return '"';
+                case '\\':
+                    return '\\';
+                case '0':
+                    return '\0';
+                case 'a':
+                    return '\a';
+                case 'b':
+                    return '\b';
+                case 'f':
+                    return '\f';
+                case 'n':
+                    return '\n';
+                case 'r':
+                    return '\r';
+                case 't':
+                    return '\t';
+                case 'v':
+                    return '\v';
+                case 'x':
+                    return ReadVariableLengthHexEscape(commandLine, ref pos);
+                case 'u':
+                    return ReadFixedLengthHexEscape(commandLine, ref pos, 4);
+                case 'U':
+                    return ReadFixedLengthHexEscape(commandLine, ref pos, 8);
+                default:
+                    throw new UnrecognizedEscapeSequenceException();
+            }
+        }
+
+        /// <summary>
+        /// Reads the one to four hexadecimal digits of a \x escape starting at <paramref name="pos"/>.
+        /// </summary>
+        private static char ReadVariableLengthHexEscape(string commandLine, ref int pos)
+        {
+            if (pos >= commandLine.Length)
+            {
+                throw new UnterminatedEscapeException();
+            }
+
+            var start = pos;
+            while (pos < commandLine.Length && pos - start < MaxVariableHexDigits && IsHexDigit(commandLine[pos]))
+            {
+                pos++;
+            }
+
+            // "\x" followed by a non-hex character is malformed; report it as a parser error
+            // instead of letting the number parser fail on an empty digit string.
+            if (pos == start)
+            {
+                throw new UnrecognizedEscapeSequenceException();
+            }
+
+            return ParseHexChar(commandLine.Substring(start, pos - start));
+        }
+
+        /// <summary>
+        /// Reads exactly <paramref name="digitCount"/> hexadecimal digits of a \u or \U escape
+        /// starting at <paramref name="pos"/>. Values above U+FFFF are rejected.
+        /// </summary>
+        private static char ReadFixedLengthHexEscape(string commandLine, ref int pos, int digitCount)
+        {
+            if (pos + digitCount > commandLine.Length)
+            {
+                throw new UnterminatedEscapeException();
+            }
+
+            var digits = commandLine.Substring(pos, digitCount);
+            foreach (var digit in digits)
+            {
+                if (!IsHexDigit(digit))
+                {
+                    throw new UnrecognizedEscapeSequenceException();
+                }
+            }
+
+            pos += digitCount;
+            return ParseHexChar(digits);
+        }
+
+        /// <summary>Converts validated hexadecimal digits into the UTF-16 code unit they denote.</summary>
+        private static char ParseHexChar(string digits)
+        {
+            var value = uint.Parse(digits, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
+            if (value > char.MaxValue)
+            {
+                throw new UnrecognizedEscapeSequenceException();
+            }
+
+            return (char) value;
+        }
+
+        /// <summary>Returns true for ASCII hexadecimal digits only (0-9, a-f, A-F).</summary>
+        private static bool IsHexDigit(char c)
+        {
+            return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+        }
+    }
+}
diff --git a/src/CommandLine/StringToCommandLine/DefaultWindowsCommandLineParser.cs b/src/CommandLine/StringToCommandLine/DefaultWindowsCommandLineParser.cs
new file mode 100644
index 00000000..5d9e9a90
--- /dev/null
+++ b/src/CommandLine/StringToCommandLine/DefaultWindowsCommandLineParser.cs
@@ -0,0 +1,104 @@
+using System.Collections.Generic;
+using System.Text;
+
+namespace CommandLine.StringToCommandLine
+{
+    /// <summary>
+    /// Parses command lines like CommandLineToArgvW:
+    /// * 2n backslashes followed by a quotation mark produce n backslashes; the quotation mark
+    ///   starts or ends a quoted range.
+    /// * (2n) + 1 backslashes followed by a quotation mark produce n backslashes followed by a
+    ///   literal quotation mark.
+    /// * n backslashes not followed by a quotation mark simply produce n backslashes.
+    /// * A quotation mark directly after a closing quotation mark produces a literal quotation mark.
+    /// * An unterminated quoted string at the end of the line ignores the missing quote.
+    /// </summary>
+    public class DefaultWindowsCommandLineParser : StringToCommandLineParserBase
+    {
+        /// <summary>
+        /// Splits <paramref name="commandLine"/> into arguments; evaluation is lazy.
+        /// </summary>
+        /// <param name="commandLine">The raw command line; null or blank input yields no arguments.</param>
+        /// <returns>The parsed arguments in input order. Empty arguments are kept only when quoted.</returns>
+        public override IEnumerable<string> Parse(string commandLine)
+        {
+            if (string.IsNullOrWhiteSpace(commandLine))
+            {
+                yield break;
+            }
+
+            var currentArg = new StringBuilder();
+            var quoting = false;
+            var emptyIsAnArgument = false;
+            var lastC = '\0';
+
+            // Backslashes read since the last non-backslash input character. Counting them in the
+            // input (rather than at the tail of currentArg) keeps a backslash that an earlier quote
+            // rule already emitted from being interpreted a second time.
+            var pendingBackslashes = 0;
+            foreach (var c in commandLine)
+            {
+                if (c == '\\')
+                {
+                    // Copied verbatim for now; a following quotation mark may trim some of them.
+                    pendingBackslashes++;
+                    currentArg.Append(c);
+                    lastC = c;
+                    continue;
+                }
+
+                var nrbackslash = pendingBackslashes;
+                pendingBackslashes = 0;
+                if (c == '"')
+                {
+                    if (nrbackslash % 2 == 0)
+                    {
+                        // 2n backslashes (including none): keep n, the quote toggles the quoted range.
+                        currentArg.Length -= nrbackslash / 2;
+                        quoting = !quoting;
+                        emptyIsAnArgument = true;
+                        if (quoting && lastC == '"')
+                        {
+                            // A quote directly after a closing quote re-opens the range and also
+                            // contributes one literal quotation mark.
+                            currentArg.Append(c);
+                            lastC = '\0'; // prevent the next quote from doubling again
+                            continue;
+                        }
+                    }
+                    else
+                    {
+                        // 2n + 1 backslashes: keep n backslashes followed by a literal quote.
+                        currentArg.Length -= nrbackslash / 2 + 1;
+                        currentArg.Append(c);
+                    }
+                }
+                else if (!quoting && char.IsWhiteSpace(c))
+                {
+                    // Accept empty arguments only if they were quoted.
+                    if (currentArg.Length > 0 || emptyIsAnArgument)
+                    {
+                        yield return currentArg.ToString();
+                    }
+
+                    // Reset for the next argument.
+                    currentArg.Clear();
+                    emptyIsAnArgument = false;
+                }
+                else
+                {
+                    // Ordinary character, copied as-is.
+                    currentArg.Append(c);
+                }
+
+                lastC = c;
+            }
+
+            // Emit the last argument; a still-open quoted range is accepted without its closing quote.
+            if (currentArg.Length > 0 || emptyIsAnArgument)
+            {
+                yield return currentArg.ToString();
+            }
+        }
+    }
+}
diff --git a/src/CommandLine/StringToCommandLine/StringToCommandLineParserBase.cs b/src/CommandLine/StringToCommandLine/StringToCommandLineParserBase.cs
new file mode 100644
index 00000000..8bab06d6
--- /dev/null
+++ b/src/CommandLine/StringToCommandLine/StringToCommandLineParserBase.cs
@@ -0,0 +1,28 @@
+using System;
+using System.Collections.Generic;
+
+namespace CommandLine.StringToCommandLine
+{
+    /// <summary>
+    /// Base class for parsers that split a single command line string into arguments.
+    /// </summary>
+    public abstract class StringToCommandLineParserBase
+    {
+        /// <summary>Splits <paramref name="commandLine"/> into its arguments.</summary>
+        /// <param name="commandLine">The raw command line to split.</param>
+        /// <returns>The arguments in the order they appear in the input.</returns>
+        public abstract IEnumerable<string> Parse(string commandLine);
+
+        /// <summary>Thrown when the input ends inside a quoted string.</summary>
+        public class UnterminatedStringException : ArgumentException {}
+
+        /// <summary>Thrown when a backslash starts an escape sequence the parser does not support.</summary>
+        public class UnrecognizedEscapeSequenceException : ArgumentException {}
+
+        /// <summary>Thrown when a quotation mark appears where the parser does not allow one.</summary>
+        public class UnquotedQuoteException : ArgumentException {}
+
+        /// <summary>Thrown when the input ends in the middle of an escape sequence.</summary>
+        public class UnterminatedEscapeException : ArgumentException {}
+    }
+}
diff --git a/tests/CommandLine.Tests/CommandLine.Tests.csproj b/tests/CommandLine.Tests/CommandLine.Tests.csproj
index d9a9d62b..22da077f 100644
--- a/tests/CommandLine.Tests/CommandLine.Tests.csproj
+++ b/tests/CommandLine.Tests/CommandLine.Tests.csproj
@@ -116,6 +116,8 @@
+
+
@@ -455,4 +457,4 @@
-
\ No newline at end of file
+
diff --git a/tests/CommandLine.Tests/Unit/StringToCommandLine/CSharpStyleCommandLineParserTests.cs b/tests/CommandLine.Tests/Unit/StringToCommandLine/CSharpStyleCommandLineParserTests.cs
new file mode 100644
index 00000000..862711b2
--- /dev/null
+++ b/tests/CommandLine.Tests/Unit/StringToCommandLine/CSharpStyleCommandLineParserTests.cs
@@ -0,0 +1,53 @@
+using System.Collections;
+using System.Linq;
+using CommandLine.StringToCommandLine;
+using Xunit;
+
+namespace CommandLine.Tests.Unit.StringToCommandLine
+{
+    public class CSharpStyleCommandLineParserTests
+    {
+        [Fact]
+        public void TestMethod1() { RunTest("test", new[] {"test"}); }
+
+        [Fact]
+        public void TestMethod2() { RunTest("test test", new[] {"test", "test"}); }
+
+        [Fact]
+        public void TestMethod3() { RunTest("test \"test\"", new[] {"test", "test"}); }
+
+        [Fact]
+        public void TestMethod4() { RunTest("test \"te\\\"s\\\"t\"", new[] {"test", "te\"s\"t"}); }
+
+        [Fact]
+        public void TestMethod4B() { RunTest("test \"te\\\"\\\"\\\"\\\"s\\\"t\"", new[] {"test", "te\"\"\"\"s\"t"}); }
+
+        [Fact]
+        public void TestMethod5() { Assert.Throws<StringToCommandLineParserBase.UnterminatedStringException>(() => RunTest("\"abc d e", new[] {""})); }
+
+        [Fact]
+        public void TestMethod6() { Assert.Throws<StringToCommandLineParserBase.UnterminatedEscapeException>(() => RunTest("asd\\", new[] {""})); }
+
+        [Fact]
+        public void TestMethod7() { RunTest("\\\\\\a\\b\\'\\\"\\0\\f\\t\\v", new[] {"\\\a\b\'\"\0\f\t\v"}); }
+
+        [Fact]
+        public void TestMethod8() { RunTest("Hello\\x1\\x12\\x123\\x1234", new[] {"Hello\x1\x12\x123\x1234"}); }
+
+        [Fact]
+        public void TestMethod9() { RunTest("a\\nb\\rc", new[] {"a\nb\rc"}); }
+
+        [Fact]
+        public void TestMethod10() { Assert.Throws<StringToCommandLineParserBase.UnrecognizedEscapeSequenceException>(() => RunTest("\\xg", new[] {""})); }
+
+        [Fact]
+        public void TestMethod11() { Assert.Throws<StringToCommandLineParserBase.UnterminatedStringException>(() => RunTest("abc \"", new[] {""})); }
+
+        private static void RunTest(string commandLine, ICollection expected)
+        {
+            var parser = new CSharpStyleCommandLineParser();
+            var enumerable = parser.Parse(commandLine);
+            Assert.Equal(expected, enumerable.ToArray());
+        }
+    }
+}
diff --git a/tests/CommandLine.Tests/Unit/StringToCommandLine/DefaultWindowsCommandLineParserTests b/tests/CommandLine.Tests/Unit/StringToCommandLine/DefaultWindowsCommandLineParserTests
new file mode 100644
index 00000000..fbba447f
--- /dev/null
+++ b/tests/CommandLine.Tests/Unit/StringToCommandLine/DefaultWindowsCommandLineParserTests
@@ -0,0 +1,47 @@
+using System.Collections;
+using System.Linq;
+using CommandLine.StringToCommandLine;
+using Xunit;
+
+namespace CommandLine.Tests.Unit.StringToCommandLine
+{
+    public class DefaultWindowsCommandLineParserTests
+    {
+        [Fact]
+        public void TestMethod1() { RunTest("test", new[] {"test"}); }
+
+        [Fact]
+        public void TestMethod2() { RunTest("test test", new[] {"test", "test"}); }
+
+        [Fact]
+        public void TestMethod3() { RunTest("test \"test\"", new[] {"test", "test"}); }
+
+        [Fact]
+        public void TestMethod4() { RunTest("test \"te\"s\"t\"", new[] {"test", "test"}); }
+
+        [Fact]
+        public void TestMethod4B() { RunTest("test \"te\"\"\"\"s\"t\"", new[] {"test", "te\"\"st"}); }
+
+        [Fact]
+        public void TestMethod5() { RunTest("\"abc\" d e", new[] {"abc", "d", "e"}); }
+
+        [Fact]
+        public void TestMethod6() { RunTest("a\\\\b d\"e f\"g h", new[] {"a\\\\b", "de fg", "h"}); }
+
+        [Fact]
+        public void TestMethod7() { RunTest("a\\\\\\\"b c d", new[] {"a\\\"b", "c", "d"}); }
+
+        [Fact]
+        public void TestMethod8() { RunTest("a\\\\\\\\\"b c\" d e", new[] {"a\\\\b c", "d", "e"}); }
+
+        [Fact]
+        public void TestMethod9() { RunTest("a\\\\\"\\\"b", new[] {"a\\\"b"}); }
+
+        private static void RunTest(string commandLine, ICollection expected)
+        {
+            var parser = new DefaultWindowsCommandLineParser();
+            var enumerable = parser.Parse(commandLine);
+            Assert.Equal(expected, enumerable.ToArray());
+        }
+    }
+}