From ba267b837ee66298cf32d0aa574aa82e473bd006 Mon Sep 17 00:00:00 2001 From: Lawrence D'Anna Date: Fri, 11 Oct 2024 13:30:51 -0400 Subject: [PATCH 1/6] bugfix: fix splitting of examples like 'foo"^' mslex.split did not correctly split command lines like 'foo"^'. Also, unified the example lists in the test suite, and added a subprocess check to verify examples against cmd.exe. --- mslex/__init__.py | 6 +- tests/cmdline.py | 53 +++++ tests/test_mslex.py | 459 ++++++++++++++++++++++++++------------------ 3 files changed, 333 insertions(+), 185 deletions(-) create mode 100644 tests/cmdline.py diff --git a/mslex/__init__.py b/mslex/__init__.py index 083d59c..b054e31 100644 --- a/mslex/__init__.py +++ b/mslex/__init__.py @@ -82,15 +82,15 @@ def split(s: str, like_cmd: bool = True, check: bool = True) -> List[str]: def i() -> Iterator[str]: quote_mode = False - for m in re.finditer(r"(\^.)|(\")|([^\^\"]+)", s): + for m in re.finditer(r"(\^.?)|(\")|([^\^\"]+)", s): escaped, quote, text = m.groups() if escaped: if quote_mode: yield escaped - if escaped[1] == '"': + if len(escaped) > 1 and escaped[1] == '"': quote_mode = False else: - yield escaped[1] + yield escaped[1:] elif quote: yield '"' quote_mode = not quote_mode diff --git a/tests/cmdline.py b/tests/cmdline.py new file mode 100644 index 0000000..41f418d --- /dev/null +++ b/tests/cmdline.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +# This is a helper script for creating test data for mslex. +# +# If python is a modern build from python.org, then it will be linked +# against UCRT, so sys.argv can be used to record how UCRT interprets the +# command line. +# +# CommandLineToArgvW() should match what msvcrt.dll does. 
+# +# The output of GetCommandLineW() is recorded here so we know exactly what +# cmd.exe did to the command line + +import sys +import json +import ctypes +from ctypes import windll, POINTER, c_int +from ctypes.wintypes import LPCWSTR, HLOCAL, LPWSTR + +kernel32 = windll.kernel32 +shell32 = windll.shell32 + +CommandLineToArgvW = shell32.CommandLineToArgvW +CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] +CommandLineToArgvW.restype = POINTER(LPWSTR) + +LocalFree = kernel32.LocalFree +LocalFree.argtypes = [HLOCAL] +LocalFree.restype = HLOCAL + +GetCommandLineW = kernel32.GetCommandLineW +GetCommandLineW.restype = ctypes.c_wchar_p + + +def main(): + cmdline = GetCommandLineW() + + argc = c_int() + argv = CommandLineToArgvW(cmdline, ctypes.byref(argc)) + args = [argv[i] for i in range(argc.value)] + LocalFree(argv) + + j = { + "GetCommandLineW": cmdline, + "CommandLineToArgvW": args, + "sys.argv": sys.argv, + } + json.dump(j, sys.stdout, indent=True) + sys.stdout.write("\n") + + +if __name__ == "__main__": + main() diff --git a/tests/test_mslex.py b/tests/test_mslex.py index f82d9cf..d4768d4 100644 --- a/tests/test_mslex.py +++ b/tests/test_mslex.py @@ -3,14 +3,18 @@ """Tests for `mslex` package.""" +import os import sys +import json import itertools import functools import unittest import subprocess +from typing import List, cast, Any, Optional from mslex import split, quote + if sys.platform == "win32": import ctypes from ctypes import windll, wintypes @@ -31,176 +35,259 @@ def ctypes_split(s): return result -examples = [ - (r"", []), - (r'"', [""]), - (r"x", ["x"]), - (r'x"', ["x"]), - (r"foo", ["foo"]), - (r'foo "bar baz"', ["foo", "bar baz"]), - (r'"abc" d e', ["abc", "d", "e"]), - (r'a\\\b d"e f"g h', [r"a\\\b", "de fg", "h"]), - (r"a\\\"b c d", [r"a\"b", "c", "d"]), - (r'a\\\\"b c" d e', [r"a\\b c", "d", "e"]), - ('"" "" ""', ["", "", ""]), - ('" x', [" x"]), - ('"" x', ["", "x"]), - ('""" x', ['"', "x"]), - ('"""" x', ['" x']), - ('""""" x', ['"', 
"x"]), - ('"""""" x', ['""', "x"]), - ('""""""" x', ['"" x']), - ('"""""""" x', ['""', "x"]), - ('""""""""" x', ['"""', "x"]), - ('"""""""""" x', ['""" x']), - ('""""""""""" x', ['"""', "x"]), - ('"""""""""""" x', ['""""', "x"]), - ('""""""""""""" x', ['"""" x']), - ('"aaa x', ["aaa x"]), - ('"aaa" x', ["aaa", "x"]), - ('"aaa"" x', ['aaa"', "x"]), - ('"aaa""" x', ['aaa" x']), - ('"aaa"""" x', ['aaa"', "x"]), - ('"aaa""""" x', ['aaa""', "x"]), - ('"aaa"""""" x', ['aaa"" x']), - ('"aaa""""""" x', ['aaa""', "x"]), - ('"aaa"""""""" x', ['aaa"""', "x"]), - ('"aaa""""""""" x', ['aaa""" x']), - ('"aaa"""""""""" x', ['aaa"""', "x"]), - ('"aaa""""""""""" x', ['aaa""""', "x"]), - ('"aaa"""""""""""" x', ['aaa"""" x']), - ('"aaa\\ x', ["aaa\\ x"]), - ('"aaa\\" x', ['aaa" x']), - ('"aaa\\"" x', ['aaa"', "x"]), - ('"aaa\\""" x', ['aaa""', "x"]), - ('"aaa\\"""" x', ['aaa"" x']), - ('"aaa\\""""" x', ['aaa""', "x"]), - ('"aaa\\"""""" x', ['aaa"""', "x"]), - ('"aaa\\""""""" x', ['aaa""" x']), - ('"aaa\\"""""""" x', ['aaa"""', "x"]), - ('"aaa\\""""""""" x', ['aaa""""', "x"]), - ('"aaa\\"""""""""" x', ['aaa"""" x']), - ('"aaa\\""""""""""" x', ['aaa""""', "x"]), - ('"aaa\\"""""""""""" x', ['aaa"""""', "x"]), - ('"aaa\\\\ x', ["aaa\\\\ x"]), - ('"aaa\\\\" x', ["aaa\\", "x"]), - ('"aaa\\\\"" x', ['aaa\\"', "x"]), - ('"aaa\\\\""" x', ['aaa\\" x']), - ('"aaa\\\\"""" x', ['aaa\\"', "x"]), - ('"aaa\\\\""""" x', ['aaa\\""', "x"]), - ('"aaa\\\\"""""" x', ['aaa\\"" x']), - ('"aaa\\\\""""""" x', ['aaa\\""', "x"]), - ('"aaa\\\\"""""""" x', ['aaa\\"""', "x"]), - ('"aaa\\\\""""""""" x', ['aaa\\""" x']), - ('"aaa\\\\"""""""""" x', ['aaa\\"""', "x"]), - ('"aaa\\\\""""""""""" x', ['aaa\\""""', "x"]), - ('"aaa\\\\"""""""""""" x', ['aaa\\"""" x']), - ('"aaa\\\\\\ x', ["aaa\\\\\\ x"]), - ('"aaa\\\\\\" x', ['aaa\\" x']), - ('"aaa\\\\\\"" x', ['aaa\\"', "x"]), - ('"aaa\\\\\\""" x', ['aaa\\""', "x"]), - ('"aaa\\\\\\"""" x', ['aaa\\"" x']), - ('"aaa\\\\\\""""" x', ['aaa\\""', "x"]), - ('"aaa\\\\\\"""""" x', 
['aaa\\"""', "x"]), - ('"aaa\\\\\\""""""" x', ['aaa\\""" x']), - ('"aaa\\\\\\"""""""" x', ['aaa\\"""', "x"]), - ('"aaa\\\\\\""""""""" x', ['aaa\\""""', "x"]), - ('"aaa\\\\\\"""""""""" x', ['aaa\\"""" x']), - ('"aaa\\\\\\""""""""""" x', ['aaa\\""""', "x"]), - ('"aaa\\\\\\"""""""""""" x', ['aaa\\"""""', "x"]), - ('"aaa\\\\\\\\ x', ["aaa\\\\\\\\ x"]), - ('"aaa\\\\\\\\" x', ["aaa\\\\", "x"]), - ('"aaa\\\\\\\\"" x', ['aaa\\\\"', "x"]), - ('"aaa\\\\\\\\""" x', ['aaa\\\\" x']), - ('"aaa\\\\\\\\"""" x', ['aaa\\\\"', "x"]), - ('"aaa\\\\\\\\""""" x', ['aaa\\\\""', "x"]), - ('"aaa\\\\\\\\"""""" x', ['aaa\\\\"" x']), - ('"aaa\\\\\\\\""""""" x', ['aaa\\\\""', "x"]), - ('"aaa\\\\\\\\"""""""" x', ['aaa\\\\"""', "x"]), - ('"aaa\\\\\\\\""""""""" x', ['aaa\\\\""" x']), - ('"aaa\\\\\\\\"""""""""" x', ['aaa\\\\"""', "x"]), - ('"aaa\\\\\\\\""""""""""" x', ['aaa\\\\""""', "x"]), - ('"aaa\\\\\\\\"""""""""""" x', ['aaa\\\\"""" x']), - (" x", ["x"]), - ('" x', [" x"]), - ('"" x', ["", "x"]), - ('""" x', ['"', "x"]), - ('"""" x', ['" x']), - ('""""" x', ['"', "x"]), - ('"""""" x', ['""', "x"]), - ('""""""" x', ['"" x']), - ('"""""""" x', ['""', "x"]), - ('""""""""" x', ['"""', "x"]), - ('"""""""""" x', ['""" x']), - ('""""""""""" x', ['"""', "x"]), - ('"""""""""""" x', ['""""', "x"]), - ("\\ x", ["\\", "x"]), - ('\\" x', ['"', "x"]), - ('\\"" x', ['" x']), - ('\\""" x', ['"', "x"]), - ('\\"""" x', ['""', "x"]), - ('\\""""" x', ['"" x']), - ('\\"""""" x', ['""', "x"]), - ('\\""""""" x', ['"""', "x"]), - ('\\"""""""" x', ['""" x']), - ('\\""""""""" x', ['"""', "x"]), - ('\\"""""""""" x', ['""""', "x"]), - ('\\""""""""""" x', ['"""" x']), - ('\\"""""""""""" x', ['""""', "x"]), - ("\\\\ x", ["\\\\", "x"]), - ('\\\\" x', ["\\ x"]), - ('\\\\"" x', ["\\", "x"]), - ('\\\\""" x', ['\\"', "x"]), - ('\\\\"""" x', ['\\" x']), - ('\\\\""""" x', ['\\"', "x"]), - ('\\\\"""""" x', ['\\""', "x"]), - ('\\\\""""""" x', ['\\"" x']), - ('\\\\"""""""" x', ['\\""', "x"]), - ('\\\\""""""""" x', ['\\"""', "x"]), - 
('\\\\"""""""""" x', ['\\""" x']), - ('\\\\""""""""""" x', ['\\"""', "x"]), - ('\\\\"""""""""""" x', ['\\""""', "x"]), - ("\\\\\\ x", ["\\\\\\", "x"]), - ('\\\\\\" x', ['\\"', "x"]), - ('\\\\\\"" x', ['\\" x']), - ('\\\\\\""" x', ['\\"', "x"]), - ('\\\\\\"""" x', ['\\""', "x"]), - ('\\\\\\""""" x', ['\\"" x']), - ('\\\\\\"""""" x', ['\\""', "x"]), - ('\\\\\\""""""" x', ['\\"""', "x"]), - ('\\\\\\"""""""" x', ['\\""" x']), - ('\\\\\\""""""""" x', ['\\"""', "x"]), - ('\\\\\\"""""""""" x', ['\\""""', "x"]), - ('\\\\\\""""""""""" x', ['\\"""" x']), - ('\\\\\\"""""""""""" x', ['\\""""', "x"]), - ("\\\\\\\\ x", ["\\\\\\\\", "x"]), - ('\\\\\\\\" x', ["\\\\ x"]), - ('\\\\\\\\"" x', ["\\\\", "x"]), - ('\\\\\\\\""" x', ['\\\\"', "x"]), - ('\\\\\\\\"""" x', ['\\\\" x']), - ('\\\\\\\\""""" x', ['\\\\"', "x"]), - ('\\\\\\\\"""""" x', ['\\\\""', "x"]), - ('\\\\\\\\""""""" x', ['\\\\"" x']), - ('\\\\\\\\"""""""" x', ['\\\\""', "x"]), - ('\\\\\\\\""""""""" x', ['\\\\"""', "x"]), - ('\\\\\\\\"""""""""" x', ['\\\\""" x']), - ('\\\\\\\\""""""""""" x', ['\\\\"""', "x"]), - ('\\\\\\\\"""""""""""" x', ['\\\\""""', "x"]), -] +def cmd_split(s: str) -> List[str]: + assert sys.platform == "win32" + script = os.path.join(os.path.dirname(__file__), "cmdline.py") + cmdline = script + " " + s + proc = subprocess.run(cmdline, shell=True, stdout=subprocess.PIPE, check=True) + args = json.loads(proc.stdout)["CommandLineToArgvW"] + return args[2:] # first two args are "python.exe cmdline.py" -cmd_examples = [ - (r'"foo &whoami bar"', ["foo &whoami bar"]), - (r"^^", ["^"]), - (r'"^"', ["^"]), - (r'"^^"', ["^^"]), - (r"foo^bar", ["foobar"]), - (r"foo^^bar", ["foo^bar"]), - (r'"foo^bar"', ["foo^bar"]), - (r'"foo^^bar"', ["foo^^bar"]), -] +class Example: + def __init__(self, input: str, output: List[str], cmd_output: Optional[List[str]] = None): + self.input = input + self.output = output + self.cmd_output = cmd_output if cmd_output is not None else output +examples = [ + Example(r"", []), + 
Example(r'"', [""]), + Example(r'""', [""]), + Example(r'"""', ['"']), + Example(r'""""', ['"']), + Example(r'"""""', ['"']), + Example(r'""""""', ['""']), + Example(r'"""""""', ['""']), + Example(r'""""""""', ['""']), + Example(r'"""""""""', ['"""']), + Example(r'""""""""""', ['"""']), + Example(r' "', [""]), + Example(r' ""', [""]), + Example(r' """', ['"']), + Example(r' """"', ['"']), + Example(r' """""', ['"']), + Example(r' """"""', ['""']), + Example(r' """""""', ['""']), + Example(r' """"""""', ['""']), + Example(r' """"""""""', ['"""']), + Example(r" ", []), + Example(r'" ', [" "]), + Example(r'"" ', [""]), + Example(r'""" ', ['"']), + Example(r'"""" ', ['" ']), + Example(r'""""" ', ['"']), + Example(r'"""""" ', ['""']), + Example(r'""""""" ', ['"" ']), + Example(r'"""""""" ', ['""']), + Example(r'"""""""""" ', ['""" ']), + Example(r"x", ["x"]), + Example(r'x"', ["x"]), + Example(r"foo", ["foo"]), + Example(r'foo "bar baz"', ["foo", "bar baz"]), + Example(r'"abc" d e', ["abc", "d", "e"]), + Example(r'a\\\b d"e f"g h', [r"a\\\b", "de fg", "h"]), + Example(r"a\\\"b c d", [r"a\"b", "c", "d"]), + Example(r'a\\\\"b c" d e', [r"a\\b c", "d", "e"]), + Example('"" "" ""', ["", "", ""]), + Example('" x', [" x"]), + Example('"" x', ["", "x"]), + Example('""" x', ['"', "x"]), + Example('"""" x', ['" x']), + Example('""""" x', ['"', "x"]), + Example('"""""" x', ['""', "x"]), + Example('""""""" x', ['"" x']), + Example('"""""""" x', ['""', "x"]), + Example('""""""""" x', ['"""', "x"]), + Example('"""""""""" x', ['""" x']), + Example('""""""""""" x', ['"""', "x"]), + Example('"""""""""""" x', ['""""', "x"]), + Example('""""""""""""" x', ['"""" x']), + Example('"aaa x', ["aaa x"]), + Example('"aaa" x', ["aaa", "x"]), + Example('"aaa"" x', ['aaa"', "x"]), + Example('"aaa""" x', ['aaa" x']), + Example('"aaa"""" x', ['aaa"', "x"]), + Example('"aaa""""" x', ['aaa""', "x"]), + Example('"aaa"""""" x', ['aaa"" x']), + Example('"aaa""""""" x', ['aaa""', "x"]), + 
Example('"aaa"""""""" x', ['aaa"""', "x"]), + Example('"aaa""""""""" x', ['aaa""" x']), + Example('"aaa"""""""""" x', ['aaa"""', "x"]), + Example('"aaa""""""""""" x', ['aaa""""', "x"]), + Example('"aaa"""""""""""" x', ['aaa"""" x']), + Example('"aaa\\ x', ["aaa\\ x"]), + Example('"aaa\\" x', ['aaa" x']), + Example('"aaa\\"" x', ['aaa"', "x"]), + Example('"aaa\\""" x', ['aaa""', "x"]), + Example('"aaa\\"""" x', ['aaa"" x']), + Example('"aaa\\""""" x', ['aaa""', "x"]), + Example('"aaa\\"""""" x', ['aaa"""', "x"]), + Example('"aaa\\""""""" x', ['aaa""" x']), + Example('"aaa\\"""""""" x', ['aaa"""', "x"]), + Example('"aaa\\""""""""" x', ['aaa""""', "x"]), + Example('"aaa\\"""""""""" x', ['aaa"""" x']), + Example('"aaa\\""""""""""" x', ['aaa""""', "x"]), + Example('"aaa\\"""""""""""" x', ['aaa"""""', "x"]), + Example('"aaa\\\\ x', ["aaa\\\\ x"]), + Example('"aaa\\\\" x', ["aaa\\", "x"]), + Example('"aaa\\\\"" x', ['aaa\\"', "x"]), + Example('"aaa\\\\""" x', ['aaa\\" x']), + Example('"aaa\\\\"""" x', ['aaa\\"', "x"]), + Example('"aaa\\\\""""" x', ['aaa\\""', "x"]), + Example('"aaa\\\\"""""" x', ['aaa\\"" x']), + Example('"aaa\\\\""""""" x', ['aaa\\""', "x"]), + Example('"aaa\\\\"""""""" x', ['aaa\\"""', "x"]), + Example('"aaa\\\\""""""""" x', ['aaa\\""" x']), + Example('"aaa\\\\"""""""""" x', ['aaa\\"""', "x"]), + Example('"aaa\\\\""""""""""" x', ['aaa\\""""', "x"]), + Example('"aaa\\\\"""""""""""" x', ['aaa\\"""" x']), + Example('"aaa\\\\\\ x', ["aaa\\\\\\ x"]), + Example('"aaa\\\\\\" x', ['aaa\\" x']), + Example('"aaa\\\\\\"" x', ['aaa\\"', "x"]), + Example('"aaa\\\\\\""" x', ['aaa\\""', "x"]), + Example('"aaa\\\\\\"""" x', ['aaa\\"" x']), + Example('"aaa\\\\\\""""" x', ['aaa\\""', "x"]), + Example('"aaa\\\\\\"""""" x', ['aaa\\"""', "x"]), + Example('"aaa\\\\\\""""""" x', ['aaa\\""" x']), + Example('"aaa\\\\\\"""""""" x', ['aaa\\"""', "x"]), + Example('"aaa\\\\\\""""""""" x', ['aaa\\""""', "x"]), + Example('"aaa\\\\\\"""""""""" x', ['aaa\\"""" x']), + 
Example('"aaa\\\\\\""""""""""" x', ['aaa\\""""', "x"]), + Example('"aaa\\\\\\"""""""""""" x', ['aaa\\"""""', "x"]), + Example('"aaa\\\\\\\\ x', ["aaa\\\\\\\\ x"]), + Example('"aaa\\\\\\\\" x', ["aaa\\\\", "x"]), + Example('"aaa\\\\\\\\"" x', ['aaa\\\\"', "x"]), + Example('"aaa\\\\\\\\""" x', ['aaa\\\\" x']), + Example('"aaa\\\\\\\\"""" x', ['aaa\\\\"', "x"]), + Example('"aaa\\\\\\\\""""" x', ['aaa\\\\""', "x"]), + Example('"aaa\\\\\\\\"""""" x', ['aaa\\\\"" x']), + Example('"aaa\\\\\\\\""""""" x', ['aaa\\\\""', "x"]), + Example('"aaa\\\\\\\\"""""""" x', ['aaa\\\\"""', "x"]), + Example('"aaa\\\\\\\\""""""""" x', ['aaa\\\\""" x']), + Example('"aaa\\\\\\\\"""""""""" x', ['aaa\\\\"""', "x"]), + Example('"aaa\\\\\\\\""""""""""" x', ['aaa\\\\""""', "x"]), + Example('"aaa\\\\\\\\"""""""""""" x', ['aaa\\\\"""" x']), + Example(" x", ["x"]), + Example('" x', [" x"]), + Example('"" x', ["", "x"]), + Example('""" x', ['"', "x"]), + Example('"""" x', ['" x']), + Example('""""" x', ['"', "x"]), + Example('"""""" x', ['""', "x"]), + Example('""""""" x', ['"" x']), + Example('"""""""" x', ['""', "x"]), + Example('""""""""" x', ['"""', "x"]), + Example('"""""""""" x', ['""" x']), + Example('""""""""""" x', ['"""', "x"]), + Example('"""""""""""" x', ['""""', "x"]), + Example("\\ x", ["\\", "x"]), + Example('\\" x', ['"', "x"]), + Example('\\"" x', ['" x']), + Example('\\""" x', ['"', "x"]), + Example('\\"""" x', ['""', "x"]), + Example('\\""""" x', ['"" x']), + Example('\\"""""" x', ['""', "x"]), + Example('\\""""""" x', ['"""', "x"]), + Example('\\"""""""" x', ['""" x']), + Example('\\""""""""" x', ['"""', "x"]), + Example('\\"""""""""" x', ['""""', "x"]), + Example('\\""""""""""" x', ['"""" x']), + Example('\\"""""""""""" x', ['""""', "x"]), + Example("\\\\ x", ["\\\\", "x"]), + Example('\\\\" x', ["\\ x"]), + Example('\\\\"" x', ["\\", "x"]), + Example('\\\\""" x', ['\\"', "x"]), + Example('\\\\"""" x', ['\\" x']), + Example('\\\\""""" x', ['\\"', "x"]), + Example('\\\\"""""" x', 
['\\""', "x"]), + Example('\\\\""""""" x', ['\\"" x']), + Example('\\\\"""""""" x', ['\\""', "x"]), + Example('\\\\""""""""" x', ['\\"""', "x"]), + Example('\\\\"""""""""" x', ['\\""" x']), + Example('\\\\""""""""""" x', ['\\"""', "x"]), + Example('\\\\"""""""""""" x', ['\\""""', "x"]), + Example("\\\\\\ x", ["\\\\\\", "x"]), + Example('\\\\\\" x', ['\\"', "x"]), + Example('\\\\\\"" x', ['\\" x']), + Example('\\\\\\""" x', ['\\"', "x"]), + Example('\\\\\\"""" x', ['\\""', "x"]), + Example('\\\\\\""""" x', ['\\"" x']), + Example('\\\\\\"""""" x', ['\\""', "x"]), + Example('\\\\\\""""""" x', ['\\"""', "x"]), + Example('\\\\\\"""""""" x', ['\\""" x']), + Example('\\\\\\""""""""" x', ['\\"""', "x"]), + Example('\\\\\\"""""""""" x', ['\\""""', "x"]), + Example('\\\\\\""""""""""" x', ['\\"""" x']), + Example('\\\\\\"""""""""""" x', ['\\""""', "x"]), + Example("\\\\\\\\ x", ["\\\\\\\\", "x"]), + Example('\\\\\\\\" x', ["\\\\ x"]), + Example('\\\\\\\\"" x', ["\\\\", "x"]), + Example('\\\\\\\\""" x', ['\\\\"', "x"]), + Example('\\\\\\\\"""" x', ['\\\\" x']), + Example('\\\\\\\\""""" x', ['\\\\"', "x"]), + Example('\\\\\\\\"""""" x', ['\\\\""', "x"]), + Example('\\\\\\\\""""""" x', ['\\\\"" x']), + Example('\\\\\\\\"""""""" x', ['\\\\""', "x"]), + Example('\\\\\\\\""""""""" x', ['\\\\"""', "x"]), + Example('\\\\\\\\"""""""""" x', ['\\\\""" x']), + Example('\\\\\\\\""""""""""" x', ['\\\\"""', "x"]), + Example('\\\\\\\\"""""""""""" x', ['\\\\""""', "x"]), + Example('"x"', ["x"]), + Example('"^x"', ["^x"]), + Example('"^^x"', ["^^x"]), + Example('"x', ["x"]), + Example('"^x', ["^x"]), + Example('"^^x', ["^^x"]), + Example('"', [""]), + Example('"^', ["^"]), + Example('"^^', ["^^"]), + Example('"^ ', ["^ "]), + Example(":dir", [":dir"]), + Example(";;;a,, b, c===", [";;;a,,", "b,", "c==="]), + Example("^;;a", ["^;;a"], [";;a"]), + Example('a "<>||&&', ["a", "<>||&&"]), + Example('a "<>||&&^', ["a", "<>||&&^"]), + Example('a "<>||&&^^', ["a", "<>||&&^^"]), + Example('"foo &whoami 
bar"', ["foo &whoami bar"]), + Example("^^", ["^^"], ["^"]), + Example('"^"', ["^"]), + Example('"^^"', ["^^"]), + Example("foo^bar", ["foo^bar"], ["foobar"]), + Example("foo^^bar", ["foo^^bar"], ["foo^bar"]), + Example('"foo^bar"', ["foo^bar"]), + Example('"foo^^bar"', ["foo^^bar"]), + Example('"x"', ["x"]), + Example('"^x"', ["^x"]), + Example('"^^x"', ["^^x"]), + Example('"x', ["x"]), + Example('"^x', ["^x"]), + Example('"^^x', ["^^x"]), + Example('"', [""]), + Example('"^', ["^"]), + Example('"^^', ["^^"]), + Example('"^ ', ["^ "]), + Example(":dir", [":dir"]), + Example(";;;a,, b, c===", [";;;a,,", "b,", "c==="]), + Example('a "<>||&&', ["a", "<>||&&"]), + Example('a "<>||&&^', ["a", "<>||&&^"]), + Example('a "<>||&&^^', ["a", "<>||&&^^"]), + Example("foo", ["foo"]), + Example("foo^", ["foo^"], ["foo"]), + Example("foo^^", ["foo^^"], ["foo^"]), + Example("foo^^^", ["foo^^^"], ["foo^"]), + Example("foo^^^^", ["foo^^^^"], ["foo^^"]), + Example("foo^ bar", ["foo^", "bar"], ["foo", "bar"]), + Example("foo^^ bar", ["foo^^", "bar"], ["foo^", "bar"]), + Example("foo^^^ bar", ["foo^^^", "bar"], ["foo^", "bar"]), + Example("foo^^^^ bar", ["foo^^^^", "bar"], ["foo^^", "bar"]), + Example('"foo^" bar', ["foo^", "bar"], ["foo^", "bar"]), + Example('"foo^^" bar', ["foo^^", "bar"], ["foo^^", "bar"]), + Example('"foo^^^" bar', ["foo^^^", "bar"], ["foo^^^", "bar"]), + Example('"foo^^^^" bar', ["foo^^^^", "bar"], ["foo^^^^", "bar"]), +] + pretty_examples = [ (r"c:\Program Files\FooBar", r'"c:\Program Files\FooBar"'), (r"c:\Program Files (x86)\FooBar", r'"c:\Program Files (x86)\FooBar"'), @@ -221,12 +308,14 @@ def ctypes_split(s): class TestMslex(unittest.TestCase): """Tests for `mslex` package.""" - def case(self, s, ans, cmd): + def case(self, s: str, ans: str, cmd: bool) -> None: + if sys.platform == "win32": + win_split = cmd_split if cmd else ctypes_split try: if ans is not None: self.assertEqual(split(s, like_cmd=cmd), ans) - if sys.platform == "win32" and not cmd: - 
self.assertEqual(split(s), ctypes_split(s)) + if sys.platform == "win32": + self.assertEqual(split(s, like_cmd=cmd), win_split(s)) except AssertionError: print("in: «{}»".format(s)) print() @@ -238,7 +327,7 @@ def case(self, s, ans, cmd): print("ans: «{}»".format(x)) print() if sys.platform == "win32": - for x in ctypes_split(s): + for x in win_split(s): print("win: «{}»".format(x)) print() raise @@ -273,30 +362,36 @@ def test_multi_quotes(self): self.case(s, None, False) def test_examples(self): - for s, ans in examples: - self.case(s, ans, cmd=False) + for e in examples: + self.case(e.input, e.output, cmd=False) def test_examples_for_cmd(self): - for s, ans in cmd_examples: - self.case(s, ans, cmd=True) + for e in examples: + self.case(e.input, e.cmd_output, cmd=True) def test_quote_examples(self): qu = functools.partial(quote, for_cmd=False) sp = functools.partial(split, like_cmd=False) - for s, ans in itertools.chain(examples, cmd_examples): - self.assertEqual(ans, sp(" ".join(map(qu, ans)))) + for e in examples: + self.assertEqual(e.output, sp(" ".join(map(qu, e.output)))) + if e.output == e.cmd_output: + continue + self.assertEqual(e.cmd_output, sp(" ".join(map(qu, e.cmd_output)))) def test_quote_examples_cmd(self): - for s, ans in itertools.chain(examples, cmd_examples): - self.assertEqual(ans, split(" ".join(map(quote, ans)))) + for e in examples: + self.assertEqual(e.output, split(" ".join(map(quote, e.output)))) + if e.output == e.cmd_output: + continue + self.assertEqual(e.cmd_output, split(" ".join(map(quote, e.cmd_output)))) def test_requote_examples_cmd(self): - for s, ans in examples: - self.assertEqual([s], split(quote(s))) + for e in examples: + self.assertEqual([e.input], split(quote(e.input))) def test_requote_examples(self): - for s, ans in examples: - self.assertEqual([s], split(quote(s, for_cmd=False), like_cmd=False)) + for e in examples: + self.assertEqual([e.input], split(quote(e.input, for_cmd=False), like_cmd=False)) def 
test_quote_every_string(self): def every_string(): From 7735738bc346ed8acd9172da2fa37a50d9eb13a9 Mon Sep 17 00:00:00 2001 From: Lawrence D'Anna Date: Tue, 15 Oct 2024 08:13:10 -0400 Subject: [PATCH 2/6] added parser for modern argument parsing rules * Added a split function that supports the modern (post VS 2005) argument parsing rules. * Fixed a bug where mslex failed to raise "Unquoted CMD metacharacters". * Added tests. * Improved quoted strings to be somewhat easier to read. --- .gitattributes | 1 + README.rst | 39 +++++- mslex/__init__.py | 295 ++++++++++++++++++++++++++++++++----------- requirements_dev.txt | 2 + tests/all_strings.py | 93 ++++++++++++++ tests/examples.csv | 3 + tests/test_mslex.py | 250 +++++++++++++++++++++++++++++++----- 7 files changed, 580 insertions(+), 103 deletions(-) create mode 100644 .gitattributes create mode 100644 tests/all_strings.py create mode 100644 tests/examples.csv diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..af51a1f --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +tests/examples.csv filter=lfs diff=lfs merge=lfs -text diff --git a/README.rst b/README.rst index 0d3eaa2..28bafc2 100644 --- a/README.rst +++ b/README.rst @@ -29,9 +29,34 @@ functions -- split, quote, and join -- just like shlex. Windows Quoting --------------- -These are excellent articles to read if you really want to face the -sanity-melting reality buried under the surface of how windows passes command -line arguments to your programs. I recommend you read something else. +Since time immemorial, windows quoting behavior has been strange. Prior to +(I think) Visual Studio 2005, it exhibited the extremely strange modulo 3 +periodic behavior which is emulated here in ``split_msvcrt()``. Programs +compiled with the C runtime from Visual Studio 2005 and later exhibit the +somewhat less strange behavior emulated in ``split_ucrt()``. 
+ +Microsoft still ships a dll called ``msvcrt.dll`` as part of Windows, +for compatibility reasons. And even though they have been very clear in +their documentation that nobody should ever link against this dll, people +still do, either for compatibility reasons of their own, or because it +is universally available on any version of windows you might care about +without needing to run an installer. And ``msvcrt.dll`` preserves the +extremely strange argument parsing behavior from prior to VS 2005. + +You can download the latest version of `msys2`_ today and build an +executable linking ``msvcrt.dll`` on Windows 11, and it will parse +arguments like Windows 95. + +``mslex`` will produce quoted strings that will be parsed correctly by +either modern C runtimes or by ``msvcrt.dll``. When parsing, ``mslex`` +parses it both ways and raises an error if they disagree. This can +be overridden by passing ``ucrt=True`` or ``ucrt=False`` to ``split``. + +See also: + +* `Parsing C Command Line Arguments`_ + +* `Windows is not a Microsoft Visual C/C++ Run-Time delivery channel`_ * `How a Windows Program Splits Its Command Line Into Individual Arguments`_ @@ -43,6 +68,12 @@ line arguments to your programs. I recommend you read something else. .. _`Everyone quotes command line arguments the wrong way`: https://blogs.msdn.microsoft.com/twistylittlepassagesallalike/2011/04/23/everyone-quotes-command-line-arguments-the-wrong-way/ +.. _`Windows is not a Microsoft Visual C/C++ Run-Time delivery channel`: https://devblogs.microsoft.com/oldnewthing/20140411-00/?p=1273 + +.. _`msys2`: https://www.msys2.org/docs/environments/ + +.. 
_`Parsing C Command Line Arguments`: https://learn.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments?view=msvc-170 + Automatic selection between mslex and shlex ------------------------------------------- @@ -50,3 +81,5 @@ Automatic selection between mslex and shlex If you want to automatically use mslex on Windows, and shlex otherwise, check out the `oslex`_ package. .. _`oslex`: https://pypi.org/project/oslex/ +.. _`msvcrt`: https://devblogs.microsoft.com/oldnewthing/20140411-00/?p=1273 +.. _`UCRT`: https://learn.microsoft.com/en-us/cpp/porting/upgrade-your-code-to-the-universal-crt?view=msvc-170 diff --git a/mslex/__init__.py b/mslex/__init__.py index b054e31..a370f2f 100644 --- a/mslex/__init__.py +++ b/mslex/__init__.py @@ -13,16 +13,24 @@ import re import itertools -from typing import Iterator, List, Match, TextIO # noqa: F401 +from typing import Iterator, List, Match, TextIO, Optional # noqa: F401 from .exceptions import MSLexError -__all__ = ("split", "quote", "join", "MSLexError") +__all__ = ( + "split", + "split_ucrt", + "split_msvcrt", + "strip_carets_like_cmd", + "quote", + "join", + "MSLexError", +) __version__ = "1.2.0" -def iter_arg(peek: Match[str], i: Iterator[Match[str]]) -> Iterator[str]: +def _iter_arg_msvcrt(peek: Match[str], i: Iterator[Match[str]]) -> Iterator[str]: quote_mode = False for m in itertools.chain([peek], i): space, slashes, quotes, text = m.groups() @@ -43,66 +51,229 @@ def iter_arg(peek: Match[str], i: Iterator[Match[str]]) -> Iterator[str]: yield text -def iter_args(s: str) -> Iterator[str]: +def split_msvcrt(s: str) -> List[str]: + """ + Split a string of command line options like `msvcrt.dll`_ does. + + :param s: a string to parse + :return: a list of parsed words + + This parses arguments the same way `CommandLineToArgvW`_ does, except + it does not treat ``argv[0]`` specially. 
+ + Specifically, it is the same as ``CommandLineToArgvW("foo.exe " + s)[1:]`` + + If the first word of ``s`` is a valid command name, then it cannot contain + any quotes, so this is the same as ``CommandLineToArgvW(s)`` + + .. _`CommandLineToArgvW`: https://learn.microsoft.com/en-us/windows/win32/api/shellapi\ + /nf-shellapi-commandlinetoargvw + .. _`msvcrt.dll`: https://devblogs.microsoft.com/oldnewthing/20140411-00/?p=1273 + """ + i = re.finditer(r"(\s+)|(\\*)(\"+)|(.[^\s\\\"]*)", s.lstrip()) + return ["".join(_iter_arg_msvcrt(m, i)) for m in i] + + +def _iter_arg_ucrt(peek: Match[str], i: Iterator[Match[str]]) -> Iterator[str]: + quote_mode = False + for m in itertools.chain([peek], i): + space, slashes, quotes, text = m.groups() + if space: + if quote_mode: + yield space + else: + return + elif quotes: + if slashes: + yield slashes[: len(slashes) // 2] + if len(slashes) % 2: + yield '"' + quotes = quotes[1:] + while quotes: + if quote_mode and len(quotes) >= 2: + yield '"' + quotes = quotes[2:] + else: + quote_mode = not quote_mode + quotes = quotes[1:] + else: + yield text + + +def split_ucrt(s: str) -> List[str]: + """ + Split a string of command line options like `UCRT`_ does. + + :param s: a string to parse + :return: a list of parsed words + + This should compute the same function that is used by a modern windows + C runtime library to convert arguments in ``GetCommandLineW`` to + individual arguments found in ``argv``, except it does not treat + ``argv[0]`` specially. + + see: `Parsing C Command Line Arguments`_ + + .. _`UCRT`: https://learn.microsoft.com/en-us/cpp/porting/\ + upgrade-your-code-to-the-universal-crt + .. 
_`Parsing C Command Line Arguments`: https://learn.microsoft.com/en-us/cpp/c-language\ + /parsing-c-command-line-arguments + """ i = re.finditer(r"(\s+)|(\\*)(\"+)|(.[^\s\\\"]*)", s.lstrip()) - for m in i: - yield "".join(iter_arg(m, i)) + return ["".join(_iter_arg_ucrt(m, i)) for m in i] cmd_meta = r"([\"\^\&\|\<\>\(\)\%\!])" cmd_meta_or_space = r"[\s\"\^\&\|\<\>\(\)\%\!]" - cmd_meta_inside_quotes = r"([\"\%\!])" -def split(s: str, like_cmd: bool = True, check: bool = True) -> List[str]: +def strip_carets_like_cmd(s: str, check: bool = True) -> str: + """ + Interpret caret escaping like ``cmd.exe`` does. + + :param s: a command line string + :param check: raise an error on unquoted metacharacters + :returns: the string with any carets interpreted as an escape character + """ + + def i() -> Iterator[str]: + quote_mode = False + for m in re.finditer(r"(\^.?)|(\")|([^\^\"]+)", s): + escaped, quote, text = m.groups() + if escaped: + if quote_mode: + yield escaped + if len(escaped) > 1: + if escaped[1] == '"': + quote_mode = False + elif check and escaped[1] in "!%": + raise MSLexError("Unquoted CMD metacharacters in string: " + repr(s)) + else: + yield escaped[1:] + elif quote: + yield '"' + quote_mode = not quote_mode + else: + yield text + if check: + meta = cmd_meta_inside_quotes if quote_mode else cmd_meta + if re.search(meta, text): + raise MSLexError("Unquoted CMD metacharacters in string: " + repr(s)) + + return "".join(i()) + + +def split( + s: str, like_cmd: bool = True, check: bool = True, ucrt: Optional[bool] = None +) -> List[str]: """ Split a string of command line arguments like DOS and Windows do. :param s: a string to parse :param like_cmd: parse it like ``cmd.exe`` + :param ucrt: parse like UCRT :param check: raise an error on unquoted metacharacters :return: a list of parsed words If ``like_cmd`` is true, then this will emulate both ``cmd.exe`` and ``CommandLineToArgvW``. 
Since ``cmd.exe`` is a shell, and can run - external programs, this function obviously cannot emulate - everything it does. However if the string passed in would - be parsed by cmd as a quoted literal, without command - invocations like ``&whoami``, and without string substitutions like - ``%PATH%``, then this function will split it accurately. + external programs, this function obviously cannot emulate everything it + does. However if the string passed in would be parsed by cmd as a + quoted literal, without command invocations like ``&whoami``, and + without string substitutions like ``%PATH%``, then this function will + split it accurately. f ``like_cmd`` is false, then this will split the string like ``CommandLineToArgvW`` does. - If ``check`` is true, this will raise a ``ValueError`` if cmd metacharacters - occur in the string without being quoted. + If ``check`` is true, this will raise a ``ValueError`` if cmd + metacharacters occur in the string without being quoted. + + If ``ucrt`` is true, this will parse like a modern C runtime. If it + is false, then it will parse like ``msvcrt.dll``. If it is None, then + it will raise an exception if the two methods disagree. + + .. note:: This does not treat ``argv[0]`` specially as described in Microsoft's + `documentation`_, because this function does not have any way of knowing + if the first word of ``s`` is meant to be used as the program name. If + it is, then it should be a valid path name, so it can not contain + quotes, so both methods of interpretation will give the same answer. + + .. 
_`documentation`: https://learn.microsoft.com/en-us/cpp/c-language/\ + parsing-c-command-line-arguments """ if like_cmd and re.search(cmd_meta, s): + s = strip_carets_like_cmd(s, check=check) + + if ucrt is None: + v = split_ucrt(s) + if v != split_msvcrt(s): + raise MSLexError( + "String is ambiguous, legacy and modern runtimes disagree: " + repr(s) + ) + return v + elif ucrt: + return split_ucrt(s) + else: + return split_msvcrt(s) - def i() -> Iterator[str]: - quote_mode = False - for m in re.finditer(r"(\^.?)|(\")|([^\^\"]+)", s): - escaped, quote, text = m.groups() - if escaped: - if quote_mode: - yield escaped - if len(escaped) > 1 and escaped[1] == '"': - quote_mode = False - else: - yield escaped[1:] - elif quote: - yield '"' - quote_mode = not quote_mode - else: - yield text - if check: - meta = cmd_meta_inside_quotes if quote_mode else cmd_meta - if re.search(meta, text): - raise MSLexError("Unquoted CMD metacharacters in string: " + repr(s)) - s = "".join(i()) - return list(iter_args(s)) +def _escape_quotes(s: str) -> str: + """ + Escape any quotes found in string by prefixing them with an appropriate + number of backslashes. + """ + + i = re.finditer(r"(\\*)(\"+)|(\\+|[^\\\"]+)", s) + + def parts() -> Iterator[str]: + for m in i: + pos, end = m.span() + slashes, quotes, text = m.groups() + if quotes: + yield slashes + yield slashes + yield r"\"" * len(quotes) + else: + yield text + + return "".join(parts()) + + +def _wrap_in_quotes(s: str) -> str: + """ + Wrap a string whose internal quotes have been escaped in double quotes. + This handles adding the correct number of backslashes in front of the + closing quote. + """ + return '"' + re.sub(r"(\\+)$", r"\1\1", s) + '"' + + +def _quote_for_cmd(s: str) -> str: + """ + Quote a string for cmd. Split the string into sections that can be + quoted (or used verbatim), and runs of % and ! 
characters which must be + escaped with carets outside of quotes, and runs of quote characters, + which must be escaped with a caret for cmd.exe, and a backslash for + CommandLineToArgvW. + """ + + def f(m) -> str: + quotable, subst = m.groups() + if quotable: + # A trailing backslash could combine with a backslash escaping a + # quote, so it must be quoted + if re.search(cmd_meta_or_space, quotable) or quotable.endswith("\\"): + return _wrap_in_quotes(quotable) + else: + return quotable + elif subst: + return "^" + subst + else: + return '\\^"' + + return re.sub(r'([^\%\!\"]+)|([\%\!])|"', f, s) def quote(s: str, for_cmd: bool = True) -> str: @@ -120,40 +291,22 @@ def quote(s: str, for_cmd: bool = True) -> str: """ if not s: return '""' - if not re.search(cmd_meta_or_space, s): - return s - if for_cmd and re.search(cmd_meta, s): - if not re.search(cmd_meta_inside_quotes, s): - m = re.search(r"\\+$", s) - if m: - return '"' + s + m.group() + '"' - else: - return '"' + s + '"' - if not re.search(r"[\s\"]", s): - return re.sub(cmd_meta, r"^\1", s) - return re.sub(cmd_meta, r"^\1", quote(s, for_cmd=False)) - i = re.finditer(r"(\\*)(\"+)|(\\+)|([^\\\"]+)", s) - def parts() -> Iterator[str]: - yield '"' - for m in i: - pos, end = m.span() - slashes, quotes, onlyslashes, text = m.groups() - if quotes: - yield slashes - yield slashes - yield r"\"" * len(quotes) - elif onlyslashes: - if end == len(s): - yield onlyslashes - yield onlyslashes - else: - yield onlyslashes - else: - yield text - yield '"' - - return "".join(parts()) + if for_cmd: + if not re.search(cmd_meta_or_space, s): + return s + quoted = _quote_for_cmd(s) + if not re.search(r"[\s\"]", s): + # for example the string «x\!» can be quoted as «x\^!», but + # _quote_for_cmd would quote it as «"x\\"^!» + alt = re.sub(cmd_meta, r"^\1", s) + if len(alt) < len(quoted): + return alt + return quoted + else: + if not re.search(r"\s", s): + return _escape_quotes(s) + return _wrap_in_quotes(_escape_quotes(s)) def 
join(split_command: List[str], for_cmd: bool = True) -> str: @@ -188,5 +341,5 @@ def split_cli() -> None: else: input = sys.stdin - for s in iter_args(input.read()): + for s in split(input.read(), like_cmd=False): print(s) diff --git a/requirements_dev.txt b/requirements_dev.txt index 03d0397..f488d5d 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -11,3 +11,5 @@ black pytest types-setuptools build +trio +tqdm diff --git a/tests/all_strings.py b/tests/all_strings.py new file mode 100644 index 0000000..b9da524 --- /dev/null +++ b/tests/all_strings.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +import shutil +import os +import re +import json +import itertools +from dataclasses import dataclass +from pathlib import Path +from typing import * + +import mslex +import trio +from tqdm import tqdm # type: ignore + +from test_mslex import CSVExample as Example # type: ignore + +chars = [" ", "x", '"', "\\", "^"] + + +def every_string() -> Iterable[str]: + + for qm in (True, False): + for m in range(16): + for n in range(16): + if qm: + s = '"aaa' + else: + s = "" + yield s + "\\" * m + '"' * n + " x" + + for n in range(9): + prod = itertools.product(*itertools.repeat(chars, n)) + for x in prod: + yield "".join(x) + + +testdir = Path(__file__).parent + +N = 32 +limit = trio.Semaphore(N) + + +async def get_one(s: str) -> Example: + cmd = mslex.quote(str(testdir / "cmdline.py")) + " " + s + proc = await trio.run_process(cmd, check=True, capture_stdout=True, shell=True) + j = json.loads(proc.stdout) + cmdline, n = re.subn(r'^.*cmdline.py"\s*', "", j["GetCommandLineW"]) + assert n + return Example(s, cmdline, j["CommandLineToArgvW"][2:], j["sys.argv"][1:]) + + +async def main() -> None: + examples: Dict[str, Example] = dict() + + csv_file = testdir / "examples.csv" + if os.path.exists(csv_file): + with open(csv_file, "r") as f: + for line in f: + e = Example.loads(line) + examples[e.s] = e + + async def job(s) -> None: + try: + examples[s] = await get_one(s) + 
finally: + limit.release() + + def sync(): + tmp = csv_file.with_suffix(".tmp") + with open(tmp, "w") as f: + for e in sorted(examples.values(), key=lambda e: e.s): + print(e.dumps(), file=f) + shutil.copyfile(tmp, csv_file) + + total = sum(1 for s in every_string()) + + try: + async with trio.open_nursery() as spawning_pool: + for s in tqdm(every_string(), total=total): + await trio.sleep(0) + if examples and len(examples) % 10000 == 0: + sync() + if s in examples: + continue + await limit.acquire() + spawning_pool.start_soon(job, s) + finally: + sync() + + +if __name__ == "__main__": + trio.run(main) diff --git a/tests/examples.csv b/tests/examples.csv new file mode 100644 index 0000000..9f24ef4 --- /dev/null +++ b/tests/examples.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3240a27c7a94cd15ab450c0cf40c9bfe0affb8cccf6aacae012214756df99a94 +size 15504061 diff --git a/tests/test_mslex.py b/tests/test_mslex.py index d4768d4..3a4e71c 100644 --- a/tests/test_mslex.py +++ b/tests/test_mslex.py @@ -11,9 +11,11 @@ import unittest import subprocess from typing import List, cast, Any, Optional +from pathlib import Path -from mslex import split, quote +from mslex import split, quote, split_msvcrt, split_ucrt, strip_carets_like_cmd, MSLexError +testdir = Path(__file__).parent if sys.platform == "win32": import ctypes @@ -34,6 +36,15 @@ def ctypes_split(s): LocalFree(argv) return result + def ctypes_split_exe(s): + if s == "": + return [] + argc = ctypes.c_int() + argv = CommandLineToArgvW(s, ctypes.byref(argc)) + result = [argv[i] for i in range(argc.value)] + LocalFree(argv) + return result + def cmd_split(s: str) -> List[str]: assert sys.platform == "win32" @@ -44,6 +55,38 @@ def cmd_split(s: str) -> List[str]: return args[2:] # first two args are "python.exe cmdline.py" +class CSVExample: + + def __init__(self, s: str, cmdline: str, split_msvcrt: List[str], split_ucrt: List[str]): + self.s = s + self.cmdline = cmdline + self.split_msvcrt = 
split_msvcrt + self.split_ucrt = split_ucrt + + @staticmethod + def split_argv(s: str) -> List[str]: + if not s: + return [] + return s.removesuffix(";").split(";") + + @staticmethod + def join_argv(v: List[str]) -> str: + if not v: + return "" + return ";".join(v) + ";" + + @classmethod + def loads(cls, line: str): + (s, cmdline, *splits, _) = line.split(",") + (split_msvcrt, split_ucrt) = map(cls.split_argv, splits) + return cls(s, cmdline, split_msvcrt, split_ucrt) + + def dumps(self): + msvcrt = self.join_argv(self.split_msvcrt) + ucrt = self.join_argv(self.split_ucrt) + return ",".join([self.s, self.cmdline, msvcrt, ucrt, ""]) + + class Example: def __init__(self, input: str, output: List[str], cmd_output: Optional[List[str]] = None): self.input = input @@ -291,31 +334,54 @@ def __init__(self, input: str, output: List[str], cmd_output: Optional[List[str] pretty_examples = [ (r"c:\Program Files\FooBar", r'"c:\Program Files\FooBar"'), (r"c:\Program Files (x86)\FooBar", r'"c:\Program Files (x86)\FooBar"'), - (r"^", '"^"'), - (r" ^", '" ^"'), - (r"&", '"&"'), - (r"!", "^!"), - (r"!foo!", "^!foo^!"), + ("^", "^^"), + (" ^", '" ^"'), + ("&", "^&"), + ("!", "^!"), + (r"%foo%", r"^%foo^%"), + ("!foo!", "^!foo^!"), + ("foo bar!", '"foo bar"^!'), + ("!foo bar!", '^!"foo bar"^!'), ("foo\\bar\\baz\\", "foo\\bar\\baz\\"), ("foo bar\\baz\\", '"foo bar\\baz\\\\"'), ("foo () bar\\baz\\", '"foo () bar\\baz\\\\"'), ("foo () bar\\baz\\\\", '"foo () bar\\baz\\\\\\\\"'), ("foo () bar\\baz\\\\\\", '"foo () bar\\baz\\\\\\\\\\\\"'), ("foo () bar\\baz\\\\\\\\", '"foo () bar\\baz\\\\\\\\\\\\\\\\"'), + (r"foo\bar! baz", r'foo\bar^!" 
baz"'), + (r"x\!", r"x\^!"), + ("foo\\", "foo\\"), + ("\\", "\\"), +] + +pretty_examples_not_cmd = [ + ("\\", "\\"), + ("foo", "foo"), + ("foo\\", "foo\\"), + ("foo!", "foo!"), + ("foo bar", '"foo bar"'), + (r"foo\bar", r"foo\bar"), + (r'foo"bar', r"foo\"bar"), ] class TestMslex(unittest.TestCase): """Tests for `mslex` package.""" - def case(self, s: str, ans: str, cmd: bool) -> None: + def case( + self, s: str, ans: str, cmd: bool, exe: bool = False, ucrt: Optional[bool] = None + ) -> None: + assert not (cmd and exe) if sys.platform == "win32": win_split = cmd_split if cmd else ctypes_split + if exe: + win_split = ctypes_split_exe try: + v = split(s, like_cmd=cmd, ucrt=ucrt) if ans is not None: - self.assertEqual(split(s, like_cmd=cmd), ans) + self.assertEqual(v, ans) if sys.platform == "win32": - self.assertEqual(split(s, like_cmd=cmd), win_split(s)) + self.assertEqual(v, win_split(s)) except AssertionError: print("in: «{}»".format(s)) print() @@ -347,7 +413,7 @@ def every_string(): yield "".join(x) for s in every_string(): - self.case(s, None, cmd=False) + self.case(s, None, cmd=False, ucrt=False) @unittest.skipUnless(sys.platform == "win32", "requires Windows") def test_multi_quotes(self): @@ -359,39 +425,65 @@ def test_multi_quotes(self): else: s = "" s += "\\" * m + '"' * n + " x" - self.case(s, None, False) + self.case(s, None, cmd=False, ucrt=False) def test_examples(self): for e in examples: - self.case(e.input, e.output, cmd=False) + self.case(e.input, e.output, cmd=False, ucrt=False) def test_examples_for_cmd(self): for e in examples: - self.case(e.input, e.cmd_output, cmd=True) + self.case(e.input, e.cmd_output, cmd=True, ucrt=False) def test_quote_examples(self): qu = functools.partial(quote, for_cmd=False) sp = functools.partial(split, like_cmd=False) for e in examples: - self.assertEqual(e.output, sp(" ".join(map(qu, e.output)))) - if e.output == e.cmd_output: - continue - self.assertEqual(e.cmd_output, sp(" ".join(map(qu, e.cmd_output)))) + try: + 
output = e.output + self.assertEqual(e.output, sp(" ".join(map(qu, output)))) + if e.output == e.cmd_output: + continue + output = e.cmd_output + self.assertEqual(e.cmd_output, sp(" ".join(map(qu, output)))) + except AssertionError: + print("in: «{}»".format(output)) + print("quoted: «{}»".format(quote(output))) + raise def test_quote_examples_cmd(self): for e in examples: - self.assertEqual(e.output, split(" ".join(map(quote, e.output)))) - if e.output == e.cmd_output: - continue - self.assertEqual(e.cmd_output, split(" ".join(map(quote, e.cmd_output)))) + try: + output = e.output + self.assertEqual(e.output, split(" ".join(map(quote, output)))) + if e.output == e.cmd_output: + continue + output = e.cmd_output + self.assertEqual(e.cmd_output, split(" ".join(map(quote, output)))) + except AssertionError: + print("in: «{}»".format(output)) + print("quoted: «{}»".format(quote(output))) + raise def test_requote_examples_cmd(self): for e in examples: - self.assertEqual([e.input], split(quote(e.input))) + try: + self.assertEqual([e.input], split(quote(e.input))) + except AssertionError: + print("in: «{}»".format(e.input)) + print("quoted: «{}»".format(quote(e.input))) + for i, s in enumerate(split(quote(e.input))): + print("split[{}]: «{}»".format(i, s)) + raise def test_requote_examples(self): for e in examples: - self.assertEqual([e.input], split(quote(e.input, for_cmd=False), like_cmd=False)) + try: + self.assertEqual([e.input], split(quote(e.input, for_cmd=False), like_cmd=False)) + except AssertionError: + print("in: «{}»".format(e.input)) + print("quoted: «{}»".format(quote(e.input, for_cmd=False))) + raise def test_quote_every_string(self): def every_string(): @@ -413,9 +505,14 @@ def every_string(): yield "".join(x) for s in every_string(): - q = quote(s) - self.assertEqual([s], split(q)) - self.assertEqual([s, s], split("{} {}".format(q, q))) + try: + q = quote(s) + self.assertEqual([s], split(q)) + self.assertEqual([s, s], split("{} {}".format(q, q))) + except 
AssertionError: + print("in: «{}»".format(s)) + print("quoted: «{}»".format(q)) + raise @unittest.skipUnless(sys.platform == "win32", "requires Windows") def test_quote_every_string_using_cmd(self): @@ -437,6 +534,101 @@ def every_string(): def test_pretty_examples(self): for s, ans in pretty_examples: - self.assertEqual(quote(s), ans) - self.assertEqual(split(ans), [s]) - self.assertEqual(split(ans + " " + ans + " foo bar"), [s, s, "foo", "bar"]) + try: + self.assertEqual(quote(s), ans) + self.assertEqual(split(ans), [s]) + self.assertEqual(split(ans + " " + ans + " foo bar"), [s, s, "foo", "bar"]) + except AssertionError: + print("in: «{}»".format(s)) + print("quoted: «{}»".format(quote(s))) + print("expected: «{}»".format(ans)) + raise + + def test_pretty_examples_not_cmd(self): + for s, ans in pretty_examples_not_cmd: + try: + self.assertEqual(quote(s, for_cmd=False), ans) + self.assertEqual(split(ans, like_cmd=False), [s]) + self.assertEqual(split(ans + " " + ans, like_cmd=False), [s, s]) + except AssertionError: + print("in: «{}»".format(s)) + print("quoted: «{}»".format(quote(s))) + print("expected: «{}»".format(ans)) + raise + + def test_examples_csv(self): + csv = testdir / "examples.csv" + if not csv.exists(): + self.skipTest("%s missing. clone from github" % csv) + with open(str(csv), "r") as f: + if next(iter(f)).startswith("version https://git-lfs.github.com/spec/"): + self.skipTest("%s not downloaded. turn on git-lfs." 
% csv) + with open(str(csv), "r") as f: + for line in f: + try: + e = CSVExample.loads(line) + self.assertEqual(e.split_ucrt, split_ucrt(e.cmdline)) + self.assertEqual(e.split_msvcrt, split_msvcrt(e.cmdline)) + try: + self.assertEqual(e.split_msvcrt, split(e.s)) + except MSLexError as err: + assert "ambiguous" in str(err) + assert e.split_ucrt != e.split_msvcrt + else: + assert e.split_ucrt == e.split_msvcrt + self.assertEqual(e.cmdline.lstrip(), strip_carets_like_cmd(e.s).lstrip()) + q = quote(e.s) + self.assertEqual([e.s], split(q)) + self.assertEqual([e.s, e.s], split(q + " " + q)) + q = quote(e.s, for_cmd=False) + self.assertEqual([e.s], split(q, like_cmd=False)) + self.assertEqual([e.s, e.s], split(q + " " + q, like_cmd=False)) + except AssertionError: + print("s: «{}»".format(e.s)) + print("cmdline: «{}»".format(e.cmdline)) + try: + print("q: «{}»".format(q)) + except NameError: + pass + raise + + def test_unquoted(self): + bad = [ + "foo && bar", + "foo || bar", + "foo >bar", + "foo bar"', + 'foo " Date: Tue, 15 Oct 2024 09:13:51 -0400 Subject: [PATCH 3/6] added python 3.12 to tox --- .travis.yml | 1 + tox.ini | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index ca633b3..0cb6883 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ dist: bionic language: python python: + - 3.12 - 3.11 - 3.10 - 3.9 diff --git a/tox.ini b/tox.ini index f804012..486b78e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,9 +1,10 @@ [tox] -envlist = py35, py36, py37, py38, py39, py310, py311, flake8, black +envlist = py35, py36, py37, py38, py39, py310, py311, py312, flake8, black [travis] python = - 3.11: py311, black, flake8 + 3.12: py312, black, flake8 + 3.11: py311 3.10: py310 3.9: py39 3.8: py38 From 8090206076f3ec2b9f3f8832c52b3726a70bd297 Mon Sep 17 00:00:00 2001 From: Lawrence D'Anna Date: Tue, 15 Oct 2024 10:38:51 -0400 Subject: [PATCH 4/6] add ucrt output to examples --- tests/test_mslex.py | 282 
++++++++++++++++++++++++-------------------- 1 file changed, 152 insertions(+), 130 deletions(-) diff --git a/tests/test_mslex.py b/tests/test_mslex.py index 3a4e71c..20fbd63 100644 --- a/tests/test_mslex.py +++ b/tests/test_mslex.py @@ -88,10 +88,23 @@ def dumps(self): class Example: - def __init__(self, input: str, output: List[str], cmd_output: Optional[List[str]] = None): + def __init__( + self, + input: str, + output: List[str], + cmd: Optional[List[str]] = None, + ucrt: Optional[List[str]] = None, + ): self.input = input self.output = output - self.cmd_output = cmd_output if cmd_output is not None else output + self.cmd_output = cmd if cmd is not None else output + self.ucrt_output = ucrt if ucrt is not None else output + if cmd: + self.ucrt_cmd_output = cmd + elif ucrt: + self.ucrt_cmd_output = ucrt + else: + self.ucrt_cmd_output = output examples = [ @@ -100,31 +113,32 @@ def __init__(self, input: str, output: List[str], cmd_output: Optional[List[str] Example(r'""', [""]), Example(r'"""', ['"']), Example(r'""""', ['"']), - Example(r'"""""', ['"']), + Example(r'"""""', ['"'], ucrt=['""']), Example(r'""""""', ['""']), - Example(r'"""""""', ['""']), - Example(r'""""""""', ['""']), - Example(r'"""""""""', ['"""']), - Example(r'""""""""""', ['"""']), + Example(r'"""""""', ['""'], ucrt=['"""']), + Example(r'""""""""', ['""'], ucrt=['"""']), + Example(r'"""""""""', ['"""'], ucrt=['""""']), + Example(r'""""""""""', ['"""'], ucrt=['""""']), Example(r' "', [""]), Example(r' ""', [""]), Example(r' """', ['"']), Example(r' """"', ['"']), - Example(r' """""', ['"']), + Example(r' """""', ['"'], ucrt=['""']), Example(r' """"""', ['""']), - Example(r' """""""', ['""']), - Example(r' """"""""', ['""']), - Example(r' """"""""""', ['"""']), + Example(r' """""""', ['""'], ucrt=['"""']), + Example(r' """"""""', ['""'], ucrt=['"""']), + Example(r' """"""""""', ['"""'], ucrt=['""""']), Example(r" ", []), Example(r'" ', [" "]), Example(r'"" ', [""]), - Example(r'""" ', ['"']), - 
Example(r'"""" ', ['" ']), - Example(r'""""" ', ['"']), - Example(r'"""""" ', ['""']), - Example(r'""""""" ', ['"" ']), - Example(r'"""""""" ', ['""']), - Example(r'"""""""""" ', ['""" ']), + Example(r'""" ', ['"'], ucrt=['" ']), + Example(r'"""" ', ['" '], ucrt=['"']), + Example(r'""""" ', ['"'], ucrt=['"" ']), + Example(r'"""""" ', ['""'], ucrt=['""']), + Example(r'""""""" ', ['"" '], ucrt=['""" ']), + Example(r'"""""""" ', ['""'], ucrt=['"""']), + Example(r'""""""""" ', ['"""'], ucrt=['"""" ']), + Example(r'"""""""""" ', ['""" '], ucrt=['""""']), Example(r"x", ["x"]), Example(r'x"', ["x"]), Example(r"foo", ["foo"]), @@ -134,149 +148,137 @@ def __init__(self, input: str, output: List[str], cmd_output: Optional[List[str] Example(r"a\\\"b c d", [r"a\"b", "c", "d"]), Example(r'a\\\\"b c" d e', [r"a\\b c", "d", "e"]), Example('"" "" ""', ["", "", ""]), + Example(" x", ["x"]), Example('" x', [" x"]), Example('"" x', ["", "x"]), - Example('""" x', ['"', "x"]), - Example('"""" x', ['" x']), - Example('""""" x', ['"', "x"]), - Example('"""""" x', ['""', "x"]), - Example('""""""" x', ['"" x']), - Example('"""""""" x', ['""', "x"]), - Example('""""""""" x', ['"""', "x"]), - Example('"""""""""" x', ['""" x']), - Example('""""""""""" x', ['"""', "x"]), - Example('"""""""""""" x', ['""""', "x"]), - Example('""""""""""""" x', ['"""" x']), + Example('""" x', ['"', "x"], ucrt=['" x']), + Example('"""" x', ['" x'], ucrt=['"', "x"]), + Example('""""" x', ['"', "x"], ucrt=['"" x']), + Example('"""""" x', ['""', "x"], ucrt=['""', "x"]), + Example('""""""" x', ['"" x'], ucrt=['""" x']), + Example('"""""""" x', ['""', "x"], ucrt=['"""', "x"]), + Example('""""""""" x', ['"""', "x"], ucrt=['"""" x']), + Example('"""""""""" x', ['""" x'], ucrt=['""""', "x"]), + Example('""""""""""" x', ['"""', "x"], ucrt=['""""" x']), + Example('"""""""""""" x', ['""""', "x"], ucrt=['"""""', "x"]), + Example('""""""""""""" x', ['"""" x'], ucrt=['"""""" x']), Example('"aaa x', ["aaa x"]), Example('"aaa" 
x', ["aaa", "x"]), - Example('"aaa"" x', ['aaa"', "x"]), - Example('"aaa""" x', ['aaa" x']), - Example('"aaa"""" x', ['aaa"', "x"]), + Example('"aaa"" x', ['aaa"', "x"], ucrt=['aaa" x']), + Example('"aaa""" x', ['aaa" x'], ucrt=['aaa"', "x"]), + Example('"aaa"""" x', ['aaa"', "x"], ucrt=['aaa"" x']), Example('"aaa""""" x', ['aaa""', "x"]), - Example('"aaa"""""" x', ['aaa"" x']), - Example('"aaa""""""" x', ['aaa""', "x"]), - Example('"aaa"""""""" x', ['aaa"""', "x"]), - Example('"aaa""""""""" x', ['aaa""" x']), - Example('"aaa"""""""""" x', ['aaa"""', "x"]), - Example('"aaa""""""""""" x', ['aaa""""', "x"]), - Example('"aaa"""""""""""" x', ['aaa"""" x']), + Example('"aaa"""""" x', ['aaa"" x'], ucrt=['aaa""" x']), + Example('"aaa""""""" x', ['aaa""', "x"], ucrt=['aaa"""', "x"]), + Example('"aaa"""""""" x', ['aaa"""', "x"], ucrt=['aaa"""" x']), + Example('"aaa""""""""" x', ['aaa""" x'], ucrt=['aaa""""', "x"]), + Example('"aaa"""""""""" x', ['aaa"""', "x"], ucrt=['aaa""""" x']), + Example('"aaa""""""""""" x', ['aaa""""', "x"], ucrt=['aaa"""""', "x"]), + Example('"aaa"""""""""""" x', ['aaa"""" x'], ucrt=['aaa"""""" x']), Example('"aaa\\ x', ["aaa\\ x"]), Example('"aaa\\" x', ['aaa" x']), Example('"aaa\\"" x', ['aaa"', "x"]), - Example('"aaa\\""" x', ['aaa""', "x"]), - Example('"aaa\\"""" x', ['aaa"" x']), - Example('"aaa\\""""" x', ['aaa""', "x"]), + Example('"aaa\\""" x', ['aaa""', "x"], ucrt=['aaa"" x']), + Example('"aaa\\"""" x', ['aaa"" x'], ucrt=['aaa""', "x"]), + Example('"aaa\\""""" x', ['aaa""', "x"], ucrt=['aaa""" x']), Example('"aaa\\"""""" x', ['aaa"""', "x"]), - Example('"aaa\\""""""" x', ['aaa""" x']), - Example('"aaa\\"""""""" x', ['aaa"""', "x"]), - Example('"aaa\\""""""""" x', ['aaa""""', "x"]), - Example('"aaa\\"""""""""" x', ['aaa"""" x']), - Example('"aaa\\""""""""""" x', ['aaa""""', "x"]), - Example('"aaa\\"""""""""""" x', ['aaa"""""', "x"]), + Example('"aaa\\""""""" x', ['aaa""" x'], ucrt=['aaa"""" x']), + Example('"aaa\\"""""""" x', ['aaa"""', "x"], 
ucrt=['aaa""""', "x"]), + Example('"aaa\\""""""""" x', ['aaa""""', "x"], ucrt=['aaa""""" x']), + Example('"aaa\\"""""""""" x', ['aaa"""" x'], ucrt=['aaa"""""', "x"]), + Example('"aaa\\""""""""""" x', ['aaa""""', "x"], ucrt=['aaa"""""" x']), + Example('"aaa\\"""""""""""" x', ['aaa"""""', "x"], ucrt=['aaa""""""', "x"]), Example('"aaa\\\\ x', ["aaa\\\\ x"]), Example('"aaa\\\\" x', ["aaa\\", "x"]), - Example('"aaa\\\\"" x', ['aaa\\"', "x"]), - Example('"aaa\\\\""" x', ['aaa\\" x']), - Example('"aaa\\\\"""" x', ['aaa\\"', "x"]), + Example('"aaa\\\\"" x', ['aaa\\"', "x"], ucrt=['aaa\\" x']), + Example('"aaa\\\\""" x', ['aaa\\" x'], ucrt=['aaa\\"', "x"]), + Example('"aaa\\\\"""" x', ['aaa\\"', "x"], ucrt=['aaa\\"" x']), Example('"aaa\\\\""""" x', ['aaa\\""', "x"]), - Example('"aaa\\\\"""""" x', ['aaa\\"" x']), - Example('"aaa\\\\""""""" x', ['aaa\\""', "x"]), - Example('"aaa\\\\"""""""" x', ['aaa\\"""', "x"]), - Example('"aaa\\\\""""""""" x', ['aaa\\""" x']), - Example('"aaa\\\\"""""""""" x', ['aaa\\"""', "x"]), - Example('"aaa\\\\""""""""""" x', ['aaa\\""""', "x"]), - Example('"aaa\\\\"""""""""""" x', ['aaa\\"""" x']), + Example('"aaa\\\\"""""" x', ['aaa\\"" x'], ucrt=['aaa\\""" x']), + Example('"aaa\\\\""""""" x', ['aaa\\""', "x"], ucrt=['aaa\\"""', "x"]), + Example('"aaa\\\\"""""""" x', ['aaa\\"""', "x"], ucrt=['aaa\\"""" x']), + Example('"aaa\\\\""""""""" x', ['aaa\\""" x'], ucrt=['aaa\\""""', "x"]), + Example('"aaa\\\\"""""""""" x', ['aaa\\"""', "x"], ucrt=['aaa\\""""" x']), + Example('"aaa\\\\""""""""""" x', ['aaa\\""""', "x"], ucrt=['aaa\\"""""', "x"]), + Example('"aaa\\\\"""""""""""" x', ['aaa\\"""" x'], ucrt=['aaa\\"""""" x']), Example('"aaa\\\\\\ x', ["aaa\\\\\\ x"]), Example('"aaa\\\\\\" x', ['aaa\\" x']), Example('"aaa\\\\\\"" x', ['aaa\\"', "x"]), - Example('"aaa\\\\\\""" x', ['aaa\\""', "x"]), - Example('"aaa\\\\\\"""" x', ['aaa\\"" x']), - Example('"aaa\\\\\\""""" x', ['aaa\\""', "x"]), + Example('"aaa\\\\\\""" x', ['aaa\\""', "x"], ucrt=['aaa\\"" x']), + 
Example('"aaa\\\\\\"""" x', ['aaa\\"" x'], ucrt=['aaa\\""', "x"]), + Example('"aaa\\\\\\""""" x', ['aaa\\""', "x"], ucrt=['aaa\\""" x']), Example('"aaa\\\\\\"""""" x', ['aaa\\"""', "x"]), - Example('"aaa\\\\\\""""""" x', ['aaa\\""" x']), - Example('"aaa\\\\\\"""""""" x', ['aaa\\"""', "x"]), - Example('"aaa\\\\\\""""""""" x', ['aaa\\""""', "x"]), - Example('"aaa\\\\\\"""""""""" x', ['aaa\\"""" x']), - Example('"aaa\\\\\\""""""""""" x', ['aaa\\""""', "x"]), - Example('"aaa\\\\\\"""""""""""" x', ['aaa\\"""""', "x"]), + Example('"aaa\\\\\\""""""" x', ['aaa\\""" x'], ucrt=['aaa\\"""" x']), + Example('"aaa\\\\\\"""""""" x', ['aaa\\"""', "x"], ucrt=['aaa\\""""', "x"]), + Example('"aaa\\\\\\""""""""" x', ['aaa\\""""', "x"], ucrt=['aaa\\""""" x']), + Example('"aaa\\\\\\"""""""""" x', ['aaa\\"""" x'], ucrt=['aaa\\"""""', "x"]), + Example('"aaa\\\\\\""""""""""" x', ['aaa\\""""', "x"], ucrt=['aaa\\"""""" x']), + Example('"aaa\\\\\\"""""""""""" x', ['aaa\\"""""', "x"], ucrt=['aaa\\""""""', "x"]), Example('"aaa\\\\\\\\ x', ["aaa\\\\\\\\ x"]), Example('"aaa\\\\\\\\" x', ["aaa\\\\", "x"]), - Example('"aaa\\\\\\\\"" x', ['aaa\\\\"', "x"]), - Example('"aaa\\\\\\\\""" x', ['aaa\\\\" x']), - Example('"aaa\\\\\\\\"""" x', ['aaa\\\\"', "x"]), + Example('"aaa\\\\\\\\"" x', ['aaa\\\\"', "x"], ucrt=['aaa\\\\" x']), + Example('"aaa\\\\\\\\""" x', ['aaa\\\\" x'], ucrt=['aaa\\\\"', "x"]), + Example('"aaa\\\\\\\\"""" x', ['aaa\\\\"', "x"], ucrt=['aaa\\\\"" x']), Example('"aaa\\\\\\\\""""" x', ['aaa\\\\""', "x"]), - Example('"aaa\\\\\\\\"""""" x', ['aaa\\\\"" x']), - Example('"aaa\\\\\\\\""""""" x', ['aaa\\\\""', "x"]), - Example('"aaa\\\\\\\\"""""""" x', ['aaa\\\\"""', "x"]), - Example('"aaa\\\\\\\\""""""""" x', ['aaa\\\\""" x']), - Example('"aaa\\\\\\\\"""""""""" x', ['aaa\\\\"""', "x"]), - Example('"aaa\\\\\\\\""""""""""" x', ['aaa\\\\""""', "x"]), - Example('"aaa\\\\\\\\"""""""""""" x', ['aaa\\\\"""" x']), - Example(" x", ["x"]), - Example('" x', [" x"]), - Example('"" x', ["", "x"]), - 
Example('""" x', ['"', "x"]), - Example('"""" x', ['" x']), - Example('""""" x', ['"', "x"]), - Example('"""""" x', ['""', "x"]), - Example('""""""" x', ['"" x']), - Example('"""""""" x', ['""', "x"]), - Example('""""""""" x', ['"""', "x"]), - Example('"""""""""" x', ['""" x']), - Example('""""""""""" x', ['"""', "x"]), - Example('"""""""""""" x', ['""""', "x"]), + Example('"aaa\\\\\\\\"""""" x', ['aaa\\\\"" x'], ucrt=['aaa\\\\""" x']), + Example('"aaa\\\\\\\\""""""" x', ['aaa\\\\""', "x"], ucrt=['aaa\\\\"""', "x"]), + Example('"aaa\\\\\\\\"""""""" x', ['aaa\\\\"""', "x"], ucrt=['aaa\\\\"""" x']), + Example('"aaa\\\\\\\\""""""""" x', ['aaa\\\\""" x'], ucrt=['aaa\\\\""""', "x"]), + Example('"aaa\\\\\\\\"""""""""" x', ['aaa\\\\"""', "x"], ucrt=['aaa\\\\""""" x']), + Example('"aaa\\\\\\\\""""""""""" x', ['aaa\\\\""""', "x"], ucrt=['aaa\\\\"""""', "x"]), + Example('"aaa\\\\\\\\"""""""""""" x', ['aaa\\\\"""" x'], ucrt=['aaa\\\\"""""" x']), Example("\\ x", ["\\", "x"]), Example('\\" x', ['"', "x"]), Example('\\"" x', ['" x']), Example('\\""" x', ['"', "x"]), - Example('\\"""" x', ['""', "x"]), - Example('\\""""" x', ['"" x']), - Example('\\"""""" x', ['""', "x"]), + Example('\\"""" x', ['""', "x"], ucrt=['"" x']), + Example('\\""""" x', ['"" x'], ucrt=['""', "x"]), + Example('\\"""""" x', ['""', "x"], ucrt=['""" x']), Example('\\""""""" x', ['"""', "x"]), - Example('\\"""""""" x', ['""" x']), - Example('\\""""""""" x', ['"""', "x"]), - Example('\\"""""""""" x', ['""""', "x"]), - Example('\\""""""""""" x', ['"""" x']), - Example('\\"""""""""""" x', ['""""', "x"]), + Example('\\"""""""" x', ['""" x'], ucrt=['"""" x']), + Example('\\""""""""" x', ['"""', "x"], ucrt=['""""', "x"]), + Example('\\"""""""""" x', ['""""', "x"], ucrt=['""""" x']), + Example('\\""""""""""" x', ['"""" x'], ucrt=['"""""', "x"]), + Example('\\"""""""""""" x', ['""""', "x"], ucrt=['"""""" x']), Example("\\\\ x", ["\\\\", "x"]), Example('\\\\" x', ["\\ x"]), Example('\\\\"" x', ["\\", "x"]), - 
Example('\\\\""" x', ['\\"', "x"]), - Example('\\\\"""" x', ['\\" x']), - Example('\\\\""""" x', ['\\"', "x"]), + Example('\\\\""" x', ['\\"', "x"], ucrt=['\\" x']), + Example('\\\\"""" x', ['\\" x'], ucrt=['\\"', "x"]), + Example('\\\\""""" x', ['\\"', "x"], ucrt=['\\"" x']), Example('\\\\"""""" x', ['\\""', "x"]), - Example('\\\\""""""" x', ['\\"" x']), - Example('\\\\"""""""" x', ['\\""', "x"]), - Example('\\\\""""""""" x', ['\\"""', "x"]), - Example('\\\\"""""""""" x', ['\\""" x']), - Example('\\\\""""""""""" x', ['\\"""', "x"]), - Example('\\\\"""""""""""" x', ['\\""""', "x"]), + Example('\\\\""""""" x', ['\\"" x'], ucrt=['\\""" x']), + Example('\\\\"""""""" x', ['\\""', "x"], ucrt=['\\"""', "x"]), + Example('\\\\""""""""" x', ['\\"""', "x"], ucrt=['\\"""" x']), + Example('\\\\"""""""""" x', ['\\""" x'], ucrt=['\\""""', "x"]), + Example('\\\\""""""""""" x', ['\\"""', "x"], ucrt=['\\""""" x']), + Example('\\\\"""""""""""" x', ['\\""""', "x"], ucrt=['\\"""""', "x"]), Example("\\\\\\ x", ["\\\\\\", "x"]), Example('\\\\\\" x', ['\\"', "x"]), Example('\\\\\\"" x', ['\\" x']), Example('\\\\\\""" x', ['\\"', "x"]), - Example('\\\\\\"""" x', ['\\""', "x"]), - Example('\\\\\\""""" x', ['\\"" x']), - Example('\\\\\\"""""" x', ['\\""', "x"]), + Example('\\\\\\"""" x', ['\\""', "x"], ucrt=['\\"" x']), + Example('\\\\\\""""" x', ['\\"" x'], ucrt=['\\""', "x"]), + Example('\\\\\\"""""" x', ['\\""', "x"], ucrt=['\\""" x']), Example('\\\\\\""""""" x', ['\\"""', "x"]), - Example('\\\\\\"""""""" x', ['\\""" x']), - Example('\\\\\\""""""""" x', ['\\"""', "x"]), - Example('\\\\\\"""""""""" x', ['\\""""', "x"]), - Example('\\\\\\""""""""""" x', ['\\"""" x']), - Example('\\\\\\"""""""""""" x', ['\\""""', "x"]), + Example('\\\\\\"""""""" x', ['\\""" x'], ucrt=['\\"""" x']), + Example('\\\\\\""""""""" x', ['\\"""', "x"], ucrt=['\\""""', "x"]), + Example('\\\\\\"""""""""" x', ['\\""""', "x"], ucrt=['\\""""" x']), + Example('\\\\\\""""""""""" x', ['\\"""" x'], ucrt=['\\"""""', "x"]), + 
Example('\\\\\\"""""""""""" x', ['\\""""', "x"], ucrt=['\\"""""" x']), Example("\\\\\\\\ x", ["\\\\\\\\", "x"]), Example('\\\\\\\\" x', ["\\\\ x"]), Example('\\\\\\\\"" x', ["\\\\", "x"]), - Example('\\\\\\\\""" x', ['\\\\"', "x"]), - Example('\\\\\\\\"""" x', ['\\\\" x']), - Example('\\\\\\\\""""" x', ['\\\\"', "x"]), + Example('\\\\\\\\""" x', ['\\\\"', "x"], ucrt=['\\\\" x']), + Example('\\\\\\\\"""" x', ['\\\\" x'], ucrt=['\\\\"', "x"]), + Example('\\\\\\\\""""" x', ['\\\\"', "x"], ucrt=['\\\\"" x']), Example('\\\\\\\\"""""" x', ['\\\\""', "x"]), - Example('\\\\\\\\""""""" x', ['\\\\"" x']), - Example('\\\\\\\\"""""""" x', ['\\\\""', "x"]), - Example('\\\\\\\\""""""""" x', ['\\\\"""', "x"]), - Example('\\\\\\\\"""""""""" x', ['\\\\""" x']), - Example('\\\\\\\\""""""""""" x', ['\\\\"""', "x"]), - Example('\\\\\\\\"""""""""""" x', ['\\\\""""', "x"]), + Example('\\\\\\\\""""""" x', ['\\\\"" x'], ucrt=['\\\\""" x']), + Example('\\\\\\\\"""""""" x', ['\\\\""', "x"], ucrt=['\\\\"""', "x"]), + Example('\\\\\\\\""""""""" x', ['\\\\"""', "x"], ucrt=['\\\\"""" x']), + Example('\\\\\\\\"""""""""" x', ['\\\\""" x'], ucrt=['\\\\""""', "x"]), + Example('\\\\\\\\""""""""""" x', ['\\\\"""', "x"], ucrt=['\\\\""""" x']), + Example('\\\\\\\\"""""""""""" x', ['\\\\""""', "x"], ucrt=['\\\\"""""', "x"]), Example('"x"', ["x"]), Example('"^x"', ["^x"]), Example('"^^x"', ["^^x"]), @@ -380,19 +382,19 @@ def case( v = split(s, like_cmd=cmd, ucrt=ucrt) if ans is not None: self.assertEqual(v, ans) - if sys.platform == "win32": + if sys.platform == "win32" and not ucrt: self.assertEqual(v, win_split(s)) - except AssertionError: + except (MSLexError, AssertionError): print("in: «{}»".format(s)) print() - for x in split(s, like_cmd=cmd): + for x in split(s, like_cmd=cmd, ucrt=ucrt): print("out: «{}»".format(x)) print() if ans is not None: for x in ans: print("ans: «{}»".format(x)) print() - if sys.platform == "win32": + if sys.platform == "win32" and not ucrt: for x in win_split(s): 
print("win: «{}»".format(x)) print() @@ -429,11 +431,31 @@ def test_multi_quotes(self): def test_examples(self): for e in examples: - self.case(e.input, e.output, cmd=False, ucrt=False) + try: + self.case(e.input, e.output, cmd=False) + except MSLexError as err: + assert e.output != e.ucrt_output + assert "String is ambiguous" in str(err) + else: + assert e.output == e.ucrt_output + + def test_examples_ucrt(self): + for e in examples: + self.case(e.input, e.ucrt_output, cmd=False, ucrt=True) def test_examples_for_cmd(self): for e in examples: - self.case(e.input, e.cmd_output, cmd=True, ucrt=False) + try: + self.case(e.input, e.cmd_output, cmd=True) + except MSLexError as err: + assert e.cmd_output != e.ucrt_cmd_output + assert "String is ambiguous" in str(err) + else: + assert e.cmd_output == e.ucrt_cmd_output + + def test_examples_for_cmd_ucrt(self): + for e in examples: + self.case(e.input, e.ucrt_cmd_output, cmd=True, ucrt=True) def test_quote_examples(self): qu = functools.partial(quote, for_cmd=False) From 382584e8180af7dd8b00f277559b7f0b8ab4a220 Mon Sep 17 00:00:00 2001 From: Lawrence D'Anna Date: Tue, 15 Oct 2024 13:18:52 -0400 Subject: [PATCH 5/6] cleanup examples --- tests/test_mslex.py | 105 ++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/tests/test_mslex.py b/tests/test_mslex.py index 20fbd63..f76939e 100644 --- a/tests/test_mslex.py +++ b/tests/test_mslex.py @@ -108,42 +108,43 @@ def __init__( examples = [ - Example(r"", []), - Example(r'"', [""]), - Example(r'""', [""]), - Example(r'"""', ['"']), - Example(r'""""', ['"']), - Example(r'"""""', ['"'], ucrt=['""']), - Example(r'""""""', ['""']), - Example(r'"""""""', ['""'], ucrt=['"""']), - Example(r'""""""""', ['""'], ucrt=['"""']), - Example(r'"""""""""', ['"""'], ucrt=['""""']), - Example(r'""""""""""', ['"""'], ucrt=['""""']), - Example(r' "', [""]), - Example(r' ""', [""]), - Example(r' """', ['"']), - Example(r' """"', ['"']), - 
Example(r' """""', ['"'], ucrt=['""']), - Example(r' """"""', ['""']), - Example(r' """""""', ['""'], ucrt=['"""']), - Example(r' """"""""', ['""'], ucrt=['"""']), - Example(r' """"""""""', ['"""'], ucrt=['""""']), - Example(r" ", []), - Example(r'" ', [" "]), - Example(r'"" ', [""]), - Example(r'""" ', ['"'], ucrt=['" ']), - Example(r'"""" ', ['" '], ucrt=['"']), - Example(r'""""" ', ['"'], ucrt=['"" ']), - Example(r'"""""" ', ['""'], ucrt=['""']), - Example(r'""""""" ', ['"" '], ucrt=['""" ']), - Example(r'"""""""" ', ['""'], ucrt=['"""']), - Example(r'""""""""" ', ['"""'], ucrt=['"""" ']), - Example(r'"""""""""" ', ['""" '], ucrt=['""""']), - Example(r"x", ["x"]), - Example(r'x"', ["x"]), - Example(r"foo", ["foo"]), - Example(r'foo "bar baz"', ["foo", "bar baz"]), - Example(r'"abc" d e', ["abc", "d", "e"]), + Example("", []), + Example('"', [""]), + Example('""', [""]), + Example('"""', ['"']), + Example('""""', ['"']), + Example('"""""', ['"'], ucrt=['""']), + Example('""""""', ['""']), + Example('"""""""', ['""'], ucrt=['"""']), + Example('""""""""', ['""'], ucrt=['"""']), + Example('"""""""""', ['"""'], ucrt=['""""']), + Example('""""""""""', ['"""'], ucrt=['""""']), + Example(' "', [""]), + Example(' ""', [""]), + Example(' """', ['"']), + Example(' """"', ['"']), + Example(' """""', ['"'], ucrt=['""']), + Example(' """"""', ['""']), + Example(' """""""', ['""'], ucrt=['"""']), + Example(' """"""""', ['""'], ucrt=['"""']), + Example(' """"""""""', ['"""'], ucrt=['""""']), + Example(" ", []), + Example('" ', [" "]), + Example('"" ', [""]), + Example('""" ', ['"'], ucrt=['" ']), + Example('"""" ', ['" '], ucrt=['"']), + Example('""""" ', ['"'], ucrt=['"" ']), + Example('"""""" ', ['""'], ucrt=['""']), + Example('""""""" ', ['"" '], ucrt=['""" ']), + Example('"""""""" ', ['""'], ucrt=['"""']), + Example('""""""""" ', ['"""'], ucrt=['"""" ']), + Example('"""""""""" ', ['""" '], ucrt=['""""']), + Example("x", ["x"]), + Example('x"', ["x"]), + Example("foo", 
["foo"]), + Example('foo "bar baz"', ["foo", "bar baz"]), + Example('"abc" d e', ["abc", "d", "e"]), + Example(r'"a\bc" d e', [r"a\bc", "d", "e"]), Example(r'a\\\b d"e f"g h', [r"a\\\b", "de fg", "h"]), Example(r"a\\\"b c d", [r"a\"b", "c", "d"]), Example(r'a\\\\"b c" d e', [r"a\\b c", "d", "e"]), @@ -291,16 +292,16 @@ def __init__( Example('"^ ', ["^ "]), Example(":dir", [":dir"]), Example(";;;a,, b, c===", [";;;a,,", "b,", "c==="]), - Example("^;;a", ["^;;a"], [";;a"]), + Example("^;;a", ["^;;a"], cmd=[";;a"]), Example('a "<>||&&', ["a", "<>||&&"]), Example('a "<>||&&^', ["a", "<>||&&^"]), Example('a "<>||&&^^', ["a", "<>||&&^^"]), Example('"foo &whoami bar"', ["foo &whoami bar"]), - Example("^^", ["^^"], ["^"]), + Example("^^", ["^^"], cmd=["^"]), Example('"^"', ["^"]), Example('"^^"', ["^^"]), - Example("foo^bar", ["foo^bar"], ["foobar"]), - Example("foo^^bar", ["foo^^bar"], ["foo^bar"]), + Example("foo^bar", ["foo^bar"], cmd=["foobar"]), + Example("foo^^bar", ["foo^^bar"], cmd=["foo^bar"]), Example('"foo^bar"', ["foo^bar"]), Example('"foo^^bar"', ["foo^^bar"]), Example('"x"', ["x"]), @@ -319,18 +320,18 @@ def __init__( Example('a "<>||&&^', ["a", "<>||&&^"]), Example('a "<>||&&^^', ["a", "<>||&&^^"]), Example("foo", ["foo"]), - Example("foo^", ["foo^"], ["foo"]), - Example("foo^^", ["foo^^"], ["foo^"]), - Example("foo^^^", ["foo^^^"], ["foo^"]), - Example("foo^^^^", ["foo^^^^"], ["foo^^"]), - Example("foo^ bar", ["foo^", "bar"], ["foo", "bar"]), - Example("foo^^ bar", ["foo^^", "bar"], ["foo^", "bar"]), - Example("foo^^^ bar", ["foo^^^", "bar"], ["foo^", "bar"]), - Example("foo^^^^ bar", ["foo^^^^", "bar"], ["foo^^", "bar"]), - Example('"foo^" bar', ["foo^", "bar"], ["foo^", "bar"]), - Example('"foo^^" bar', ["foo^^", "bar"], ["foo^^", "bar"]), - Example('"foo^^^" bar', ["foo^^^", "bar"], ["foo^^^", "bar"]), - Example('"foo^^^^" bar', ["foo^^^^", "bar"], ["foo^^^^", "bar"]), + Example("foo^", ["foo^"], cmd=["foo"]), + Example("foo^^", ["foo^^"], cmd=["foo^"]), 
+ Example("foo^^^", ["foo^^^"], cmd=["foo^"]), + Example("foo^^^^", ["foo^^^^"], cmd=["foo^^"]), + Example("foo^ bar", ["foo^", "bar"], cmd=["foo", "bar"]), + Example("foo^^ bar", ["foo^^", "bar"], cmd=["foo^", "bar"]), + Example("foo^^^ bar", ["foo^^^", "bar"], cmd=["foo^", "bar"]), + Example("foo^^^^ bar", ["foo^^^^", "bar"], cmd=["foo^^", "bar"]), + Example('"foo^" bar', ["foo^", "bar"], cmd=["foo^", "bar"]), + Example('"foo^^" bar', ["foo^^", "bar"], cmd=["foo^^", "bar"]), + Example('"foo^^^" bar', ["foo^^^", "bar"], cmd=["foo^^^", "bar"]), + Example('"foo^^^^" bar', ["foo^^^^", "bar"], cmd=["foo^^^^", "bar"]), ] pretty_examples = [ From 7181889e054575278d9e641e165d82e3ad7e1080 Mon Sep 17 00:00:00 2001 From: Lawrence D'Anna Date: Mon, 14 Oct 2024 14:03:39 -0400 Subject: [PATCH 6/6] =?UTF-8?q?Bump=20version:=201.2.0=20=E2=86=92=201.3.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 2 +- mslex/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 8c32af3..bc04ac0 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -version=1.2.0 +version=1.3.0 WHEEL=dist/mslex-$(version)-py3-none-any.whl SDIST=dist/mslex-$(version).tar.gz diff --git a/mslex/__init__.py b/mslex/__init__.py index a370f2f..1475292 100644 --- a/mslex/__init__.py +++ b/mslex/__init__.py @@ -27,7 +27,7 @@ "MSLexError", ) -__version__ = "1.2.0" +__version__ = "1.3.0" def _iter_arg_msvcrt(peek: Match[str], i: Iterator[Match[str]]) -> Iterator[str]: diff --git a/setup.cfg b/setup.cfg index e7a9455..d2c6ce9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.2.0 +current_version = 1.3.0 commit = True tag = True diff --git a/setup.py b/setup.py index 5a76ed8..a31193c 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.rst") as readme_file: readme = readme_file.read() -version = "1.2.0" +version = "1.3.0" 
setup( author="Lawrence D'Anna",