Skip to content

Commit

Permalink
Fix handling of backslashes when parsing envfiles #118
Browse files Browse the repository at this point in the history
Bring parsing behavior more in line with bash wrt escaping
- outside of quotes:
  - escaped new lines are omitted
  - other escaped characters are always included
    (including backslashes, whitespace and semicolons)
  - non-escaped backslashes are omitted
- inside single quotes
  - backslashes are treated like normal characters - no escaping
- inside double quotes
  - escaped new lines are omitted
  - escaped backslashes and double-quotes are kept
  - backslashes not used for escaping are kept
  • Loading branch information
nat-n committed Jan 15, 2023
1 parent e056c26 commit 457f7d9
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 20 deletions.
58 changes: 45 additions & 13 deletions poethepoet/env/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,26 @@ class ParserState(Enum):


def parse_env_file(content_lines: Sequence[str]):
"""
This function implements envfile parsing similar to bash.
Line commenting is respected via # outside of quotes and following a non-escaped
whitespace char.
Escaping rules:
- outside of quotes:
- escaped new lines are omitted
- other escaped characters are always included
(including backslashes, whitespace and semicolons)
- non-escaped backslashes are omitted
- inside single quotes
- backslashes are treated like normal character - no escaping
- inside double quotes
- escaped new lines are omitted
- escaped backslashes and double-quotes are kept
- backslashes not used for escaping are kept
"""

content = "".join(content_lines) + "\n"
result = {}
cursor = 0
Expand Down Expand Up @@ -86,6 +106,7 @@ def parse_env_file(content_lines: Sequence[str]):

if state == ParserState.SCAN_VALUE:
# collect up until the first quote, whitespace, or group of backslashes

match = re.search(UNQUOTED_VALUE_PATTERN, content[cursor:], re.MULTILINE)
assert match
new_var_content, match_terminator = match.groups()
Expand Down Expand Up @@ -116,11 +137,16 @@ def parse_env_file(content_lines: Sequence[str]):
var_content.append("\\" * (num_backslashes // 2))
cursor += num_backslashes

if num_backslashes % 2 > 0:
# Odd number of backslashes, means the next char is escaped
if num_backslashes % 2 != 0:
next_char = content[cursor]
var_content.append(next_char)
cursor += 1

if next_char == "\n":
# Omit escaped new line
continue

# Non-escaped backslashes that don't precede a terminator are dropped
var_content.append(next_char)
continue

if state == ParserState.IN_SINGLE_QUOTE:
Expand All @@ -129,9 +155,7 @@ def parse_env_file(content_lines: Sequence[str]):
SINGLE_QUOTE_VALUE_PATTERN, content[cursor:], re.MULTILINE
)
if match is None:
raise ParserException(
f"Unmatched single quote", cursor - 1, content_lines
)
raise ParserException(f"Unmatched single quote", cursor, content_lines)
var_content.append(match.group(1))
cursor += match.end()
state = ParserState.SCAN_VALUE
Expand All @@ -143,9 +167,7 @@ def parse_env_file(content_lines: Sequence[str]):
DOUBLE_QUOTE_VALUE_PATTERN, content[cursor:], re.MULTILINE
)
if match is None:
raise ParserException(
f"Unmatched double quote", cursor - 1, content_lines
)
raise ParserException(f"Unmatched double quote", cursor, content_lines)
new_var_content, backslashes_or_dquote = match.groups()
var_content.append(new_var_content)
cursor += match.end()
Expand All @@ -154,13 +176,23 @@ def parse_env_file(content_lines: Sequence[str]):
state = ParserState.SCAN_VALUE
continue

# We found one or more backslashes
num_backslashes = len(backslashes_or_dquote)

# Keep the excess (escaped) backslashes
var_content.append("\\" * (len(backslashes_or_dquote) // 2))
var_content.append("\\" * (num_backslashes // 2))

if len(backslashes_or_dquote) % 2 == 0:
# whatever follows is escaped
if num_backslashes % 2 != 0:
# Odd number of backslashes maybe an escape sequence
next_char = content[cursor]
var_content.append(next_char)
cursor += 1
if next_char == "\n":
# Omit escaped new line
pass
if next_char == '"':
var_content.append(next_char)
else:
# otherwise keep the backslash
var_content.append("\\" + next_char)

return result
102 changes: 95 additions & 7 deletions tests/unit/test_parse_env_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,17 @@
thing"
SINGLE_QUOTED_WORD='some
thing'
DOUBLE_QUOTED_WORD_ESC="some\
thing"
SINGLE_QUOTED_WORD_ESC='some\
thing'
""",
{
"WORD": "some\nthing",
"WORD": "something",
"DOUBLE_QUOTED_WORD": "some\n thing",
"SINGLE_QUOTED_WORD": "some\n thing",
"DOUBLE_QUOTED_WORD_ESC": "some thing",
"SINGLE_QUOTED_WORD_ESC": "some thing",
},
),
(
Expand All @@ -67,12 +73,12 @@
(
"""
# with semicolons
; FOO=BAR;BAR=FOO ;
; FOO=BAR;BAR=FOO\\;! ;
;
BAZ="2;'2"#;
\tQUX=3\t;
""",
{"FOO": "BAR", "BAR": "FOO", "BAZ": "2;'2#", "QUX": "3"},
{"FOO": "BAR", "BAR": "FOO;!", "BAZ": "2;'2#", "QUX": "3"},
),
(
r"""
Expand All @@ -81,14 +87,22 @@
BAR='a\\\ b'
BAZ="a\\\ b"
""",
{"FOO": r"a\ b", "BAR": r"a\\\ b", "BAZ": r"a\ b"},
{"FOO": r"a\ b", "BAR": r"a\\\ b", "BAZ": r"a\\ b"},
),
( # a value with many parts and some empty vars
r"""FOO=a\\\ b'a\\\ b'"a\\\ b"#"#"'\'' ;'#;\t
r"""FOO=a\\\ b'a\\\ b'"a\\\ b"#"#"'\'' ;'#; #\t
BAR=
BAZ= # still empty
QUX=""",
{"FOO": r"a\ ba\\\ ba\ b##\ ;#", "BAR": "", "BAZ": "", "QUX": ""},
QUX=
WUT='a'"b"\
c """,
{
"FOO": r"a\ ba\\\ ba\\ b##\ ;#",
"BAR": "",
"BAZ": "",
"QUX": "",
"WUT": "abc",
},
),
# export keyword is allowed
(
Expand All @@ -98,6 +112,78 @@
""",
{"answer": "42", "question": "undefined", "dinner": "chicken"},
),
# handling escapes
(
"""
ESCAPED_DQUOTE=\\"
ESCAPED_DQUOTE_DQUOTES='\\"'
ESCAPED_DQUOTE_SQUOTES="\\""
ESCAPED_NEWLINE=a\\
b;
ESCAPED_NEWLINE_DQUOTES='\\"'
ESCAPED_NEWLINE_SQUOTES="\\""
""",
{
"ESCAPED_DQUOTE": '"',
"ESCAPED_DQUOTE_DQUOTES": '\\"',
"ESCAPED_DQUOTE_SQUOTES": '"',
"ESCAPED_NEWLINE": "ab",
"ESCAPED_NEWLINE_DQUOTES": '\\"',
"ESCAPED_NEWLINE_SQUOTES": '"',
},
),
# comments
(
r"""# at start
EX1=BAR#NOT_COMMENT
EX2="BAR#NOT_COMMENT"
EX3='BAR#NOT_COMMENT'
EX4=BAR\ #NOT_COMMENT
EX5="BAR #NOT_COMMENT"
EX6='BAR #NOT_COMMENT'
EX7=BAR\
#NOT_COMMENT
EX8=BAR #COMMENT
EX9=BAR\ #COMMENT
EX10=BAR; #COMMENT
EX11=BAR;#COMMENT
EX12=BAR\;;#COMMENT
EX13=BAR\
#COMMENT'
#COMMENT'
""",
{
"EX1": "BAR#NOT_COMMENT",
"EX2": "BAR#NOT_COMMENT",
"EX3": "BAR#NOT_COMMENT",
"EX4": "BAR #NOT_COMMENT",
"EX5": "BAR #NOT_COMMENT",
"EX6": "BAR #NOT_COMMENT",
"EX7": "BAR#NOT_COMMENT",
"EX8": "BAR",
"EX9": "BAR ",
"EX10": "BAR",
"EX11": "BAR",
"EX12": "BAR;",
"EX13": "BAR",
},
),
(
r"""
EQL=="="
FOO=\\x\n\x77
FOOSQ='\\x\n\x77'
FOODQ="\\x\n\x77"
""",
{"EQL": "==", "FOO": "\\xnx77", "FOOSQ": r"\\x\n\x77", "FOODQ": "\\x\\n\\x77"},
),
(
r"""
FOO=first
FOO=second
""",
{"FOO": "second"},
),
]


Expand All @@ -120,6 +206,8 @@
r"foo\==bar",
r"export;foo=bar",
r"export\nfoo=bar",
r"""foo='\'' """,
r"""foo="\" """,
]


Expand Down

0 comments on commit 457f7d9

Please sign in to comment.