Skip to content

Commit cd9ce77

Browse files
authored
Merge pull request #7953 from frendsick/fix/expr-regex-anchors
expr: Fix parsing regex anchors '^' and '$'
2 parents aa5dd0e + 2933286 commit cd9ce77

File tree

2 files changed: 53 additions and 8 deletions

2 files changed: 53 additions and 8 deletions

src/uu/expr/src/syntax_tree.rs

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ impl StringOp {
156156
re_string.push('^');
157157

158158
// Handle first character from the input pattern
159-
let mut pattern_chars = right.chars();
159+
let mut pattern_chars = right.chars().peekable();
160160
let first = pattern_chars.next();
161161
match first {
162162
Some('^') => {} // Start of string anchor is already added
@@ -166,16 +166,42 @@ impl StringOp {
166166
};
167167

168168
// Handle the rest of the input pattern.
169-
// Escaped previous character should not affect the current.
170169
let mut prev = first.unwrap_or_default();
171170
let mut prev_is_escaped = false;
172-
for curr in pattern_chars {
171+
while let Some(curr) = pattern_chars.next() {
173172
match curr {
174-
// Carets are interpreted literally, unless used as character class negation "[^a]"
175-
'^' if prev_is_escaped || !matches!(prev, '\\' | '[') => {
176-
re_string.push_str(r"\^");
173+
'^' => match (prev, prev_is_escaped) {
174+
// Start of a capturing group
175+
('(', true)
176+
// Start of an alternative pattern
177+
| ('|', true)
178+
// Character class negation "[^a]"
179+
| ('[', false)
180+
// Explicitly escaped caret
181+
| ('\\', false) => re_string.push(curr),
182+
_ => re_string.push_str(r"\^"),
183+
},
184+
'$' => {
185+
if let Some('\\') = pattern_chars.peek() {
186+
// The next character was checked to be a backslash
187+
let backslash = pattern_chars.next().unwrap_or_default();
188+
match pattern_chars.peek() {
189+
// End of a capturing group
190+
Some(')') => re_string.push('$'),
191+
// End of an alternative pattern
192+
Some('|') => re_string.push('$'),
193+
_ => re_string.push_str(r"\$"),
194+
}
195+
re_string.push(backslash);
196+
} else if (prev_is_escaped || prev != '\\')
197+
&& pattern_chars.peek().is_some()
198+
{
199+
re_string.push_str(r"\$");
200+
} else {
201+
re_string.push('$');
202+
}
177203
}
178-
char => re_string.push(char),
204+
_ => re_string.push(curr),
179205
}
180206

181207
prev_is_escaped = prev == '\\' && !prev_is_escaped;

tests/by-util/test_expr.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,10 +282,26 @@ fn test_regex() {
282282
.args(&["a^b", ":", "a\\^b"])
283283
.succeeds()
284284
.stdout_only("3\n");
285+
new_ucmd!()
286+
.args(&["b", ":", "a\\|^b"])
287+
.succeeds()
288+
.stdout_only("1\n");
289+
new_ucmd!()
290+
.args(&["ab", ":", "\\(^a\\)b"])
291+
.succeeds()
292+
.stdout_only("a\n");
285293
new_ucmd!()
286294
.args(&["a$b", ":", "a\\$b"])
287295
.succeeds()
288296
.stdout_only("3\n");
297+
new_ucmd!()
298+
.args(&["a", ":", "a$\\|b"])
299+
.succeeds()
300+
.stdout_only("1\n");
301+
new_ucmd!()
302+
.args(&["ab", ":", "a\\(b$\\)"])
303+
.succeeds()
304+
.stdout_only("b\n");
289305
new_ucmd!()
290306
.args(&["abc", ":", "^abc"])
291307
.succeeds()
@@ -298,6 +314,10 @@ fn test_regex() {
298314
.args(&["b^$ic", ":", "b^\\$ic"])
299315
.succeeds()
300316
.stdout_only("5\n");
317+
new_ucmd!()
318+
.args(&["a$c", ":", "a$\\c"])
319+
.succeeds()
320+
.stdout_only("3\n");
301321
new_ucmd!()
302322
.args(&["^^^^^^^^^", ":", "^^^"])
303323
.succeeds()
@@ -766,7 +786,6 @@ mod gnu_expr {
766786
.stdout_only("3\n");
767787
}
768788

769-
#[ignore]
770789
#[test]
771790
fn test_bre11() {
772791
new_ucmd!()

0 commit comments

Comments
 (0)