Skip to content

Commit 5a770dc

Browse files
committed
syntax: permit empty character classes
An empty character class is effectively a way to write something that can never match anything. The regex crate has pretty much always returned an error for such things because it was never taught how to handle "always fail" states. This was partly because I just didn't think about it when initially writing the regex engines and partly because it isn't often useful. With that said, it should be supported for completeness and because there is no real reason not to support it. Moreover, it can be useful in certain contexts where regexes are generated and you want to insert an expression that can never match. It's somewhat contrived, but it happens when the interface is a regex pattern. Previously, the ban on empty character classes was implemented in the regex-syntax crate. But with the rewrite in #656 getting closer and closer to landing, it's now time to relax this restriction. However, we do keep the overall restriction in the 'regex' API by returning an error in the NFA compiler. Once #656 is done, the new regex engines will permit this case.
1 parent 06df9ac commit 5a770dc

File tree

3 files changed

+15
-84
lines changed

3 files changed

+15
-84
lines changed

regex-syntax/src/hir/mod.rs

-14
Original file line numberDiff line numberDiff line change
@@ -78,21 +78,8 @@ pub enum ErrorKind {
7878
/// available, and the regular expression required Unicode aware case
7979
/// insensitivity.
8080
UnicodeCaseUnavailable,
81-
/// This occurs when the translator attempts to construct a character class
82-
/// that is empty.
83-
///
84-
/// Note that this restriction in the translator may be removed in the
85-
/// future.
86-
EmptyClassNotAllowed,
8781
}
8882

89-
// BREADCRUMBS:
90-
//
91-
// Remove EmptyClassNotAllowed
92-
// Make errors non_exhaustive
93-
// Simplify repetitions (get rid of ZeroOrOne, OneOrMore etc)
94-
// Get rid of deprecated things
95-
9683
impl std::error::Error for Error {}
9784

9885
impl fmt::Display for Error {
@@ -118,7 +105,6 @@ impl fmt::Display for ErrorKind {
118105
"Unicode-aware case insensitivity matching is not available \
119106
(make sure the unicode-case feature is enabled)"
120107
}
121-
EmptyClassNotAllowed => "empty character classes are not allowed",
122108
};
123109
f.write_str(msg)
124110
}

regex-syntax/src/hir/translate.rs

+5-68
Original file line numberDiff line numberDiff line change
@@ -322,12 +322,6 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
322322
ast.negated,
323323
&mut cls,
324324
)?;
325-
if cls.ranges().is_empty() {
326-
return Err(self.error(
327-
ast.span,
328-
ErrorKind::EmptyClassNotAllowed,
329-
));
330-
}
331325
let expr = Hir::class(hir::Class::Unicode(cls));
332326
self.push(HirFrame::Expr(expr));
333327
} else {
@@ -337,13 +331,6 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
337331
ast.negated,
338332
&mut cls,
339333
)?;
340-
if cls.ranges().is_empty() {
341-
return Err(self.error(
342-
ast.span,
343-
ErrorKind::EmptyClassNotAllowed,
344-
));
345-
}
346-
347334
let expr = Hir::class(hir::Class::Bytes(cls));
348335
self.push(HirFrame::Expr(expr));
349336
}
@@ -839,11 +826,6 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
839826
ast_class.negated,
840827
class,
841828
)?;
842-
if class.ranges().is_empty() {
843-
let err = self
844-
.error(ast_class.span, ErrorKind::EmptyClassNotAllowed);
845-
return Err(err);
846-
}
847829
}
848830
result
849831
}
@@ -2357,16 +2339,7 @@ mod tests {
23572339
#[test]
23582340
#[cfg(feature = "unicode-gencat")]
23592341
fn class_unicode_any_empty() {
2360-
assert_eq!(
2361-
t_err(r"\P{any}"),
2362-
TestError {
2363-
kind: hir::ErrorKind::EmptyClassNotAllowed,
2364-
span: Span::new(
2365-
Position::new(0, 1, 1),
2366-
Position::new(7, 1, 8)
2367-
),
2368-
}
2369-
);
2342+
assert_eq!(t(r"\P{any}"), hir_uclass(&[]),);
23702343
}
23712344

23722345
#[test]
@@ -2518,27 +2491,9 @@ mod tests {
25182491
}
25192492
);
25202493
#[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2521-
assert_eq!(
2522-
t_err(r"[^\s\S]"),
2523-
TestError {
2524-
kind: hir::ErrorKind::EmptyClassNotAllowed,
2525-
span: Span::new(
2526-
Position::new(0, 1, 1),
2527-
Position::new(7, 1, 8)
2528-
),
2529-
}
2530-
);
2494+
assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]),);
25312495
#[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2532-
assert_eq!(
2533-
t_err(r"(?-u)[^\s\S]"),
2534-
TestError {
2535-
kind: hir::ErrorKind::EmptyClassNotAllowed,
2536-
span: Span::new(
2537-
Position::new(5, 1, 6),
2538-
Position::new(12, 1, 13)
2539-
),
2540-
}
2541-
);
2496+
assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]),);
25422497
}
25432498

25442499
#[test]
@@ -2686,27 +2641,9 @@ mod tests {
26862641
hir_uclass(&[('C', 'C'), ('c', 'c')])
26872642
);
26882643

2689-
assert_eq!(
2690-
t_err(r"[^a-c[^c]]"),
2691-
TestError {
2692-
kind: hir::ErrorKind::EmptyClassNotAllowed,
2693-
span: Span::new(
2694-
Position::new(0, 1, 1),
2695-
Position::new(10, 1, 11)
2696-
),
2697-
}
2698-
);
2644+
assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]),);
26992645
#[cfg(feature = "unicode-case")]
2700-
assert_eq!(
2701-
t_err(r"(?i)[^a-c[^c]]"),
2702-
TestError {
2703-
kind: hir::ErrorKind::EmptyClassNotAllowed,
2704-
span: Span::new(
2705-
Position::new(4, 1, 5),
2706-
Position::new(14, 1, 15)
2707-
),
2708-
}
2709-
);
2646+
assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]),);
27102647
}
27112648

27122649
#[test]

src/compile.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,11 @@ impl Compiler {
457457
fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
458458
use std::mem::size_of;
459459

460-
assert!(!ranges.is_empty());
460+
if ranges.is_empty() {
461+
return Err(Error::Syntax(
462+
"empty character classes are not allowed".to_string(),
463+
));
464+
}
461465
if self.compiled.uses_bytes() {
462466
Ok(Some(CompileClass { c: self, ranges }.compile()?))
463467
} else {
@@ -482,7 +486,11 @@ impl Compiler {
482486
&mut self,
483487
ranges: &[hir::ClassBytesRange],
484488
) -> ResultOrEmpty {
485-
debug_assert!(!ranges.is_empty());
489+
if ranges.is_empty() {
490+
return Err(Error::Syntax(
491+
"empty character classes are not allowed".to_string(),
492+
));
493+
}
486494

487495
let first_split_entry = self.insts.len();
488496
let mut holes = vec![];

0 commit comments

Comments
 (0)