Skip to content

Commit 09fa296

Browse files
committed
syntax: \p{Lc} should map to \p{Cased_Letter}
This is closer to the \p{Cf} bug than to the \p{Sc} bug: 'lc' is an abbreviation for both the 'Cased_Letter' general category and the 'Lowercase_Mapping' property. Since we don't currently support the latter, we make 'lc' map to 'Cased_Letter'. If we ever add 'Lowercase_Mapping' in the future, we will simply require users to type out its full name. Fixes #965.
1 parent 02428f1 commit 09fa296

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

regex-syntax/src/unicode.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,12 @@ impl<'a> ClassQuery<'a> {
248248
// also the abbreviation for the 'Script' property. So we avoid calling
249249
// 'canonical_prop' for it too, which would erroneously normalize it
250250
// to 'Script'.
251-
if norm != "cf" && norm != "sc" {
251+
//
252+
// Another case: 'lc' is an abbreviation for the 'Cased_Letter'
253+
// general category, but is also an abbreviation for the 'Lowercase_Mapping'
254+
// property. We don't currently support the latter, so as with 'cf'
255+
// above, we treat 'lc' as 'Cased_Letter'.
256+
if norm != "cf" && norm != "sc" && norm != "lc" {
252257
if let Some(canon) = canonical_prop(&norm)? {
253258
return Ok(CanonicalClassQuery::Binary(canon));
254259
}

tests/unicode.rs

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None);
3535
// We should test more, but there's a lot. Write a script to generate more of
3636
// these tests.
3737
mat!(uni_class_gencat_cased_letter, r"\p{Cased_Letter}", "Ａ", Some((0, 3)));
38+
mat!(uni_class_gencat_cased_letter2, r"\p{gc=LC}", "Ａ", Some((0, 3)));
39+
mat!(uni_class_gencat_cased_letter3, r"\p{LC}", "Ａ", Some((0, 3)));
3840
mat!(
3941
uni_class_gencat_close_punctuation,
4042
r"\p{Close_Punctuation}",

0 commit comments

Comments
 (0)