Skip to content

Commit 4d36401

Browse files
committed
regcomp.c: Avoid use after free
It turns out that the SV returned by re_intuit_string() may be freed by future calls to re_intuit_start(). Thus, the caller doesn't get clear title to the returned SV. (This wasn't documented until the commit immediately prior to this one.) Cope with this situation by making a mortalized copy. This commit also changes to use the copy's PV directly, simplifying some 'if' statements. re_intuit_string() is effectively in the API, as it is an element in the regex engine structure, callable by anyone. It should not be returning a tenuous SV. That returned scalar should not freed before the pattern it is for is freed. It is too late in the development cycle to change this, so this workaround is presented instead for 5.32. This fixes #17734.
1 parent e8c3bac commit 4d36401

File tree

2 files changed

+27
-13
lines changed

2 files changed

+27
-13
lines changed

regcomp.c

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25013,8 +25013,10 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
2501325013
where we are now */
2501425014
bool found_matches = FALSE; /* Did any name match so far? */
2501525015
SV * empty; /* For matching zero length names */
25016-
SV * must; /* What substring, if any, must be in a name
25017-
for the subpattern to match */
25016+
SV * must_sv; /* Contains the substring, if any, that must be
25017+
in a name for the subpattern to match */
25018+
char * must; /* The PV of 'must' */
25019+
STRLEN must_len; /* And its length */
2501825020
SV * syllable_name = NULL; /* For Hangul syllables */
2501925021
const char hangul_prefix[] = "HANGUL SYLLABLE ";
2502025022
const STRLEN hangul_prefix_len = sizeof(hangul_prefix) - 1;
@@ -25079,7 +25081,17 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
2507925081

2508025082
/* Compile the subpattern consisting of the name being looked for */
2508125083
subpattern_re = compile_wildcard(wname, wname_len, FALSE /* /-i */ );
25082-
must = re_intuit_string(subpattern_re);
25084+
25085+
must_sv = re_intuit_string(subpattern_re);
25086+
if (must_sv) {
25087+
/* regexec.c can free the re_intuit_string() return. GH #17734 */
25088+
must_sv = sv_2mortal(newSVsv(must_sv));
25089+
must = SvPV(must_sv, must_len);
25090+
}
25091+
else {
25092+
must = "";
25093+
must_len = 0;
25094+
}
2508325095

2508425096
/* (Note: 'must' could contain a NUL. And yet we use strspn() below on it.
2508525097
* This works because the NUL causes the function to return early, thus
@@ -25095,10 +25107,7 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
2509525107

2509625108
/* And match against the string of all names /gc. Don't even try if it
2509725109
* must match a character not found in any name. */
25098-
if ( ! must
25099-
|| SvCUR(must) == 0
25100-
|| strspn(SvPVX(must), "\n -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ()")
25101-
== SvCUR(must))
25110+
if (strspn(must, "\n -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ()") == must_len)
2510225111
{
2510325112
while (execute_wildcard(subpattern_re,
2510425113
cur_pos,
@@ -25238,9 +25247,7 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
2523825247
* one of the characters in that isn't in any Hangul syllable. */
2523925248
if ( prog->minlen <= (SSize_t) syl_max_len
2524025249
&& prog->maxlen > 0
25241-
&& ( ! must
25242-
|| SvCUR(must) == 0
25243-
|| strspn(SvPVX(must), "\n ABCDEGHIJKLMNOPRSTUWY") == SvCUR(must)))
25250+
&& (strspn(must, "\n ABCDEGHIJKLMNOPRSTUWY") == must_len))
2524425251
{
2524525252
/* These constants, names, values, and algorithm are adapted from the
2524625253
* Unicode standard, version 5.1, section 3.12, and should never
@@ -25335,9 +25342,7 @@ S_handle_names_wildcard(pTHX_ const char * wname, /* wildcard name to match */
2533525342
* series */
2533625343
if ( prog->minlen <= (SSize_t) SvCUR(algo_name)
2533725344
&& prog->maxlen > 0
25338-
&& ( ! must
25339-
|| SvCUR(must) == 0
25340-
|| strspn(SvPVX(must), legal) == SvCUR(must)))
25345+
&& (strspn(must, legal) == must_len))
2534125346
{
2534225347
for (j = low; j <= high; j++) { /* For each code point in the series */
2534325348

t/re/pat_advanced.t

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2553,6 +2553,15 @@ EOF
25532553
{}, "Too large negative relative group number");
25542554
}
25552555

2556+
{ # GH #17734, ASAN use after free
2557+
fresh_perl_like('no warnings "experimental::uniprop_wildcards";
2558+
my $re = q<[[\p{name=/[Y-]+Z/}]]>;
2559+
eval { "\N{BYZANTINE MUSICAL SYMBOL PSILI}"
2560+
=~ /$re/ }; print $@ if $@; print "Done\n";',
2561+
qr/Done/,
2562+
{}, "GH #17734");
2563+
}
2564+
25562565

25572566
# !!! NOTE that tests that aren't at all likely to crash perl should go
25582567
# a ways above, above these last ones. There's a comment there that, like

0 commit comments

Comments
 (0)