Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pat codeblocks: add size to struct reg_code_blocks and fix leak #22201

Merged
merged 2 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 89 additions & 39 deletions regcomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -502,19 +502,42 @@ S_free_codeblocks(pTHX_ struct reg_code_blocks *cbs)
Safefree(cbs);
}

/* Ensure that there are at least 'required' spare code block slots
 * available in cbs->cb[] beyond the cbs->count slots already in use.
 *
 * If the current allocation (cbs->size) is too small, the capacity is
 * grown by simple doubling (starting from 1) until it can hold
 * count + required entries, then cbs->cb is reallocated and cbs->size
 * updated. cbs->count itself is never changed here; callers bump it
 * as they fill slots in.
 *
 * Nothing is done when count + required is less than 1, or when the
 * existing allocation is already big enough. */

static void
S_grow_code_blocks(pTHX_ struct reg_code_blocks *cbs, int required)
{
    /* from here on, 'required' is the total number of slots needed */
    required += cbs->count;
    if (required < 1)
        return;

    /* '<=' rather than '<': an array with exactly 'required' slots is
     * already sufficient, so don't Renew() it to its current size */
    if (required <= cbs->size)
        return;

    int new_size = cbs->size;
    if (new_size < 1)
        new_size = 1;

    while (new_size < required)
        new_size *= 2;

    Renew(cbs->cb, new_size, struct reg_code_block);
    cbs->size = new_size;
}


/* Allocate and return a new, empty struct reg_code_blocks.
 *
 * The result has refcnt == 1, count == 0 and, via S_grow_code_blocks(),
 * room for at least 'ncode' code blocks (cb stays NULL and size stays 0
 * when ncode is less than 1). S_free_codeblocks() is registered for the
 * new structure with SAVEDESTRUCTOR_X.
 *
 * Callers are expected to increment ->count as they fill in slots. */
static struct reg_code_blocks *
S_alloc_code_blocks(pTHX_ int ncode)
{
    struct reg_code_blocks *cbs;
    Newx(cbs, 1, struct reg_code_blocks);
    cbs->size = 0;
    cbs->count = 0;
    cbs->cb = NULL;
    cbs->refcnt = 1;
    SAVEDESTRUCTOR_X(S_free_codeblocks, cbs);
    /* the slot array is allocated in one place only, so that ->size
     * always reflects the real capacity of ->cb */
    S_grow_code_blocks(aTHX_ cbs, ncode);
    return cbs;
}

Expand All @@ -528,7 +551,7 @@ S_alloc_code_blocks(pTHX_ int ncode)

static void
S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
char **pat_p, STRLEN *plen_p, int num_code_blocks)
char **pat_p, STRLEN *plen_p)
{
U8 *const src = (U8*)*pat_p;
U8 *dst, *d;
Expand All @@ -540,14 +563,19 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
DEBUG_PARSE_r(Perl_re_printf( aTHX_
"UTF8 mismatch! Converting to utf8 for resizing and compile\n"));

int nblocks = 0;
if (pRExC_state->code_blocks)
nblocks = pRExC_state->code_blocks->count;


/* 1 for each byte + 1 for each byte that expands to two, + trailing NUL */
Newx(dst, *plen_p + variant_under_utf8_count(src, src + *plen_p) + 1, U8);
d = dst;

while (s < *plen_p) {
append_utf8_from_native_byte(src[s], &d);

if (n < num_code_blocks) {
if (n < nblocks) {
assert(pRExC_state->code_blocks);
if (!do_end && pRExC_state->code_blocks->cb[n].start == s) {
pRExC_state->code_blocks->cb[n].start = d - dst - 1;
Expand Down Expand Up @@ -591,7 +619,6 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
OP *oplist, bool *recompile_p, SV *delim)
{
SV **svp;
int n = 0;
bool use_delim = FALSE;
bool alloced = FALSE;

Expand Down Expand Up @@ -680,11 +707,15 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
if (oplist->op_type == OP_NULL
&& (oplist->op_flags & OPf_SPECIAL))
{
assert(n < pRExC_state->code_blocks->count);
pRExC_state->code_blocks->cb[n].start = pat ? SvCUR(pat) : 0;
pRExC_state->code_blocks->cb[n].block = oplist;
pRExC_state->code_blocks->cb[n].src_regex = NULL;
n++;
/* process next literal code block */
struct reg_code_blocks *cbs = pRExC_state->code_blocks;
S_grow_code_blocks(aTHX_ cbs, 1);
int n = cbs->count;

cbs->cb[n].start = pat ? SvCUR(pat) : 0;
cbs->cb[n].block = oplist;
cbs->cb[n].src_regex = NULL;
cbs->count++;
code = 1;
oplist = OpSIBLING(oplist); /* skip CONST */
assert(oplist);
Expand All @@ -711,11 +742,27 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
(sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
{
sv_setsv(pat, sv);
/* overloading involved: all bets are off over literal
* code. Pretend we haven't seen it */
if (n)
pRExC_state->code_blocks->count -= n;
n = 0;
/* pat now represents the return value of overloaded
* concatenation of two values:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/of of/of/

* 1) all the components previously concatenated;
* 2) the current pattern element.
* Since the return value can be anything, any previously
* found code-blocks (even literal ones) should be discarded.
* For example, in:
* qr/(?{A})$obj/
* the overloaded concatenation of '(?{A})' and $obj
* could return anything, and not necessarily the literal
* code block. So throw away any previously found code blocks,
* and so any code-block bits in the returned string will be
* treated as run-time.
*/
struct reg_code_blocks *cbs = pRExC_state->code_blocks;
if (cbs) {
for (int n = 0; n < cbs->count; n++) {
SvREFCNT_dec(cbs->cb[n].src_regex);
}
cbs->count = 0;
}
}
else {
/* ... or failing that, try "" overload */
Expand All @@ -741,7 +788,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
char *dst = SvPV_force_nomg(pat, dlen);
orig_patlen = dlen;
if (SvUTF8(msv) && !SvUTF8(pat)) {
S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &dst, &dlen, n);
S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &dst, &dlen);
sv_setpvn(pat, dst, dlen);
SvUTF8_on(pat);
}
Expand All @@ -763,8 +810,11 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
}
}

if (code)
pRExC_state->code_blocks->cb[n-1].end = SvCUR(pat)-1;
/* was this pattern element a literal code block? */
if (code) {
struct reg_code_blocks *cbs = pRExC_state->code_blocks;
cbs->cb[cbs->count - 1].end = SvCUR(pat) - 1;
}
}

/* extract any code blocks within any embedded qr//'s */
Expand All @@ -780,13 +830,10 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
* qr// may not have changed, but it may be a
* different closure than last time */
*recompile_p = 1;
if (pRExC_state->code_blocks) {
int new_count = pRExC_state->code_blocks->count
+ ri->code_blocks->count;
Renew(pRExC_state->code_blocks->cb,
new_count, struct reg_code_block);
pRExC_state->code_blocks->count = new_count;
}

if (pRExC_state->code_blocks)
S_grow_code_blocks(aTHX_ pRExC_state->code_blocks,
ri->code_blocks->count);
else
pRExC_state->code_blocks = S_alloc_code_blocks(aTHX_
ri->code_blocks->count);
Expand All @@ -795,21 +842,22 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
struct reg_code_block *src, *dst;
STRLEN offset = orig_patlen
+ ReANY((REGEXP *)rx)->pre_prefix;
assert(n < pRExC_state->code_blocks->count);
src = &ri->code_blocks->cb[i];
dst = &pRExC_state->code_blocks->cb[n];
dst = &pRExC_state->code_blocks->cb[
pRExC_state->code_blocks->count++];
dst->start = src->start + offset;
dst->end = src->end + offset;
dst->block = src->block;
dst->src_regex = (REGEXP*) SvREFCNT_inc( (SV*)
src->src_regex
? src->src_regex
: (REGEXP*)rx);
n++;
}
}
}
}

} /* for (patternp) */

/* avoid calling magic multiple times on a single element e.g. =~ $qr */
if (alloced)
SvSETMAGIC(pat);
Expand Down Expand Up @@ -1059,7 +1107,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
: src->src_regex;
dst++;
}
r1->code_blocks->count += r2c;
r1->code_blocks->count = r1->code_blocks->size = r1c + r2c;
Safefree(r1->code_blocks->cb);
r1->code_blocks->cb = new_block;
}
Expand Down Expand Up @@ -1478,7 +1526,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,

/* set expr to the first arg op */

if (pRExC_state->code_blocks && pRExC_state->code_blocks->count
/* ->size > 0 if we alloced above with ncode > 0 */
if (pRExC_state->code_blocks && pRExC_state->code_blocks->size
&& expr->op_type != OP_CONST)
{
expr = cLISTOPx(expr)->op_first;
Expand Down Expand Up @@ -1602,8 +1651,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
/* whoops, we have a non-utf8 pattern, whilst run-time code
* got compiled as utf8. Try again with a utf8 pattern */
S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen);
goto redo_parse;
}
}
Expand Down Expand Up @@ -1750,8 +1798,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
variant_under_utf8_count((U8 *) exp, (U8 *) exp
+ RExC_latest_warn_offset);
}
S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen);
DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse after upgrade\n"));
}
else {
Expand Down Expand Up @@ -13669,14 +13716,17 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
if (ri->code_blocks) {
int n;
Newx(reti->code_blocks, 1, struct reg_code_blocks);

Newx(reti->code_blocks->cb, ri->code_blocks->count,
struct reg_code_block);
reti->code_blocks->size = ri->code_blocks->count;

Copy(ri->code_blocks->cb, reti->code_blocks->cb,
ri->code_blocks->count, struct reg_code_block);
for (n = 0; n < ri->code_blocks->count; n++)
reti->code_blocks->cb[n].src_regex = (REGEXP*)
sv_dup_inc((SV*)(ri->code_blocks->cb[n].src_regex), param);
reti->code_blocks->count = ri->code_blocks->count;
reti->code_blocks->count = ri->code_blocks->count;
reti->code_blocks->refcnt = 1;
}
else
Expand Down
3 changes: 2 additions & 1 deletion regexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ struct reg_code_block {

/* A collection of code blocks: cb[] has 'size' allocated slots, of
 * which 'count' are currently in use. */
struct reg_code_blocks {
    int refcnt; /* we may be pointed to from a regex and from the savestack */
    int count;  /* how many code block slots currently in use */
    int size;   /* how many slots allocated in cb[] */
    struct reg_code_block *cb; /* array of reg_code_block's */
};

Expand Down
29 changes: 28 additions & 1 deletion t/op/svleak.t
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ BEGIN {

use Config;

plan tests => 156;
plan tests => 157;

# run some code N times. If the number of SVs at the end of loop N is
# greater than (N-1)*delta at the end of loop 1, we've got a leak
Expand Down Expand Up @@ -683,3 +683,30 @@ leak 2, 0, sub {
} } 1..2;
},
'sort block return';


# Avoid leaks when overloading causes a compile-time pattern code block
# to be recompiled at runtime.

# Helper package for the leak test below: objects blessed into it
# overload both stringification and concatenation, so that interpolating
# such an object into a pattern invokes the overloaded '.' handler.
package myconcat {
use overload
# stringification: dereference the object as a scalar ref and return
# the referent
'""' => sub { ${$_[0]} },
# concatenation: stringify both operands and join them
'.' => sub {
# $_[2] is the overload "operands swapped" flag; when it is true the
# first two arguments are exchanged before being concatenated
my ($x, $y) = @_[ $_[2] ? (1,0) : (0,1) ];
my ($xx, $yy) = ("$x", "$y");
"$xx$yy";
}
;

# NOTE(review): leak() is defined elsewhere in this file; presumably the
# arguments are (iterations, allowed SV delta, code, test name) - confirm.
::leak(2, 0,
sub {
# two compiled patterns, each containing a literal code block
my $r1 = qr/(?{1})/;
my $r2 = qr/(?{2})/;
# rebless the second one so that it carries the overloads above
bless $r2, 'myconcat';
# permit code blocks in patterns interpolated at run time
use re "eval";
# build a pattern from a plain qr// and an overloaded object; the
# result is discarded, only the side effect on SV counts matters
qr/$r1$r2/;
1;
},
'overloaded pattern with code block'
);
}
Loading
Loading