Skip to content

Remove "offsets" debugging code from regcomp.c #19407

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -2029,9 +2029,12 @@ ERS |SV* |make_exactf_invlist |NN RExC_state_t *pRExC_state \
ES |regnode_offset|reg |NN RExC_state_t *pRExC_state \
|I32 paren|NN I32 *flagp|U32 depth
ES |regnode_offset|regnode_guts|NN RExC_state_t *pRExC_state \
|const U8 op \
|const STRLEN extra_len \
|NN const char* const name
|const STRLEN extra_len
#ifdef DEBUGGING
ES |regnode_offset|regnode_guts_debug|NN RExC_state_t *pRExC_state \
|const U8 op \
|const STRLEN extra_len
#endif
ES |void |change_engine_size|NN RExC_state_t *pRExC_state|const Ptrdiff_t size
ES |regnode_offset|reganode|NN RExC_state_t *pRExC_state|U8 op \
|U32 arg
Expand Down Expand Up @@ -2108,13 +2111,12 @@ ES |void|add_above_Latin1_folds|NN RExC_state_t *pRExC_state|const U8 cp \
|NN SV** invlist
ES |regnode_offset|handle_named_backref|NN RExC_state_t *pRExC_state \
|NN I32 *flagp \
|NN char * parse_start \
|NN char * backref_parse_start \
|char ch
ESTR |unsigned int|regex_set_precedence|const U8 my_operator
ES |regnode_offset|handle_regex_sets|NN RExC_state_t *pRExC_state \
|NULLOK SV ** return_invlist \
|NN I32 *flagp|U32 depth \
|NN char * const oregcomp_parse
|NN I32 *flagp|U32 depth
ES |void |set_regex_pv |NN RExC_state_t *pRExC_state|NN REGEXP *Rx
# if defined(DEBUGGING) && defined(ENABLE_REGEX_SETS_DEBUGGING)
ES |void |dump_regex_sets_structures \
Expand Down
5 changes: 3 additions & 2 deletions embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,7 @@
#endif
#define regdump_extflags(a,b) S_regdump_extflags(aTHX_ a,b)
#define regdump_intflags(a,b) S_regdump_intflags(aTHX_ a,b)
#define regnode_guts_debug(a,b,c) S_regnode_guts_debug(aTHX_ a,b,c)
#define regtail_study(a,b,c,d) S_regtail_study(aTHX_ a,b,c,d)
# endif
# if defined(PERL_IN_REGEXEC_C)
Expand Down Expand Up @@ -1077,7 +1078,7 @@
#define handle_named_backref(a,b,c,d) S_handle_named_backref(aTHX_ a,b,c,d)
#define handle_names_wildcard(a,b,c,d) S_handle_names_wildcard(aTHX_ a,b,c,d)
#define handle_possible_posix(a,b,c,d,e) S_handle_possible_posix(aTHX_ a,b,c,d,e)
#define handle_regex_sets(a,b,c,d,e) S_handle_regex_sets(aTHX_ a,b,c,d,e)
#define handle_regex_sets(a,b,c,d) S_handle_regex_sets(aTHX_ a,b,c,d)
#define handle_user_defined_property(a,b,c,d,e,f,g,h,i,j) S_handle_user_defined_property(aTHX_ a,b,c,d,e,f,g,h,i,j)
#define invlist_contents(a,b) S_invlist_contents(aTHX_ a,b)
#define invlist_is_iterating S_invlist_is_iterating
Expand All @@ -1104,7 +1105,7 @@
#define regclass(a,b,c,d,e,f,g,h,i) S_regclass(aTHX_ a,b,c,d,e,f,g,h,i)
#define regex_set_precedence S_regex_set_precedence
#define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d)
#define regnode_guts(a,b,c,d) S_regnode_guts(aTHX_ a,b,c,d)
#define regnode_guts(a,b) S_regnode_guts(aTHX_ a,b)
#define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c)
#define regpnode(a,b,c) S_regpnode(aTHX_ a,b,c)
#define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d)
Expand Down
31 changes: 3 additions & 28 deletions ext/re/re.pm
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ package re;
use strict;
use warnings;

our $VERSION = "0.41";
our $VERSION = "0.42";
our @ISA = qw(Exporter);
our @EXPORT_OK = qw{
is_regexp regexp_pattern
Expand Down Expand Up @@ -71,8 +71,6 @@ my %flags = (

EXTRA => 0x3FF0000,
TRIEM => 0x0010000,
OFFSETS => 0x0020000,
OFFSETSDBG => 0x0040000,
STATE => 0x0080000,
OPTIMISEM => 0x0100000,
STACK => 0x0280000,
Expand All @@ -81,9 +79,7 @@ my %flags = (
DUMP_PRE_OPTIMIZE => 0x1000000,
WILDCARD => 0x2000000,
);
$flags{ALL} = -1 & ~($flags{OFFSETS}
|$flags{OFFSETSDBG}
|$flags{BUFFERS}
$flags{ALL} = -1 & ~($flags{BUFFERS}
|$flags{DUMP_PRE_OPTIMIZE}
|$flags{WILDCARD}
);
Expand Down Expand Up @@ -626,26 +622,6 @@ Enable debugging of the \G modifier.
Enable enhanced optimisation debugging and start-point optimisations.
Probably not useful except when debugging the regexp engine itself.

=item OFFSETS

Dump offset information. This can be used to see how regops correlate
to the pattern. Output format is

NODENUM:POSITION[LENGTH]

Where 1 is the position of the first char in the string. Note that position
can be 0, or larger than the actual length of the pattern, likewise length
can be zero.

=item OFFSETSDBG

Enable debugging of offsets information. This emits copious
amounts of trace information and doesn't mesh well with other
debug options.

Almost definitely only useful to people hacking
on the offsets part of the debug engine.

=item DUMP_PRE_OPTIMIZE

Enable the dumping of the compiled pattern before the optimization phase.
Expand Down Expand Up @@ -687,8 +663,7 @@ These are useful shortcuts to save on the typing.

=item ALL

Enable all options at once except OFFSETS, OFFSETSDBG, BUFFERS, WILDCARD, and
DUMP_PRE_OPTIMIZE.
Enable all options at once except BUFFERS, WILDCARD, and DUMP_PRE_OPTIMIZE.
(To get every single option without exception, use both ALL and EXTRA, or
starting in 5.30 on a C<-DDEBUGGING>-enabled perl interpreter, use
the B<-Drv> command-line switches.)
Expand Down
2 changes: 1 addition & 1 deletion ext/re/t/regop.pl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC TEST);
use re Debug=>qw(DUMP EXECUTE TRIEC TEST);
my @tests=(
XY => 'X(A|[B]Q||C|D)Y' ,
foobar => '[f][o][o][b][a][r]',
Expand Down
15 changes: 0 additions & 15 deletions ext/re/t/regop.t
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ Freeing REx: "[f][o][o][b][a][r]"
minlen 3
---
# Compiling REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
# Got 164 bytes for offset annotations.
# TRIE(NATIVE): W:6 C:24 Uq:7 Min:4 Max:4
# Char : Match Base Ofs A B C P G E D
# State|---------------------------------------------------
Expand All @@ -166,8 +165,6 @@ minlen 3
# <D>
# 20: END (0)
# anchored "ABC" at 0 (checking anchored) minlen 4
# Offsets: [20]
# 1:4[3] 3:4[15] 19:32[0] 20:34[0]
# Guessing start of match in sv for REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
# Found anchored substr "ABC" at offset 0...
# Guessed: match at offset 0
Expand Down Expand Up @@ -210,8 +207,6 @@ anchored "ABC" at 0
# 47: EOL(48)
# 48: END(0)
#floating ""$ at 3..4 (checking floating) stclass "EXACTF <.>" minlen 3
#Offsets: [48]
# 1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]
#Guessing start of match, REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|..." against "D:dev/perl/ver/28321_/perl.exe"...
#Found floating substr ""$ at offset 30...
#Starting position does not contradict /^/m...
Expand All @@ -233,30 +228,23 @@ anchored "ABC" at 0
#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......
%MATCHED%
floating ""$ at 3..4 (checking floating)
#1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]
#stclass EXACTF <.> minlen 3
#Found floating substr ""$ at offset 30...
#Does not contradict STCLASS...
#Guessed: match at offset 26
#Matching stclass EXACTF <.> against ".exe"
---
#Compiling REx "[q]"
#size 3 nodes Got 7 bytes for offset annotations.
#first at 1
#Final program:
# 1: EXACT <q>(3)
# 3: END(0)
#anchored "q" at 0 (checking anchored isall) minlen 1
#Offsets: [3]
# 1:1[3] 3:4[0]
#Guessing start of match, REx "[q]" against "q"...
#Found anchored substr "q" at offset 0...
#Guessed: match at offset 0
#%MATCHED%
#Freeing REx: "[q]"
Got 7 bytes for offset annotations.
Offsets: [3]
1:1[3] 3:4[0]
%MATCHED%
Freeing REx: "[q]"
---
Expand All @@ -281,7 +269,6 @@ Freeing REx: "[q]"
Freeing REx: "^(\S{1,9}):\s*(\d+)$"
---
#Compiling REx "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"...
#Got 532 bytes for offset annotations.
study_chunk_recursed_count: 5
#Final program:
# 1: DEFINEP (3)
Expand Down Expand Up @@ -317,8 +304,6 @@ study_chunk_recursed_count: 5
# 61: TAIL (62)
# 62: END (0)
minlen 0
#Offsets: [66]
# 1:3[0] 3:10[0] 5:17[1] 7:18[3] 9:21[1] 11:21[0] 13:22[0] 14:25[0] 16:32[0] 18:39[1] 20:41[3] 23:47[3] 25:50[1] 27:50[0] 29:51[0] 30:54[0] 32:61[0] 34:68[1] 36:70[3] 39:76[3] 41:79[1] 43:79[0] 45:80[0] 46:83[0] 48:90[0] 50:97[1] 52:99[3] 55:105[3] 57:108[1] 59:108[0] 61:109[0] 62:110[0]
#Matching REx "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"... against ""
# 0 <> <> | 1:DEFINEP(3)
# 0 <> <> | 3:IFTHEN(14)
Expand Down
67 changes: 46 additions & 21 deletions pod/perlreguts.pod
Original file line number Diff line number Diff line change
Expand Up @@ -827,29 +827,18 @@ The following structure is used as the C<pprivate> struct by perl's
regex engine. Since it is specific to perl it is only of curiosity
value to other engine implementations.

typedef struct regexp_internal {
U32 *offsets; /* offset annotations 20001228 MJD
* data about mapping the program to
* the string*/
regnode *regstclass; /* Optional startclass as identified or
* constructed by the optimiser */
struct reg_data *data; /* Additional miscellaneous data used
* by the program. Used to make it
* easier to clone and free arbitrary
* data that the regops need. Often the
* ARG field of a regop is an index
* into this structure */
regnode program[1]; /* Unwarranted chumminess with
* compiler. */
} regexp_internal;
typedef struct regexp_internal {
regnode *regstclass;
struct reg_data *data;
struct reg_code_blocks *code_blocks;
U32 proglen;
U32 name_list_idx;
regnode program[1];
} regexp_internal;

=over 5

=item C<offsets>
The attributes are described below:

Offsets holds a mapping of offset in the C<program>
to offset in the C<precomp> string. This is only used by ActiveState's
visual regex debugger.
=over 5

=item C<regstclass>

Expand Down Expand Up @@ -878,6 +867,42 @@ what array. During compilation regops that need special structures stored
will add an element to each array using the add_data() routine and then store
the index in the regop.

In modern perls the 0th element of this structure is reserved and is never
used to store real data. This lets code that needs to index into this
array use index 0 to represent "no value".

=item C<code_blocks>

This optional structure is used to manage C<(?{})> constructs in the
pattern. It is made up of the following structures.

/* record the position of a (?{...}) within a pattern */
struct reg_code_block {
STRLEN start;
STRLEN end;
OP *block;
REGEXP *src_regex;
};

/* array of reg_code_block's plus header info */
struct reg_code_blocks {
int refcnt; /* we may be pointed to from a regex
and from the savestack */
int count; /* how many code blocks */
struct reg_code_block *cb; /* array of reg_code_block's */
};

=item C<proglen>

Stores the length of the compiled program in units of regops.

=item C<name_list_idx>

This is the index into the C<data> array where an AV is stored that contains
the names of any named capture buffers in the pattern, should there be
any. This is only used in the debugging version of the regex engine and
when C<RXp_PAREN_NAMES(prog)> is true. It will be 0 if there is no such data.

=item C<program>

Compiled program. Inlined into the structure so the entire struct can be
Expand Down
15 changes: 9 additions & 6 deletions proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -4720,6 +4720,9 @@ STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
#define PERL_ARGS_ASSERT_REGDUMP_EXTFLAGS
STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
#define PERL_ARGS_ASSERT_REGDUMP_INTFLAGS
STATIC regnode_offset S_regnode_guts_debug(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len);
#define PERL_ARGS_ASSERT_REGNODE_GUTS_DEBUG \
assert(pRExC_state)
STATIC bool S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_REGTAIL_STUDY \
Expand Down Expand Up @@ -5877,18 +5880,18 @@ STATIC U32 S_get_quantifier_value(pTHX_ RExC_state_t *pRExC_state, const char *
STATIC bool S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode_offset* nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth);
#define PERL_ARGS_ASSERT_GROK_BSLASH_N \
assert(pRExC_state); assert(flagp)
STATIC regnode_offset S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * parse_start, char ch);
STATIC regnode_offset S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * backref_parse_start, char ch);
#define PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF \
assert(pRExC_state); assert(flagp); assert(parse_start)
assert(pRExC_state); assert(flagp); assert(backref_parse_start)
STATIC bool S_handle_names_wildcard(pTHX_ const char * wname, const STRLEN wname_len, SV ** prop_definition, AV ** strings);
#define PERL_ARGS_ASSERT_HANDLE_NAMES_WILDCARD \
assert(wname); assert(prop_definition); assert(strings)
STATIC int S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* const s, char ** updated_parse_ptr, AV** posix_warnings, const bool check_only);
#define PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX \
assert(pRExC_state); assert(s)
STATIC regnode_offset S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth, char * const oregcomp_parse);
STATIC regnode_offset S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_HANDLE_REGEX_SETS \
assert(pRExC_state); assert(flagp); assert(oregcomp_parse)
assert(pRExC_state); assert(flagp)
STATIC SV * S_handle_user_defined_property(pTHX_ const char * name, const STRLEN name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, SV* contents, bool *user_defined_ptr, SV * msg, const STRLEN level);
#define PERL_ARGS_ASSERT_HANDLE_USER_DEFINED_PROPERTY \
assert(name); assert(contents); assert(user_defined_ptr); assert(msg)
Expand Down Expand Up @@ -5990,9 +5993,9 @@ STATIC unsigned int S_regex_set_precedence(const U8 my_operator)
STATIC void S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op, const regnode_offset operand, const U32 depth);
#define PERL_ARGS_ASSERT_REGINSERT \
assert(pRExC_state)
STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len, const char* const name);
STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const STRLEN extra_len);
#define PERL_ARGS_ASSERT_REGNODE_GUTS \
assert(pRExC_state); assert(name)
assert(pRExC_state)
STATIC regnode_offset S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REGPIECE \
assert(pRExC_state); assert(flagp)
Expand Down
Loading