Skip to content

Commit 6efd6f6

Browse files
committed
[MERGE #5669 @dilijev] Make some tweaks to RegExp debug switches.
Merge pull request #5669 from dilijev:re-debug-switches. This change only affects debug output displayed under `-RegexDebug`. Prior to this change, `-RegexDebug` had 3 "chapters" of output, all enabled with no option to disable: AST, Annotated AST, and (Bytecode) Program. This change adds flags `-RegexDebugAST[-]` and `-RegexDebugAnnotatedAST[-]` to disable the AST output chapters and bring focus to the resulting bytecode program, which is the most relevant for seeing how effective the regex bytecode generation was for a given case. The AST output can be noisy and distract from this goal. This change leaves the AST output chapters on-by-default to match existing behavior. The AST may be interesting information when working on the Regex engine even when not working on optimizations. In that case the on-by-default behavior for `-RegexDebug` should probably be: `-RegexDebugAST -RegexDebugAnnotatedAST-`
2 parents 915b02c + d0f8e19 commit 6efd6f6

File tree

5 files changed

+62
-2
lines changed

5 files changed

+62
-2
lines changed

lib/Common/ConfigFlagsList.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,8 @@ PHASE(All)
559559
#define DEFAULT_CONFIG_RegexTracing (false)
560560
#define DEFAULT_CONFIG_RegexProfile (false)
561561
#define DEFAULT_CONFIG_RegexDebug (false)
562+
#define DEFAULT_CONFIG_RegexDebugAST (true)
563+
#define DEFAULT_CONFIG_RegexDebugAnnotatedAST (true)
562564
#define DEFAULT_CONFIG_RegexBytecodeDebug (false)
563565
#define DEFAULT_CONFIG_RegexOptimize (true)
564566
#define DEFAULT_CONFIG_DynamicRegexMruListSize (16)
@@ -1547,6 +1549,8 @@ FLAGNR(Boolean, ValidateHeapEnum , "Validate that heap enumeration is repor
15471549
FLAGR (Boolean, RegexTracing , "Trace all Regex invocations to the output.", DEFAULT_CONFIG_RegexTracing)
15481550
FLAGR (Boolean, RegexProfile , "Collect usage statistics on all Regex invocations.", DEFAULT_CONFIG_RegexProfile)
15491551
FLAGR (Boolean, RegexDebug , "Trace compilation of UnifiedRegex expressions.", DEFAULT_CONFIG_RegexDebug)
1552+
FLAGR (Boolean, RegexDebugAST , "Display Regex AST (requires -RegexDebug to view). [default on]", DEFAULT_CONFIG_RegexDebugAST)
1553+
FLAGR (Boolean, RegexDebugAnnotatedAST, "Display Regex Annotated AST (requires -RegexDebug and -RegexDebugAST to view). [default on]", DEFAULT_CONFIG_RegexDebugAnnotatedAST)
15501554
FLAGR (Boolean, RegexBytecodeDebug , "Display layout of UnifiedRegex bytecode (requires -RegexDebug to view).", DEFAULT_CONFIG_RegexBytecodeDebug)
15511555
FLAGR (Boolean, RegexOptimize , "Optimize regular expressions in the unified Regex system (default: true)", DEFAULT_CONFIG_RegexOptimize)
15521556
FLAGR (Number, DynamicRegexMruListSize, "Size of the MRU list for dynamic regexes", DEFAULT_CONFIG_DynamicRegexMruListSize)

lib/Parser/RegexCompileTime.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4611,7 +4611,7 @@ namespace UnifiedRegex
46114611
{
46124612

46134613
#if ENABLE_REGEX_CONFIG_OPTIONS
4614-
if (w != 0)
4614+
if (w != 0 && REGEX_CONFIG_FLAG(RegexDebugAST))
46154615
{
46164616
w->PrintEOL(_u("REGEX AST /%s/ {"), PointerValue(program->source));
46174617
w->Indent();
@@ -4723,7 +4723,7 @@ namespace UnifiedRegex
47234723
root->AnnotatePass4(compiler);
47244724

47254725
#if ENABLE_REGEX_CONFIG_OPTIONS
4726-
if (w != 0)
4726+
if (w != 0 && REGEX_CONFIG_FLAG(RegexDebugAST) && REGEX_CONFIG_FLAG(RegexDebugAnnotatedAST))
47274727
{
47284728
w->PrintEOL(_u("REGEX ANNOTATED AST /%s/ {"), PointerValue(program->source));
47294729
w->Indent();

lib/Parser/RegexPattern.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ namespace UnifiedRegex
183183
case _u('\x2029'):
184184
w->PrintEscapedChar(c);
185185
break;
186+
case _u('-'):
187+
w->Print(_u("-"));
188+
break;
186189
case _u('\\'):
187190
Assert(i + 1 < str.GetLength()); // cannot end in a '\'
188191
w->Print(_u("\\%lc"), str.GetBuffer()[++i]);

lib/Parser/RegexRuntime.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,7 @@ namespace UnifiedRegex
778778
#if ENABLE_REGEX_CONFIG_OPTIONS
779779
void TrieMixin::Print(DebugWriter* w, const char16* litbuf) const
780780
{
781+
w->PrintEOL(_u(""));
781782
trie.Print(w);
782783
}
783784
#endif
@@ -5752,6 +5753,26 @@ namespace UnifiedRegex
57525753
w->PrintEOL(_u("Program {"));
57535754
w->Indent();
57545755
w->PrintEOL(_u("source: %s"), PointerValue(source));
5756+
5757+
w->Print(_u("litbuf: "));
5758+
const char16 *litbuf = this->rep.insts.litbuf;
5759+
size_t litbufLen = 0;
5760+
if (litbuf == nullptr)
5761+
{
5762+
w->PrintEOL(_u("<NONE>"));
5763+
}
5764+
else
5765+
{
5766+
litbufLen = this->rep.insts.litbufLen;
5767+
for (size_t i = 0; i < litbufLen; ++i)
5768+
{
5769+
const char16 c = (char16)litbuf[i];
5770+
w->PrintEscapedChar(c);
5771+
}
5772+
w->PrintEOL(_u(""));
5773+
}
5774+
w->PrintEOL(_u("litbufLen: %u"), litbufLen);
5775+
57555776
w->Print(_u("flags: "));
57565777
if ((flags & GlobalRegexFlag) != 0) w->Print(_u("global "));
57575778
if ((flags & MultilineRegexFlag) != 0) w->Print(_u("multiline "));

test/Regex/BoiHardFail.baseline

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ REGEX ANNOTATED AST /^token/ {
7575
REGEX PROGRAM /^token/
7676
Program {
7777
source: ^token
78+
litbuf: token
79+
litbufLen: 5
7880
flags:
7981
numGroups: 1
8082
numLoops: 0
@@ -235,6 +237,8 @@ REGEX ANNOTATED AST /(\w)?^token/ {
235237
REGEX PROGRAM /(\w)?^token/
236238
Program {
237239
source: (\w)?^token
240+
litbuf: token
241+
litbufLen: 5
238242
flags:
239243
numGroups: 2
240244
numLoops: 0
@@ -276,6 +280,8 @@ REGEX ANNOTATED AST /token/ {
276280
REGEX PROGRAM /token/
277281
Program {
278282
source: token
283+
litbuf: token
284+
litbufLen: 5
279285
flags:
280286
numGroups: 1
281287
numLoops: 0
@@ -383,6 +389,8 @@ REGEX ANNOTATED AST /^^token/ {
383389
REGEX PROGRAM /^^token/
384390
Program {
385391
source: ^^token
392+
litbuf: token
393+
litbufLen: 5
386394
flags:
387395
numGroups: 1
388396
numLoops: 0
@@ -469,6 +477,8 @@ REGEX ANNOTATED AST /token^/ {
469477
REGEX PROGRAM /token^/
470478
Program {
471479
source: token^
480+
litbuf: token
481+
litbufLen: 5
472482
flags:
473483
numGroups: 1
474484
numLoops: 0
@@ -577,6 +587,8 @@ REGEX ANNOTATED AST /token^token/ {
577587
REGEX PROGRAM /token^token/
578588
Program {
579589
source: token^token
590+
litbuf: tokentoken
591+
litbufLen: 10
580592
flags:
581593
numGroups: 1
582594
numLoops: 0
@@ -760,6 +772,8 @@ REGEX ANNOTATED AST /^token|^abc/ {
760772
REGEX PROGRAM /^token|^abc/
761773
Program {
762774
source: ^token|^abc
775+
litbuf: tokenabc
776+
litbufLen: 8
763777
flags:
764778
numGroups: 1
765779
numLoops: 0
@@ -898,6 +912,8 @@ REGEX ANNOTATED AST /(?!token)^abc/ {
898912
REGEX PROGRAM /(?!token)^abc/
899913
Program {
900914
source: (?!token)^abc
915+
litbuf: tokenabc
916+
litbufLen: 8
901917
flags:
902918
numGroups: 1
903919
numLoops: 0
@@ -1013,6 +1029,8 @@ REGEX ANNOTATED AST /(?=^abc)/ {
10131029
REGEX PROGRAM /(?=^abc)/
10141030
Program {
10151031
source: (?=^abc)
1032+
litbuf: abc
1033+
litbufLen: 3
10161034
flags:
10171035
numGroups: 1
10181036
numLoops: 0
@@ -1127,6 +1145,8 @@ REGEX ANNOTATED AST /(^token)/ {
11271145
REGEX PROGRAM /(^token)/
11281146
Program {
11291147
source: (^token)
1148+
litbuf: token
1149+
litbufLen: 5
11301150
flags:
11311151
numGroups: 2
11321152
numLoops: 0
@@ -1266,6 +1286,8 @@ REGEX ANNOTATED AST /(^a)+/ {
12661286
REGEX PROGRAM /(^a)+/
12671287
Program {
12681288
source: (^a)+
1289+
litbuf: <NONE>
1290+
litbufLen: 0
12691291
flags:
12701292
numGroups: 2
12711293
numLoops: 1
@@ -1333,6 +1355,8 @@ REGEX ANNOTATED AST /(?=^)/ {
13331355
REGEX PROGRAM /(?=^)/
13341356
Program {
13351357
source: (?=^)
1358+
litbuf: <NONE>
1359+
litbufLen: 0
13361360
flags:
13371361
numGroups: 1
13381362
numLoops: 0
@@ -1398,6 +1422,8 @@ REGEX ANNOTATED AST /(^)/ {
13981422
REGEX PROGRAM /(^)/
13991423
Program {
14001424
source: (^)
1425+
litbuf: <NONE>
1426+
litbufLen: 0
14011427
flags:
14021428
numGroups: 2
14031429
numLoops: 0
@@ -1488,6 +1514,8 @@ REGEX ANNOTATED AST /(^)+/ {
14881514
REGEX PROGRAM /(^)+/
14891515
Program {
14901516
source: (^)+
1517+
litbuf: <NONE>
1518+
litbufLen: 0
14911519
flags:
14921520
numGroups: 2
14931521
numLoops: 1
@@ -1554,6 +1582,8 @@ REGEX ANNOTATED AST /(?!^)/ {
15541582
REGEX PROGRAM /(?!^)/
15551583
Program {
15561584
source: (?!^)
1585+
litbuf: <NONE>
1586+
litbufLen: 0
15571587
flags:
15581588
numGroups: 1
15591589
numLoops: 0
@@ -1667,6 +1697,8 @@ REGEX ANNOTATED AST /(?:^abc)+?/ {
16671697
REGEX PROGRAM /(?:^abc)+?/
16681698
Program {
16691699
source: (?:^abc)+?
1700+
litbuf: abc
1701+
litbufLen: 3
16701702
flags:
16711703
numGroups: 1
16721704
numLoops: 1

0 commit comments

Comments
 (0)