Skip to content

Commit

Permalink
add a new (sub) PEG special
Browse files Browse the repository at this point in the history
(sub) will first match one pattern, then match another pattern against the
text that the first pattern advanced over.
  • Loading branch information
ianthehenry committed Dec 29, 2023
1 parent 772f4c2 commit ea75086
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 5 deletions.
54 changes: 50 additions & 4 deletions src/core/peg.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@
typedef struct {
const uint8_t *text_start;
const uint8_t *text_end;
/* text_end will be restricted in a (sub) rule, but
outer_text_end will always contain the real end of
input, which we need to generate a line mapping */
const uint8_t *outer_text_end;
const uint32_t *bytecode;
const Janet *constants;
JanetArray *captures;
Expand Down Expand Up @@ -114,12 +118,12 @@ static LineCol get_linecol_from_position(PegState *s, int32_t position) {
/* Generate if not made yet */
if (s->linemaplen < 0) {
int32_t newline_count = 0;
for (const uint8_t *c = s->text_start; c < s->text_end; c++) {
for (const uint8_t *c = s->text_start; c < s->outer_text_end; c++) {
if (*c == '\n') newline_count++;
}
int32_t *mem = janet_smalloc(sizeof(int32_t) * newline_count);
size_t index = 0;
for (const uint8_t *c = s->text_start; c < s->text_end; c++) {
for (const uint8_t *c = s->text_start; c < s->outer_text_end; c++) {
if (*c == '\n') mem[index++] = (int32_t)(c - s->text_start);
}
s->linemaplen = newline_count;
Expand Down Expand Up @@ -179,7 +183,7 @@ static const uint8_t *peg_rule(
const uint32_t *rule,
const uint8_t *text) {
tail:
switch (*rule & 0x1F) {
switch (*rule) {
default:
janet_panic("unexpected opcode");
return NULL;
Expand Down Expand Up @@ -482,6 +486,30 @@ static const uint8_t *peg_rule(
return result;
}

/* (sub window subpattern)
 * First match the rule at rule[1] (the "window") starting at `text`.
 * Then match the rule at rule[2] from the same starting position, with
 * s->text_end temporarily lowered to where the window match stopped.
 * Succeeds only if both match; on success the result is the end of the
 * window match, not the end of the subpattern match. */
case RULE_SUB: {
/* Remember where we started so the subpattern can be run from the same spot. */
const uint8_t *text_start = text;
const uint32_t *rule_window = s->bytecode + rule[1];
const uint32_t *rule_subpattern = s->bytecode + rule[2];
/* NOTE(review): down1/up1 bracket each nested peg_rule call; presumably
 * they maintain the recursion-depth guard (s->depth) -- confirm. */
down1(s);
const uint8_t *window_end = peg_rule(s, rule_window, text);
up1(s);
if (!window_end) {
return NULL;
}
/* Restrict the visible end of input to the window for the second match. */
const uint8_t *saved_end = s->text_end;
s->text_end = window_end;
down1(s);
const uint8_t *next_text = peg_rule(s, rule_subpattern, text_start);
up1(s);
/* Restore text_end before inspecting the result, so it is reset on both
 * the success and the failure path. */
s->text_end = saved_end;

/* next_text is used only as a success/failure flag. */
if (!next_text) {
return NULL;
}

/* Advance past everything the window matched, regardless of how much of
 * it the subpattern consumed. */
return window_end;
}

case RULE_REPLACE:
case RULE_MATCHTIME: {
uint32_t tag = rule[3];
Expand Down Expand Up @@ -1107,6 +1135,14 @@ static void spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
emit_3(r, RULE_MATCHTIME, subrule, cindex, tag);
}

/* Compile the (sub window subpattern) special form.
 *
 * Requires exactly two arguments: argv[0] is the window pattern and argv[1]
 * is the pattern matched inside that window. Emits a RULE_SUB instruction
 * whose two operands are the compiled rule indices, in that order. */
static void spec_sub(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 2);

    /* Three words: the opcode plus its two rule operands. The slot is
     * reserved before the operands are compiled, and that order is kept. */
    Reserve slot = reserve(b, 3);

    /* Operands are compiled left to right: window first, then subpattern. */
    uint32_t window_rule = peg_compile1(b, argv[0]);
    uint32_t inner_rule = peg_compile1(b, argv[1]);

    emit_2(slot, RULE_SUB, window_rule, inner_rule);
}

#ifdef JANET_INT_TYPES
#define JANET_MAX_READINT_WIDTH 8
#else
Expand Down Expand Up @@ -1190,6 +1226,7 @@ static const SpecialPair peg_specials[] = {
{"sequence", spec_sequence},
{"set", spec_set},
{"some", spec_some},
{"sub", spec_sub},
{"thru", spec_thru},
{"to", spec_to},
{"uint", spec_uint_le},
Expand Down Expand Up @@ -1431,7 +1468,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
uint32_t instr = bytecode[i];
uint32_t *rule = bytecode + i;
op_flags[i] |= 0x02;
switch (instr & 0x1F) {
switch (instr) {
case RULE_LITERAL:
i += 2 + ((rule[1] + 3) >> 2);
break;
Expand Down Expand Up @@ -1524,6 +1561,14 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
op_flags[rule[1]] |= 0x01;
i += 4;
break;
case RULE_SUB:
/* [rule, rule] */
/* Both operands are used below as indices into op_flags, so each one
 * must be smaller than blen; otherwise the bytecode is rejected. */
if (rule[1] >= blen) goto bad;
if (rule[2] >= blen) goto bad;
/* Flag both referenced positions with bit 0x01.
 * NOTE(review): 0x01 appears to mean "referenced as a rule target"
 * (0x02 is set on each instruction start above) -- confirm against the
 * validation that consumes op_flags. */
op_flags[rule[1]] |= 0x01;
op_flags[rule[2]] |= 0x01;
/* Skip the opcode word and its two operand words. */
i += 3;
break;
case RULE_ERROR:
case RULE_DROP:
case RULE_NOT:
Expand Down Expand Up @@ -1677,6 +1722,7 @@ static PegCall peg_cfun_init(int32_t argc, Janet *argv, int get_replace) {
ret.s.mode = PEG_MODE_NORMAL;
ret.s.text_start = ret.bytes.bytes;
ret.s.text_end = ret.bytes.bytes + ret.bytes.len;
ret.s.outer_text_end = ret.s.text_end;
ret.s.depth = JANET_RECURSION_GUARD;
ret.s.captures = janet_array(0);
ret.s.tagged_captures = janet_array(0);
Expand Down
3 changes: 2 additions & 1 deletion src/include/janet.h
Original file line number Diff line number Diff line change
Expand Up @@ -2140,7 +2140,8 @@ typedef enum {
RULE_LINE, /* [tag] */
RULE_COLUMN, /* [tag] */
RULE_UNREF, /* [rule, tag] */
RULE_CAPTURE_NUM /* [rule, tag] */
RULE_CAPTURE_NUM, /* [rule, tag] */
RULE_SUB /* [rule, rule] */
} JanetPegOpcod;

typedef struct {
Expand Down
50 changes: 50 additions & 0 deletions test/suite-peg.janet
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@
(marshpeg '(if-not "abcdf" 123))
(marshpeg ~(cmt "abcdf" ,identity))
(marshpeg '(group "abc"))
(marshpeg '(sub "abcdf" "abc"))
(marshpeg '(* (sub 1 1)))

# Peg swallowing errors
# 159651117
Expand Down Expand Up @@ -660,5 +662,53 @@
(peg/match '(if (not (* (constant 7) "a")) "hello") "hello")
@[]) "peg if not")

# Run `peg` over `input` with peg/match and assert that the result is
# deep= to `expected`. `name` is handed to assert as the failure message.
(defn test [name peg input expected]
  (def actual (peg/match peg input))
  (assert (deep= actual expected) name))

# Tests for the (sub window subpattern) special. In each case the first
# argument to sub is the window pattern and the second is matched against
# only the text the window pattern advanced over.

(test "sub: matches the same input twice"
~(sub "abcd" "abc")
"abcdef"
@[])

# The window is "abcd" (4 bytes), so a 5-byte literal cannot fit inside it.
(test "sub: second pattern cannot match more than the first pattern"
~(sub "abcd" "abcde")
"abcdef"
nil)

(test "sub: fails if first pattern fails"
~(sub "x" "abc")
"abcdef"
nil)

(test "sub: fails if second pattern fails"
~(sub "abc" "x")
"abcdef"
nil)

# Captures appear in match order: window capture first, then subpattern.
(test "sub: keeps captures from both patterns"
~(sub '"abcd" '"abc")
"abcdef"
@["abcd" "abc"])

# The backref resolves to "abc" (captured by the window under :tag) rather
# than the earlier constant 5 stored under the same tag.
(test "sub: second pattern can reference captures from first"
~(* (constant 5 :tag) (sub (capture "abc" :tag) (backref :tag)))
"abcdef"
@[5 "abc" "abc"])

# -1 succeeds right after "abc": inside the window that is the end of the
# visible text, even though the full input continues with "def".
(test "sub: second pattern can't see past what the first pattern matches"
~(sub "abc" (* "abc" -1))
"abcdef"
@[])

# The sub starts after "one\ntw" (6 bytes). Expected captures are byte
# offset 6, line 2, column 3 -- all measured from the start of the whole
# input, not from the start of the window.
(test "sub: positions inside second match are still relative to the entire input"
~(* "one\ntw" (sub "o" (* ($) (line) (column))))
"one\ntwo\nthree\n"
@[6 2 3])

# The subpattern consumes only "ab", but the sub as a whole advances past
# "abc", so the following "d" matches.
(test "sub: advances to the end of the first pattern's match"
~(* (sub "abc" "ab") "d")
"abcdef"
@[])

(end-suite)

0 comments on commit ea75086

Please sign in to comment.