Skip to content

Commit

Permalink
[PoC] Introduce parameterizing rules with conditional
Browse files Browse the repository at this point in the history
I would like to propose a new grammar in this PR.
I believe parameterizing rules could express more abstract rules if the RHS and actions they expand to could be switched by a condition, so that more rules can be shared.

Syntax is as follows:
```
%rule defined_rule(X, condition): /* empty */
                                | X { $$ = $1; } %if(condition) /* 1 */
                                | %if(condition) X %endif X { $$ = $1; } /* 2 */
                                ;

%%

r_true        : defined_rule(number, %true)
              ;

r_false       : defined_rule(number, %false)
              ;
```

1. It's like a postfix `if` in Ruby. If the condition is false, it is equivalent to omitting this line.
2. An if statement. If the condition is false, it is equivalent to omitting the RHS between `%if` and `%endif`.

I believe this addresses the problem described in the article below, namely the tight coupling with the Lexer that is needed "to disable certain generation rules under certain conditions", so I would like to propose this feature as a solution.
https://yui-knk.hatenablog.com/entry/2023/04/04/190413

We can trace the RHS to [f_args](https://github.com/ruby/ruby/blob/2f916812a9b818b432ee7c299e021ec62d4727fb/parse.y#L5523-L5575) > [args_tail](https://github.com/ruby/ruby/blob/2f916812a9b818b432ee7c299e021ec62d4727fb/parse.y#L5487-L5503) > [args_forward](https://github.com/ruby/ruby/blob/2f916812a9b818b432ee7c299e021ec62d4727fb/parse.y#L5586-L5597), where f_args is the RHS of both the lambda argument (f_larglist) and the method definition argument (f_arglist).
So if we can switch between RHS and actions by passing parameters, we can break up the Lexer/Parser coupling here.
  • Loading branch information
ydah committed May 9, 2024
1 parent 95e0cc2 commit c15c765
Show file tree
Hide file tree
Showing 13 changed files with 800 additions and 425 deletions.
7 changes: 6 additions & 1 deletion lib/lrama/grammar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Grammar
:after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
:symbols_resolver, :types,
:rules, :rule_builders,
:sym_to_rules, :no_stdlib
:sym_to_rules, :no_stdlib, :if_count

def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
Expand Down Expand Up @@ -58,6 +58,7 @@ def initialize(rule_counter)
@accept_symbol = nil
@aux = Auxiliary.new
@no_stdlib = false
@if_count = 0

append_special_symbols
end
Expand Down Expand Up @@ -170,6 +171,10 @@ def find_rules_by_symbol(sym)
@sym_to_rules[sym.number]
end

# Resets the `%if` counter (@if_count) back to zero.
def initialize_if_count
@if_count = 0
end

private

def compute_nullable
Expand Down
4 changes: 4 additions & 0 deletions lib/lrama/grammar/binding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ def resolve_symbol(symbol)
if symbol.is_a?(Lexer::Token::InstantiateRule)
resolved_args = symbol.args.map { |arg| resolve_symbol(arg) }
Lrama::Lexer::Token::InstantiateRule.new(s_value: symbol.s_value, location: symbol.location, args: resolved_args, lhs_tag: symbol.lhs_tag)
elsif symbol.is_a?(Lexer::Token::ControlSyntax)
resolved = symbol.dup
resolved.condition = @parameter_to_arg[symbol.condition_value]
resolved
else
@parameter_to_arg[symbol.s_value] || symbol
end
Expand Down
25 changes: 25 additions & 0 deletions lib/lrama/grammar/parameterizing_rule/rhs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,31 @@ def initialize
@precedence_sym = nil
end

# Tells whether this whole RHS alternative must be dropped for the given
# bindings. That is the case when the alternative ends with a postfix
# `%if(condition)` whose resolved condition is false.
#
# @param bindings [#resolve_symbol] maps rule parameters to actual arguments
# @return [Boolean] true when the alternative should not be expanded
def skip?(bindings)
  tail = @symbols.last
  return false unless tail

  resolved_tail = bindings.resolve_symbol(tail)
  return false unless resolved_tail.is_a?(Lexer::Token::ControlSyntax)

  # Only a trailing `%if` (never `%endif`) acts as a postfix condition.
  resolved_tail.if? && resolved_tail.false?
end

# Expands this RHS into the concrete symbol list for one instantiation.
#
# Every symbol is resolved through +bindings+. `%if` / `%endif` control
# tokens are consumed (never emitted) and decide which of the symbols
# between them survive.
#
# A symbol is dropped when ANY enclosing `%if` is false. Checking only the
# innermost `%if` would wrongly re-enable symbols inside a true `%if` that
# is itself nested in a false one.
#
# @param bindings [#resolve_symbol] maps rule parameters to actual arguments
# @return [Array] resolved symbols with control tokens and skipped symbols removed
# @raise [RuntimeError] when a control token is neither `%if` nor `%endif`
def resolve_symbols(bindings)
  # One entry per currently open `%if`; true means "condition is false, skip".
  skip_stack = []

  @symbols.map do |sym|
    resolved = bindings.resolve_symbol(sym)

    unless resolved.is_a?(Lexer::Token::ControlSyntax)
      next skip_stack.any? ? nil : resolved
    end

    if resolved.if?
      skip_stack.push(resolved.false?)
    elsif resolved.endif?
      skip_stack.pop
    else
      raise "Unexpected control syntax: #{resolved.condition_value}"
    end

    # Control tokens themselves never appear in the expanded RHS.
    nil
  end.compact
end

def resolve_user_code(bindings)
return unless user_code

Expand Down
5 changes: 4 additions & 1 deletion lib/lrama/grammar/rule_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,10 @@ def process_rhs
parameterizing_rule.rhs_list.each do |r|
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: token.lhs_tag || parameterizing_rule.tag)
rule_builder.lhs = lhs_token
r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
next if r.skip?(bindings)
r.resolve_symbols(bindings).each do |sym|
rule_builder.add_rhs(sym)
end
rule_builder.line = line
rule_builder.precedence_sym = r.precedence_sym
rule_builder.user_code = r.resolve_user_code(bindings)
Expand Down
4 changes: 4 additions & 0 deletions lib/lrama/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ class Lexer
%rule
%no-stdlib
%inline
%if
%endif
%true
%false
)

def initialize(grammar_file)
Expand Down
1 change: 1 addition & 0 deletions lib/lrama/lexer/token.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'lrama/lexer/token/char'
require 'lrama/lexer/token/control_syntax'
require 'lrama/lexer/token/ident'
require 'lrama/lexer/token/instantiate_rule'
require 'lrama/lexer/token/tag'
Expand Down
34 changes: 34 additions & 0 deletions lib/lrama/lexer/token/control_syntax.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
module Lrama
  class Lexer
    class Token
      # Token for the `%if` / `%endif` control keywords.
      #
      # `condition` holds the token given as the `%if` condition, or nil
      # when none was given.
      class ControlSyntax < Token
        attr_accessor :condition

        # @param s_value [String] the keyword text
        # @param location [Object] source location, forwarded to Token
        # @param condition [Object, nil] token carrying the condition, if any
        def initialize(s_value:, location:, condition: nil)
          @condition = condition
          super(s_value: s_value, location: location)
        end

        # The raw value of the condition token, or nil without a condition.
        def condition_value
          return nil if @condition.nil?

          @condition.s_value
        end

        # True when this token is the `%if` keyword.
        def if?
          s_value == '%if'
        end

        # True when this token is the `%endif` keyword.
        def endif?
          s_value == '%endif'
        end

        # True when a condition is present and its value is truthy.
        def true?
          condition_value ? true : false
        end

        # Negation of #true?; also true when there is no condition.
        def false?
          !true?
        end
      end
    end
  end
end
947 changes: 527 additions & 420 deletions lib/lrama/parser.rb

Large diffs are not rendered by default.

33 changes: 30 additions & 3 deletions parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ rule
{
rule = Grammar::ParameterizingRule::Rule.new(val[2].s_value, [], val[4], is_inline: true)
@grammar.add_parameterizing_rule(rule)
@grammar.initialize_if_count
}

rule_args: IDENTIFIER { result = [val[0]] }
Expand Down Expand Up @@ -282,7 +283,7 @@ rule
builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]])
result = builder
}
| rule_rhs IDENTIFIER "(" parameterizing_args ")" tag_opt
| rule_rhs IDENTIFIER "(" parameterizing_rule_args ")" tag_opt
{
builder = val[0]
builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3], lhs_tag: val[5])
Expand Down Expand Up @@ -316,6 +317,21 @@ rule
builder.precedence_sym = sym
result = builder
}
| rule_rhs "%if" "(" IDENTIFIER ")"
{
builder = val[0]
builder.symbols << Lrama::Lexer::Token::ControlSyntax.new(s_value: val[1], location: @lexer.location, condition: val[3])
@grammar.if_count += 1
result = builder
}
| rule_rhs "%endif"
{
on_action_error("no %if before %endif", val[0]) if @grammar.if_count == 0
builder = val[0]
builder.symbols << Lrama::Lexer::Token::ControlSyntax.new(s_value: val[1], location: @lexer.location)
@grammar.if_count -= 1
result = builder
}

int_opt: # empty
| INTEGER
Expand Down Expand Up @@ -491,11 +507,22 @@ rule
| "+" { result = "nonempty_list" }
| "*" { result = "list" }

parameterizing_args: symbol { result = [val[0]] }
| parameterizing_args ',' symbol { result = val[0].append(val[2]) }
# NOTE(review): the recursive alternatives below refer to `parameterizing_args` (which also accepts "%true"/"%false" via symbol_or_bool) rather than to `parameterizing_rule_args` itself; confirm whether bool arguments are meant to be accepted here.
parameterizing_rule_args: symbol { result = [val[0]] }
| parameterizing_args ',' symbol { result = val[0].append(val[2]) }
| symbol parameterizing_suffix { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] }
| IDENTIFIER "(" parameterizing_args ")" { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] }

parameterizing_args: symbol_or_bool { result = [val[0]] }
| parameterizing_args ',' symbol_or_bool { result = val[0].append(val[2]) }
| symbol parameterizing_suffix { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[0])] }
| IDENTIFIER "(" parameterizing_args ")" { result = [Lrama::Lexer::Token::InstantiateRule.new(s_value: val[0].s_value, location: @lexer.location, args: val[2])] }

symbol_or_bool: symbol
| bool

bool: "%true" { result = Lrama::Lexer::Token::Ident.new(s_value: true) }
| "%false" { result = Lrama::Lexer::Token::Ident.new(s_value: false) }

named_ref_opt: # empty
| '[' IDENTIFIER ']' { result = val[1].s_value }

Expand Down
2 changes: 2 additions & 0 deletions sig/lrama/grammar/parameterizing_rule/rhs.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ module Lrama
attr_reader precedence_sym: Lexer::Token?

def initialize: () -> void
def skip?: (Grammar::Binding bindings) -> bool
def resolve_symbols: (Grammar::Binding bindings) -> Array[untyped]
def resolve_user_code: (Grammar::Binding bindings) -> Lexer::Token::UserCode?
end
end
Expand Down
16 changes: 16 additions & 0 deletions sig/lrama/lexer/token/control_syntax.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
module Lrama
class Lexer
class Token
# Signature of the token class for the `%if` / `%endif` control keywords.
class ControlSyntax < Token
# Token given as the `%if` condition; nil when there is none.
attr_accessor condition: Lexer::Token::Ident?

def initialize: (s_value: String, location: Location, ?condition: Lexer::Token::Ident?) -> void
# Whether the token text is `%if` / `%endif` respectively.
def if?: () -> bool
def endif?: () -> bool
# Truthiness of the condition's value; `false?` is the negation of `true?`.
def true?: () -> bool
def false?: () -> bool
# NOTE(review): parser.y builds "%true"/"%false" as Ident tokens whose s_value is a Ruby boolean, so `String?` may be too narrow here — confirm.
def condition_value: () -> String?
end
end
end
end
44 changes: 44 additions & 0 deletions spec/fixtures/parameterizing_rules/user_defined/if.y
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* This is comment for this file.
*/

%{
// Prologue
static int yylex(YYSTYPE *val, YYLTYPE *loc);
static int yyerror(YYLTYPE *loc, const char *str);
%}

%union {
int i;
}

%token <i> number

%rule defined_rule(X, condition): /* empty */
| X { $$ = $1; } %if(condition)
| %if(condition) X %endif X { $$ = $1; }
;

%%

r_true : defined_rule(number, %true)
;

r_false : defined_rule(number, %false)
;

%%

/* Stub lexer for this fixture: ignores its arguments and always returns 0. */
static int yylex(YYSTYPE *yylval, YYLTYPE *loc)
{
return 0;
}

/* Stub error handler for this fixture: ignores the message and always returns 0. */
static int yyerror(YYLTYPE *loc, const char *str)
{
return 0;
}

/* Empty entry point; presumably present only so the fixture is a complete grammar file. */
int main(int argc, char *argv[])
{
}
103 changes: 103 additions & 0 deletions spec/lrama/parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2278,6 +2278,109 @@
])
end
end

# Conditional expansion of a user-defined parameterizing rule using %if / %endif.
# The fixture instantiates `defined_rule(number, %true)` and `defined_rule(number, %false)`.
context "when if" do
let(:path) { "parameterizing_rules/user_defined/if.y" }

it "expands parameterizing rules" do
expect(grammar.nterms.sort_by(&:number)).to match_symbols([
Sym.new(id: T::Ident.new(s_value: "$accept"), alias_name: nil, number: 4, tag: nil, term: false, token_id: 0, nullable: false),
Sym.new(id: T::Ident.new(s_value: "defined_rule_number_true"), alias_name: nil, number: 5, tag: nil, term: false, token_id: 1, nullable: true),
Sym.new(id: T::Ident.new(s_value: "r_true"), alias_name: nil, number: 6, tag: nil, term: false, token_id: 2, nullable: true),
Sym.new(id: T::Ident.new(s_value: "defined_rule_number_false"), alias_name: nil, number: 7, tag: nil, term: false, token_id: 3, nullable: true),
Sym.new(id: T::Ident.new(s_value: "r_false"), alias_name: nil, number: 8, tag: nil, term: false, token_id: 4, nullable: true),
])

expect(grammar.rules).to eq([
Rule.new(
id: 0,
lhs: grammar.find_symbol_by_s_value!("$accept"),
rhs: [
grammar.find_symbol_by_s_value!("r_true"),
grammar.find_symbol_by_s_value!("YYEOF"),
],
token_code: nil,
nullable: false,
precedence_sym: grammar.find_symbol_by_s_value!("YYEOF"),
lineno: 24,
),
# Rules 1-3: with %true all three alternatives of defined_rule are expanded.
Rule.new(
id: 1,
lhs: grammar.find_symbol_by_s_value!("defined_rule_number_true"),
rhs: [],
token_code: nil,
nullable: true,
precedence_sym: nil,
lineno: 24,
),
Rule.new(
id: 2,
lhs: grammar.find_symbol_by_s_value!("defined_rule_number_true"),
rhs: [
grammar.find_symbol_by_s_value!("number"),
],
token_code: T::UserCode.new(s_value: " $$ = $1; "),
nullable: false,
precedence_sym: grammar.find_symbol_by_s_value!("number"),
lineno: 24,
),
# The symbol between %if and %endif is kept, giving two `number`s.
Rule.new(
id: 3,
lhs: grammar.find_symbol_by_s_value!("defined_rule_number_true"),
rhs: [
grammar.find_symbol_by_s_value!("number"),
grammar.find_symbol_by_s_value!("number"),
],
token_code: T::UserCode.new(s_value: " $$ = $1; "),
nullable: false,
precedence_sym: grammar.find_symbol_by_s_value!("number"),
lineno: 24,
),
Rule.new(
id: 4,
lhs: grammar.find_symbol_by_s_value!("r_true"),
rhs: [
grammar.find_symbol_by_s_value!("defined_rule_number_true"),
],
token_code: nil,
nullable: true,
precedence_sym: nil,
lineno: 24,
),
# Rules 5-6: with %false the postfix-%if alternative is absent, and the
# %if ... %endif span is removed, leaving a single `number`.
Rule.new(
id: 5,
lhs: grammar.find_symbol_by_s_value!("defined_rule_number_false"),
rhs: [],
token_code: nil,
nullable: true,
precedence_sym: nil,
lineno: 27,
),
Rule.new(
id: 6,
lhs: grammar.find_symbol_by_s_value!("defined_rule_number_false"),
rhs: [
grammar.find_symbol_by_s_value!("number"),
],
token_code: T::UserCode.new(s_value: " $$ = $1; "),
nullable: false,
precedence_sym: grammar.find_symbol_by_s_value!("number"),
lineno: 27,
),
Rule.new(
id: 7,
lhs: grammar.find_symbol_by_s_value!("r_false"),
rhs: [
grammar.find_symbol_by_s_value!("defined_rule_number_false"),
],
token_code: nil,
nullable: true,
precedence_sym: nil,
lineno: 27,
),
])
end
end
end

context 'when error case' do
Expand Down

0 comments on commit c15c765

Please sign in to comment.