Skip to content

Commit

Permalink
Merge pull request #140 from lutaml/maxirmx-segfault-again
Browse files Browse the repository at this point in the history
Antlr & Rice updated; refactored Token wrapper; added CodeQL workflow
  • Loading branch information
maxirmx authored Jan 21, 2024
2 parents 33b6a7e + aaca3d0 commit e1759d7
Show file tree
Hide file tree
Showing 34 changed files with 115 additions and 48 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
name: "CodeQL"

on:
push:
branches: [ main ]
schedule:
- cron: '0 23 * * 3'
workflow_dispatch:

concurrency:
group: '${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.ref_name }}'
cancel-in-progress: true

env:
BUNDLER_VER: 2.4.22

jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive

- name: Install Ruby
uses: ruby/setup-ruby@master
with:
ruby-version: 3.1
bundler: ${{ env.BUNDLER_VER }}
bundler-cache: false

- name: Bundle
run: bundle install --jobs 4 --retry 3

- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: "ruby, cpp"

- name: Build native extension
run: bundle exec rake compile

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1
3 changes: 2 additions & 1 deletion .github/workflows/rake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:
- '**.adoc'
- '**.md'
- .github/workflows/release.yml
- .github/workflows/codeql.yml
pull_request:
workflow_dispatch:

Expand Down Expand Up @@ -81,7 +82,7 @@ jobs:
id: cache
with:
path: lib/expressir/express/express_parser.*
key: v4-${{ matrix.os }}-${{ matrix.ruby }}-${{ hashFiles('ext/express-parser/extconf.rb', 'ext/express-parser/antlrgen/**', 'ext/express-parser/express_parser.cpp', '.git/modules/ext/express-parser/antlr4-upstream/HEAD') }}
key: v4-${{ matrix.os }}-${{ matrix.ruby }}-${{ hashFiles('ext/express_parser/extconf.rb', 'ext/express_parser/antlrgen/**', 'ext/express_parser/express_parser.cpp', '.git/modules/ext/express_parser/antlr4-upstream/HEAD') }}

- name: Build native extension
if: steps.cache.outputs.cache-hit != 'true'
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ concurrency:
group: '${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.ref_name }}'
cancel-in-progress: true

env:
BUNDLER_VER: 2.4.22

jobs:
bump:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -129,7 +132,7 @@ jobs:
EOF
chmod 0600 ~/.gem/credentials
gem signin
for gem in pkg/*.gem; do echo "gem push $gem -V"; done
for gem in pkg/*.gem; do gem push $gem -V; done
sleep 5
verify:
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
/lib/expressir/express/*/express_parser.bundle
/lib/expressir/express/*/express_parser.so
/spec/syntax/*-pretty.exp
/ext/express-parser/rice-embed
/ext/express_parser/rice-embed

# rspec failure tracking
.rspec_status
Expand Down
4 changes: 2 additions & 2 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[submodule "ext/express-parser/antlr4-upstream"]
path = ext/express-parser/antlr4-upstream
[submodule "ext/express_parser/antlr4-upstream"]
path = ext/express_parser/antlr4-upstream
url = https://github.com/antlr/antlr4
[submodule "ext/express-grammar"]
path = ext/express-grammar
Expand Down
2 changes: 1 addition & 1 deletion exe/generate-parser
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ generator = Antlr4Native::Generator.new(
generator.generate

# fix issues with generated parser
parser_source_file = File.join("ext", "express-parser", "express_parser.cpp")
parser_source_file = File.join("ext", "express_parser", "express_parser.cpp")
parser_source_lines = File.read(parser_source_file).split("\n")

# - add ParserProxy tokens method, simple compensation for missing exposed BufferedTokenStream
Expand Down
8 changes: 4 additions & 4 deletions expressir.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,19 @@ Gem::Specification.new do |spec|
spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")

spec.files = `git ls-files`.split("\n")\
+ Dir.glob("ext/express-parser/antlr4-upstream/runtime/Cpp/runtime/**/*")
+ Dir.glob("ext/express_parser/antlr4-upstream/runtime/Cpp/runtime/**/*")

spec.test_files = `git ls-files -- {spec}/*`.split("\n")

spec.bindir = "exe"
spec.require_paths = ["lib"]
spec.executables = %w[expressir]

spec.extensions = File.join(*%w(ext express-parser extconf.rb))
spec.extensions = File.join(*%w(ext express_parser extconf.rb))

spec.add_runtime_dependency "rice", "~> 4.1"
spec.add_runtime_dependency "rice", "~> 4.2"
spec.add_runtime_dependency "thor", "~> 1.0"
spec.add_development_dependency "antlr4-native", "~> 2.1.0"
spec.add_development_dependency "antlr4-native", "~> 2.2"
spec.add_development_dependency "asciidoctor", "~> 2.0.13"
spec.add_development_dependency "bundler", "~> 2.3"
spec.add_development_dependency "byebug", "~> 11.1"
Expand Down
1 change: 0 additions & 1 deletion ext/express-parser/antlr4-upstream
Submodule antlr4-upstream deleted from 44d87b
1 change: 1 addition & 0 deletions ext/express_parser/antlr4-upstream
Submodule antlr4-upstream added at ebb511
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -299,13 +299,13 @@ class ContextProxy {
Object getStart() {
auto token = ((ParserRuleContext*) orig) -> getStart();

return detail::To_Ruby<Token*>().convert(token);
return detail::To_Ruby<TokenProxy>().convert(TokenProxy(token));
}

Object getStop() {
auto token = ((ParserRuleContext*) orig) -> getStop();

return detail::To_Ruby<Token*>().convert(token);
return detail::To_Ruby<TokenProxy>().convert(TokenProxy(token));
}

Array getChildren() {
Expand Down Expand Up @@ -16242,7 +16242,7 @@ class VisitorProxy : public ExpressBaseVisitor, public Director {
auto result = visit(proxy -> getOriginal());
try {
return std::any_cast<Object>(result);
} catch(std::bad_cast) {
} catch(std::bad_any_cast) {
return Qnil;
}
}
Expand All @@ -16251,7 +16251,7 @@ class VisitorProxy : public ExpressBaseVisitor, public Director {
auto result = visitChildren(proxy -> getOriginal());
try {
return std::any_cast<Object>(result);
} catch(std::bad_cast) {
} catch(std::bad_any_cast) {
return Qnil;
}
}
Expand Down Expand Up @@ -18160,12 +18160,12 @@ class ParserProxyExt : public Object {
return detail::To_Ruby<SyntaxContextProxy>().convert(proxy);
}

Array getTokens() {
Array a;
Object getTokens() {
std::vector<TokenProxy> tk;
for (auto token : tokens -> getTokens()) {
a.push(new TokenProxy(token));
tk.push_back(TokenProxy(token));
}
return a;
return detail::To_Ruby<std::vector<TokenProxy>>().convert(tk);
}

Object visit(VisitorProxy* visitor) {
Expand All @@ -18176,7 +18176,7 @@ class ParserProxyExt : public Object {

try {
return std::any_cast<Object>(result);
} catch(std::bad_cast) {
} catch(std::bad_any_cast) {
return Qnil;
}
}
Expand Down Expand Up @@ -18418,8 +18418,8 @@ void Init_express_parser() {
rb_cParser = define_class_under<ParserProxy>(rb_mExpressParser, "Parser")
.define_singleton_function("parse", &ParserProxy::parse)
.define_singleton_function("parse_file", &ParserProxy::parseFile)
.define_method("syntax", &ParserProxy::syntax, Return().keepAlive())
.define_method("visit", &ParserProxy::visit, Return().keepAlive());
.define_method("syntax", &ParserProxy::syntax)
.define_method("visit", &ParserProxy::visit);

rb_cTokenExt = define_class_under<TokenProxy>(rb_mExpressParser, "TokenExt")
.define_method("text", &TokenProxy::getText)
Expand All @@ -18428,9 +18428,11 @@ void Init_express_parser() {

rb_cParserExt = define_class_under<ParserProxyExt>(rb_mExpressParser, "ParserExt")
.define_constructor(Constructor<ParserProxyExt, string>())
.define_method("syntax", &ParserProxyExt::syntax, Return().keepAlive())
.define_method("tokens", &ParserProxyExt::getTokens)
.define_method("visit", &ParserProxyExt::visit, Return().keepAlive());
.define_method("syntax", &ParserProxyExt::syntax)
.define_method("tokens", &ParserProxyExt::getTokens, Return().keepAlive())
.define_method("visit", &ParserProxyExt::visit);

define_vector<std::vector<TokenProxy>>("TokenVector");

rb_cAttributeRefContext = define_class_under<AttributeRefContextProxy, ContextProxy>(rb_mExpressParser, "AttributeRefContext")
.define_method("attribute_id", &AttributeRefContextProxy::attributeId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
antlr4_src.to_s,
"#{antlr4_src}/atn",
"#{antlr4_src}/dfa",
"#{antlr4_src}/internal",
"#{antlr4_src}/misc",
"#{antlr4_src}/support",
"#{antlr4_src}/tree",
Expand Down
1 change: 0 additions & 1 deletion lib/expressir/express/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def self.from_file(file, skip_references: nil, include_source: nil)
# so in this class we keep those C++ structure marked for GC so they are not freed
@parser = ::ExpressParser::ParserExt.new(file.to_s)
@parse_tree = @parser.syntax()

@visitor = Visitor.new(@parser.tokens, include_source: include_source)
@repository = @visitor.visit(@parse_tree)

Expand Down
19 changes: 11 additions & 8 deletions lib/expressir/express/visitor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class Visitor < ::ExpressParser::Visitor

private_constant :REMARK_CHANNEL

# @param [Array<::ExpressParser::Token>] tokens
# @param [::ExpressParser::TokenVector] Rice-wrapped std::vector<TokenProxy>
# @param [Boolean] include_source attach original source code to model elements
def initialize(tokens, include_source: nil)
@tokens = tokens
Expand Down Expand Up @@ -101,7 +101,11 @@ def get_tokens(ctx)
[ctx.start.token_index, ctx.stop.token_index]
end

@tokens[start_index..stop_index]
selected_tokens = []
(start_index..stop_index).each do |i|
selected_tokens << @tokens[i]
end
selected_tokens
end

def attach_source(ctx, node)
Expand Down Expand Up @@ -150,15 +154,14 @@ def find_remark_target(node, path)
end

def attach_remarks(ctx, node)
@remark_tokens = get_tokens(ctx)
@remark_tokens = @remark_tokens.select{ |x| x.channel == REMARK_CHANNEL
}
remark_tokens = get_tokens(ctx)
remark_tokens = remark_tokens.select{ |x| x.channel == REMARK_CHANNEL }

# skip already attached remarks
@remark_tokens = @remark_tokens.select{|x| !@attached_remark_tokens.include?(x)}
remark_tokens = remark_tokens.select{|x| !@attached_remark_tokens.include?(x.token_index)}

# parse remarks, find remark targets
tagged_remark_tokens = @remark_tokens.map do |remark_token|
tagged_remark_tokens = remark_tokens.map do |remark_token|
_, remark_tag, remark_text = if remark_token.text.start_with?('--')
remark_token.text.match(/^--"([^"]*)"(.*)$/).to_a
else
Expand All @@ -181,7 +184,7 @@ def attach_remarks(ctx, node)
remark_target.remarks << remark_text

# mark remark as attached, so that it is not attached again at higher nesting level
@attached_remark_tokens << remark_token
@attached_remark_tokens << remark_token.token_index
end
end

Expand Down
32 changes: 22 additions & 10 deletions rakelib/antlr4-native.rake
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,12 @@ def create_pp_class_definition(parser_source_lines)
return detail::To_Ruby<SyntaxContextProxy>().convert(proxy);
}
Array getTokens() {
Array a;
Object getTokens() {
std::vector<TokenProxy> tk;
for (auto token : tokens -> getTokens()) {
a.push(new TokenProxy(token));
tk.push_back(TokenProxy(token));
}
return a;
return detail::To_Ruby<std::vector<TokenProxy>>().convert(tk);
}
Object visit(VisitorProxy* visitor) {
Expand Down Expand Up @@ -105,7 +105,7 @@ def create_pp_class_definition(parser_source_lines)
end

def create_class_api(parser_source_lines)
i = parser_source_lines.index { |x| x == " .define_method(\"visit\", &ParserProxy::visit, Return().keepAlive());" }
i = parser_source_lines.index { |x| x == " .define_method(\"visit\", &ParserProxy::visit);" }
parser_source_lines[i] += <<~CPP.split("\n").map { |x| x == "" ? x : " #{x}" }.join("\n")
Expand All @@ -116,17 +116,29 @@ def create_class_api(parser_source_lines)
rb_cParserExt = define_class_under<ParserProxyExt>(rb_mExpressParser, "ParserExt")
.define_constructor(Constructor<ParserProxyExt, string>())
.define_method("syntax", &ParserProxyExt::syntax, Return().keepAlive())
.define_method("tokens", &ParserProxyExt::getTokens)
.define_method("visit", &ParserProxyExt::visit, Return().keepAlive());
.define_method("syntax", &ParserProxyExt::syntax)
.define_method("tokens", &ParserProxyExt::getTokens, Return().keepAlive())
.define_method("visit", &ParserProxyExt::visit);
define_vector<std::vector<TokenProxy>>("TokenVector");
CPP
end

def create_vector_definition(parser_source_lines)
i = parser_source_lines.index { |x| x == " .define_method(\"visit\", &ParserProxy::visit);" }
parser_source_lines[i] += <<~CPP.split("\n").map { |x| x == "" ? x : " #{x}" }.join("\n")
CPP
end

def generate_extended_parser
# Generate extended parser that provide Ruby access to token stream
parser_source_file = File.join("ext", "express-parser", "express_parser.cpp")
parser_source_lines = File.read(parser_source_file).split("\n")
parser_source_file = File.join("ext", "express_parser", "express_parser.cpp")
parser_source_lines = File.read(parser_source_file)
.gsub!("bad_cast", "bad_any_cast")
.gsub!("return detail::To_Ruby<Token*>().convert(token)", "return detail::To_Ruby<TokenProxy>().convert(TokenProxy(token))")
.split("\n")
create_class_declarations(parser_source_lines)
create_tp_class_definition(parser_source_lines)
create_pp_class_definition(parser_source_lines)
Expand Down
2 changes: 1 addition & 1 deletion rakelib/cross-ruby.rake
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ end
require "rake/extensiontask"

Rake::ExtensionTask.new("express_parser", GEMSPEC) do |ext|
ext.ext_dir = "ext/express-parser"
ext.ext_dir = "ext/express_parser"
ext.lib_dir = File.join(*["lib", "expressir", "express", ENV.fetch("FAT_DIR", nil)].compact)
ext.config_options << ENV.fetch("EXTOPTS", nil)
ext.cross_compile = true
Expand Down
1 change: 1 addition & 0 deletions spec/acceptance/version_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
RSpec.describe "Expressir" do
describe "version" do
it "has a version number" do |example|
GC.stress = ENV["GC_STRESS"] == "true"
print "\n[#{example.description}] "
expect(Expressir::VERSION).not_to be nil

Expand Down
2 changes: 1 addition & 1 deletion spec/expressir/express/cache_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
GC.verify_internal_consistency
end

it "fails parsing a file from a different Expressir version" do |example|
it "fails parsing a cache from a different Expressir version" do |example|
print "\n[#{example.description}] "
temp_file = Tempfile.new

Expand Down
2 changes: 0 additions & 2 deletions spec/expressir/express/formatter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
print "\n[#{example.description}] "
exp_file = Expressir.root_path.join("spec", "syntax", "single.exp")
formatted_exp_file = Expressir.root_path.join("spec", "syntax", "single_formatted.exp")

repo = Expressir::Express::Parser.from_file(exp_file)

result = Expressir::Express::Formatter.format(repo)
# File.write(formatted_exp_file, result)
expected_result = File.read(formatted_exp_file)
Expand Down

0 comments on commit e1759d7

Please sign in to comment.